summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKent Overstreet <kent.overstreet@linux.dev>2025-05-04 15:38:06 -0400
committerKent Overstreet <kent.overstreet@linux.dev>2025-05-04 15:52:42 -0400
commit401a20ed984b7ccee689202d9372045d41271bcc (patch)
tree61a8553d25b04459593fca2c350dfcb7def76150
parent6e4bda5ad5f7a43b90d9f22b8e86011e51569bf1 (diff)
Update bcachefs sources to b4927db2cdc7 bcachefs: bcachefs_metadata_version_fast_device_removal
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
-rw-r--r--.bcachefs_revision2
-rw-r--r--libbcachefs/alloc_foreground.c6
-rw-r--r--libbcachefs/bcachefs_format.h3
-rw-r--r--libbcachefs/btree_gc.c4
-rw-r--r--libbcachefs/disk_groups.c6
-rw-r--r--libbcachefs/extents.c53
-rw-r--r--libbcachefs/fsck.c121
-rw-r--r--libbcachefs/inode.c7
-rw-r--r--libbcachefs/journal_io.c122
-rw-r--r--libbcachefs/migrate.c107
-rw-r--r--libbcachefs/migrate.h3
-rw-r--r--libbcachefs/rebalance.c10
-rw-r--r--libbcachefs/recovery.c16
-rw-r--r--libbcachefs/sb-members.c29
-rw-r--r--libbcachefs/sb-members.h4
-rw-r--r--libbcachefs/sb-members_format.h4
-rw-r--r--libbcachefs/snapshot.c193
-rw-r--r--libbcachefs/super-io.c3
-rw-r--r--libbcachefs/super.c38
-rw-r--r--libbcachefs/super.h2
-rw-r--r--libbcachefs/thread_with_file.c4
21 files changed, 509 insertions, 228 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index f52da942..a4291689 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-5a0455ae19afb354634b3c5c9bf55d2171005a2f
+b4927db2cdc7f124f968f9eaa1d785298ae31c1a
diff --git a/libbcachefs/alloc_foreground.c b/libbcachefs/alloc_foreground.c
index a0f92daa..c17c5733 100644
--- a/libbcachefs/alloc_foreground.c
+++ b/libbcachefs/alloc_foreground.c
@@ -1255,6 +1255,9 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
if (unlikely(ret))
return ret;
+ if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
+ erasure_code = false;
+
req->nr_replicas = nr_replicas;
req->target = target;
req->ec = erasure_code;
@@ -1262,9 +1265,6 @@ int bch2_alloc_sectors_start_trans(struct btree_trans *trans,
req->flags = flags;
req->devs_have = devs_have;
- if (!IS_ENABLED(CONFIG_BCACHEFS_ERASURE_CODING))
- erasure_code = false;
-
BUG_ON(!nr_replicas || !nr_replicas_required);
retry:
req->ptrs.nr = 0;
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 0beff6af..baaf9786 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -696,7 +696,8 @@ struct bch_sb_field_ext {
x(stripe_lru, BCH_VERSION(1, 23)) \
x(casefolding, BCH_VERSION(1, 24)) \
x(extent_flags, BCH_VERSION(1, 25)) \
- x(snapshot_deletion_v2, BCH_VERSION(1, 26))
+ x(snapshot_deletion_v2, BCH_VERSION(1, 26)) \
+ x(fast_device_removal, BCH_VERSION(1, 27))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 92ae3173..dd08ec08 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -1079,6 +1079,10 @@ out:
* allocator thread - issue wakeup in case they blocked on gc_lock:
*/
closure_wake_up(&c->freelist_wait);
+
+ if (!ret && !test_bit(BCH_FS_errors_not_fixed, &c->flags))
+ bch2_sb_members_clean_deleted(c);
+
bch_err_fn(c, ret);
return ret;
}
diff --git a/libbcachefs/disk_groups.c b/libbcachefs/disk_groups.c
index c1a2a957..c20ecf5e 100644
--- a/libbcachefs/disk_groups.c
+++ b/libbcachefs/disk_groups.c
@@ -212,17 +212,13 @@ bool bch2_dev_in_target(struct bch_fs *c, unsigned dev, unsigned target)
case TARGET_DEV:
return dev == t.dev;
case TARGET_GROUP: {
- rcu_read_lock();
struct bch_disk_groups_cpu *g = rcu_dereference(c->disk_groups);
const struct bch_devs_mask *m =
g && t.group < g->nr && !g->entries[t.group].deleted
? &g->entries[t.group].devs
: NULL;
- bool ret = m ? test_bit(dev, m->d) : false;
- rcu_read_unlock();
-
- return ret;
+ return m ? test_bit(dev, m->d) : false;
}
default:
BUG();
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index 18506061..2d41260a 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -1121,8 +1121,9 @@ bch2_extent_has_ptr(struct bkey_s_c k1, struct extent_ptr_decoded p1, struct bke
static bool want_cached_ptr(struct bch_fs *c, struct bch_io_opts *opts,
struct bch_extent_ptr *ptr)
{
- if (!opts->promote_target ||
- !bch2_dev_in_target(c, ptr->dev, opts->promote_target))
+ unsigned target = opts->promote_target ?: opts->foreground_target;
+
+ if (target && !bch2_dev_in_target(c, ptr->dev, target))
return false;
struct bch_dev *ca = bch2_dev_rcu_noerror(c, ptr->dev);
@@ -1135,33 +1136,43 @@ void bch2_extent_ptr_set_cached(struct bch_fs *c,
struct bkey_s k,
struct bch_extent_ptr *ptr)
{
- struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
+ struct bkey_ptrs ptrs;
union bch_extent_entry *entry;
struct extent_ptr_decoded p;
+ bool have_cached_ptr;
rcu_read_lock();
- if (!want_cached_ptr(c, opts, ptr)) {
- bch2_bkey_drop_ptr_noerror(k, ptr);
- goto out;
- }
+restart_drop_ptrs:
+ ptrs = bch2_bkey_ptrs(k);
+ have_cached_ptr = false;
- /*
- * Stripes can't contain cached data, for - reasons.
- *
- * Possibly something we can fix in the future?
- */
- bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
- if (&entry->ptr == ptr) {
- if (p.has_ec)
- bch2_bkey_drop_ptr_noerror(k, ptr);
- else
- ptr->cached = true;
- goto out;
+ bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+ /*
+ * Check if it's erasure coded - stripes can't contain cached
+ * data. Possibly something we can fix in the future?
+ */
+ if (&entry->ptr == ptr && p.has_ec)
+ goto drop;
+
+ if (p.ptr.cached) {
+ if (have_cached_ptr || !want_cached_ptr(c, opts, &p.ptr)) {
+ bch2_bkey_drop_ptr_noerror(k, &entry->ptr);
+ goto restart_drop_ptrs;
+ }
+
+ have_cached_ptr = true;
}
+ }
- BUG();
-out:
+ if (have_cached_ptr || !want_cached_ptr(c, opts, ptr))
+ goto drop;
+
+ ptr->cached = true;
+ rcu_read_unlock();
+ return;
+drop:
rcu_read_unlock();
+ bch2_bkey_drop_ptr_noerror(k, ptr);
}
/*
diff --git a/libbcachefs/fsck.c b/libbcachefs/fsck.c
index dd88113a..add23676 100644
--- a/libbcachefs/fsck.c
+++ b/libbcachefs/fsck.c
@@ -790,6 +790,7 @@ static int ref_visible2(struct bch_fs *c,
struct inode_walker_entry {
struct bch_inode_unpacked inode;
+ bool whiteout;
u64 count;
u64 i_size;
};
@@ -818,12 +819,20 @@ static struct inode_walker inode_walker_init(void)
static int add_inode(struct bch_fs *c, struct inode_walker *w,
struct bkey_s_c inode)
{
- struct bch_inode_unpacked u;
-
- return bch2_inode_unpack(inode, &u) ?:
- darray_push(&w->inodes, ((struct inode_walker_entry) {
- .inode = u,
+ int ret = darray_push(&w->inodes, ((struct inode_walker_entry) {
+ .whiteout = !bkey_is_inode(inode.k),
}));
+ if (ret)
+ return ret;
+
+ struct inode_walker_entry *n = &darray_last(w->inodes);
+ if (!n->whiteout) {
+ return bch2_inode_unpack(inode, &n->inode);
+ } else {
+ n->inode.bi_inum = inode.k->p.inode;
+ n->inode.bi_snapshot = inode.k->p.snapshot;
+ return 0;
+ }
}
static int get_inodes_all_snapshots(struct btree_trans *trans,
@@ -843,13 +852,12 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
w->recalculate_sums = false;
w->inodes.nr = 0;
- for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
- BTREE_ITER_all_snapshots, k, ret) {
- if (k.k->p.offset != inum)
+ for_each_btree_key_max_norestart(trans, iter,
+ BTREE_ID_inodes, POS(0, inum), SPOS(0, inum, U32_MAX),
+ BTREE_ITER_all_snapshots, k, ret) {
+ ret = add_inode(c, w, k);
+ if (ret)
break;
-
- if (bkey_is_inode(k.k))
- add_inode(c, w, k);
}
bch2_trans_iter_exit(trans, &iter);
@@ -861,6 +869,41 @@ static int get_inodes_all_snapshots(struct btree_trans *trans,
return 0;
}
+static int get_visible_inodes(struct btree_trans *trans,
+ struct inode_walker *w,
+ struct snapshots_seen *s,
+ u64 inum)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter iter;
+ struct bkey_s_c k;
+ int ret;
+
+ w->inodes.nr = 0;
+ w->deletes.nr = 0;
+
+ for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, SPOS(0, inum, s->pos.snapshot),
+ BTREE_ITER_all_snapshots, k, ret) {
+ if (k.k->p.offset != inum)
+ break;
+
+ if (!ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot))
+ continue;
+
+ if (snapshot_list_has_ancestor(c, &w->deletes, k.k->p.snapshot))
+ continue;
+
+ ret = bkey_is_inode(k.k)
+ ? add_inode(c, w, k)
+ : snapshot_list_add(c, &w->deletes, k.k->p.snapshot);
+ if (ret)
+ break;
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ return ret;
+}
+
static struct inode_walker_entry *
lookup_inode_for_snapshot(struct btree_trans *trans, struct inode_walker *w, struct bkey_s_c k)
{
@@ -891,8 +934,25 @@ found:
new.bi_snapshot = k.k->p.snapshot;
ret = __bch2_fsck_write_inode(trans, &new) ?:
- bch2_trans_commit(trans, NULL, NULL, 0) ?:
- -BCH_ERR_transaction_restart_nested;
+ bch2_trans_commit(trans, NULL, NULL, 0);
+ if (ret)
+ goto fsck_err;
+
+ struct inode_walker_entry new_entry = *i;
+
+ new_entry.inode.bi_snapshot = k.k->p.snapshot;
+ new_entry.count = 0;
+ new_entry.i_size = 0;
+
+ while (i > w->inodes.data && i[-1].inode.bi_snapshot > k.k->p.snapshot)
+ --i;
+
+ size_t pos = i - w->inodes.data;
+ ret = darray_insert_item(&w->inodes, pos, new_entry);
+ if (ret)
+ goto fsck_err;
+
+ ret = -BCH_ERR_transaction_restart_nested;
goto fsck_err;
}
@@ -918,41 +978,6 @@ static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
return lookup_inode_for_snapshot(trans, w, k);
}
-static int get_visible_inodes(struct btree_trans *trans,
- struct inode_walker *w,
- struct snapshots_seen *s,
- u64 inum)
-{
- struct bch_fs *c = trans->c;
- struct btree_iter iter;
- struct bkey_s_c k;
- int ret;
-
- w->inodes.nr = 0;
- w->deletes.nr = 0;
-
- for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, SPOS(0, inum, s->pos.snapshot),
- BTREE_ITER_all_snapshots, k, ret) {
- if (k.k->p.offset != inum)
- break;
-
- if (!ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot))
- continue;
-
- if (snapshot_list_has_ancestor(c, &w->deletes, k.k->p.snapshot))
- continue;
-
- ret = bkey_is_inode(k.k)
- ? add_inode(c, w, k)
- : snapshot_list_add(c, &w->deletes, k.k->p.snapshot);
- if (ret)
- break;
- }
- bch2_trans_iter_exit(trans, &iter);
-
- return ret;
-}
-
/*
* Prefer to delete the first one, since that will be the one at the wrong
* offset:
diff --git a/libbcachefs/inode.c b/libbcachefs/inode.c
index b51d98cf..bc68bae8 100644
--- a/libbcachefs/inode.c
+++ b/libbcachefs/inode.c
@@ -240,6 +240,7 @@ static int bch2_inode_unpack_v3(struct bkey_s_c k,
u64 v[2];
unpacked->bi_inum = inode.k->p.offset;
+ unpacked->bi_snapshot = inode.k->p.snapshot;
unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
unpacked->bi_hash_seed = inode.v->bi_hash_seed;
unpacked->bi_flags = le64_to_cpu(inode.v->bi_flags);
@@ -284,13 +285,12 @@ static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
{
memset(unpacked, 0, sizeof(*unpacked));
- unpacked->bi_snapshot = k.k->p.snapshot;
-
switch (k.k->type) {
case KEY_TYPE_inode: {
struct bkey_s_c_inode inode = bkey_s_c_to_inode(k);
unpacked->bi_inum = inode.k->p.offset;
+ unpacked->bi_snapshot = inode.k->p.snapshot;
unpacked->bi_journal_seq= 0;
unpacked->bi_hash_seed = inode.v->bi_hash_seed;
unpacked->bi_flags = le32_to_cpu(inode.v->bi_flags);
@@ -309,6 +309,7 @@ static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
struct bkey_s_c_inode_v2 inode = bkey_s_c_to_inode_v2(k);
unpacked->bi_inum = inode.k->p.offset;
+ unpacked->bi_snapshot = inode.k->p.snapshot;
unpacked->bi_journal_seq= le64_to_cpu(inode.v->bi_journal_seq);
unpacked->bi_hash_seed = inode.v->bi_hash_seed;
unpacked->bi_flags = le64_to_cpu(inode.v->bi_flags);
@@ -326,8 +327,6 @@ static noinline int bch2_inode_unpack_slowpath(struct bkey_s_c k,
int bch2_inode_unpack(struct bkey_s_c k,
struct bch_inode_unpacked *unpacked)
{
- unpacked->bi_snapshot = k.k->p.snapshot;
-
return likely(k.k->type == KEY_TYPE_inode_v3)
? bch2_inode_unpack_v3(k, unpacked)
: bch2_inode_unpack_slowpath(k, unpacked);
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 55b76a4d..e91c9848 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -1465,6 +1465,7 @@ static void journal_advance_devs_to_next_bucket(struct journal *j,
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ rcu_read_lock();
darray_for_each(*devs, i) {
struct bch_dev *ca = rcu_dereference(c->devs[*i]);
if (!ca)
@@ -1486,6 +1487,7 @@ static void journal_advance_devs_to_next_bucket(struct journal *j,
ja->bucket_seq[ja->cur_idx] = le64_to_cpu(seq);
}
}
+ rcu_read_unlock();
}
static void __journal_write_alloc(struct journal *j,
@@ -1498,7 +1500,8 @@ static void __journal_write_alloc(struct journal *j,
struct bch_fs *c = container_of(j, struct bch_fs, journal);
darray_for_each(*devs, i) {
- struct bch_dev *ca = rcu_dereference(c->devs[*i]);
+ struct bch_dev *ca = bch2_dev_get_ioref(c, *i, WRITE,
+ BCH_DEV_WRITE_REF_journal_write);
if (!ca)
continue;
@@ -1512,8 +1515,10 @@ static void __journal_write_alloc(struct journal *j,
ca->mi.state != BCH_MEMBER_STATE_rw ||
!ja->nr ||
bch2_bkey_has_device_c(bkey_i_to_s_c(&w->key), ca->dev_idx) ||
- sectors > ja->sectors_free)
+ sectors > ja->sectors_free) {
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_journal_write);
continue;
+ }
bch2_dev_stripe_increment(ca, &j->wp.stripe);
@@ -1536,15 +1541,8 @@ static void __journal_write_alloc(struct journal *j,
}
}
-/**
- * journal_write_alloc - decide where to write next journal entry
- *
- * @j: journal object
- * @w: journal buf (entry to be written)
- *
- * Returns: 0 on success, or -BCH_ERR_insufficient_devices on failure
- */
-static int journal_write_alloc(struct journal *j, struct journal_buf *w)
+static int journal_write_alloc(struct journal *j, struct journal_buf *w,
+ unsigned *replicas)
{
struct bch_fs *c = container_of(j, struct bch_fs, journal);
struct bch_devs_mask devs;
@@ -1552,29 +1550,18 @@ static int journal_write_alloc(struct journal *j, struct journal_buf *w)
unsigned sectors = vstruct_sectors(w->data, c->block_bits);
unsigned target = c->opts.metadata_target ?:
c->opts.foreground_target;
- unsigned replicas = 0, replicas_want =
- READ_ONCE(c->opts.metadata_replicas);
+ unsigned replicas_want = READ_ONCE(c->opts.metadata_replicas);
unsigned replicas_need = min_t(unsigned, replicas_want,
READ_ONCE(c->opts.metadata_replicas_required));
bool advance_done = false;
- rcu_read_lock();
-
- /* We might run more than once if we have to stop and do discards: */
- struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(bkey_i_to_s_c(&w->key));
- bkey_for_each_ptr(ptrs, p) {
- struct bch_dev *ca = bch2_dev_rcu_noerror(c, p->dev);
- if (ca)
- replicas += ca->mi.durability;
- }
-
retry_target:
devs = target_rw_devs(c, BCH_DATA_journal, target);
devs_sorted = bch2_dev_alloc_list(c, &j->wp.stripe, &devs);
retry_alloc:
- __journal_write_alloc(j, w, &devs_sorted, sectors, &replicas, replicas_want);
+ __journal_write_alloc(j, w, &devs_sorted, sectors, replicas, replicas_want);
- if (likely(replicas >= replicas_want))
+ if (likely(*replicas >= replicas_want))
goto done;
if (!advance_done) {
@@ -1583,18 +1570,16 @@ retry_alloc:
goto retry_alloc;
}
- if (replicas < replicas_want && target) {
+ if (*replicas < replicas_want && target) {
/* Retry from all devices: */
target = 0;
advance_done = false;
goto retry_target;
}
done:
- rcu_read_unlock();
-
BUG_ON(bkey_val_u64s(&w->key.k) > BCH_REPLICAS_MAX);
- return replicas >= replicas_need ? 0 : -BCH_ERR_insufficient_journal_devices;
+ return *replicas >= replicas_need ? 0 : -BCH_ERR_insufficient_journal_devices;
}
static void journal_buf_realloc(struct journal *j, struct journal_buf *buf)
@@ -1780,13 +1765,7 @@ static CLOSURE_CALLBACK(journal_write_submit)
unsigned sectors = vstruct_sectors(w->data, c->block_bits);
extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
- struct bch_dev *ca = bch2_dev_get_ioref(c, ptr->dev, WRITE,
- BCH_DEV_WRITE_REF_journal_write);
- if (!ca) {
- /* XXX: fix this */
- bch_err(c, "missing device %u for journal write", ptr->dev);
- continue;
- }
+ struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal],
sectors);
@@ -2066,57 +2045,45 @@ CLOSURE_CALLBACK(bch2_journal_write)
j->write_start_time = local_clock();
+ mutex_lock(&j->buf_lock);
+ journal_buf_realloc(j, w);
+
+ ret = bch2_journal_write_prep(j, w);
+ mutex_unlock(&j->buf_lock);
+
+ if (unlikely(ret))
+ goto err;
+
spin_lock(&j->lock);
if (nr_rw_members > 1)
w->separate_flush = true;
ret = bch2_journal_write_pick_flush(j, w);
spin_unlock(&j->lock);
- if (ret)
- goto err;
-
- mutex_lock(&j->buf_lock);
- journal_buf_realloc(j, w);
- ret = bch2_journal_write_prep(j, w);
- mutex_unlock(&j->buf_lock);
- if (ret)
+ if (unlikely(ret))
goto err;
- j->entry_bytes_written += vstruct_bytes(w->data);
-
+ unsigned replicas_allocated = 0;
while (1) {
- spin_lock(&j->lock);
- ret = journal_write_alloc(j, w);
+ ret = journal_write_alloc(j, w, &replicas_allocated);
if (!ret || !j->can_discard)
break;
- spin_unlock(&j->lock);
bch2_journal_do_discards(j);
}
- if (ret && !bch2_journal_error(j)) {
- struct printbuf buf = PRINTBUF;
- buf.atomic++;
-
- __bch2_journal_debug_to_text(&buf, j);
- spin_unlock(&j->lock);
- prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"),
- le64_to_cpu(w->data->seq),
- vstruct_sectors(w->data, c->block_bits),
- bch2_err_str(ret));
- bch2_print_str(c, KERN_ERR, buf.buf);
- printbuf_exit(&buf);
- }
- if (ret)
- goto err;
+ if (unlikely(ret))
+ goto err_allocate_write;
+ spin_lock(&j->lock);
/*
* write is allocated, no longer need to account for it in
* bch2_journal_space_available():
*/
w->sectors = 0;
w->write_allocated = true;
+ j->entry_bytes_written += vstruct_bytes(w->data);
/*
* journal entry has been compacted and allocated, recalculate space
@@ -2128,9 +2095,6 @@ CLOSURE_CALLBACK(bch2_journal_write)
w->devs_written = bch2_bkey_devs(bkey_i_to_s_c(&w->key));
- if (c->opts.nochanges)
- goto no_io;
-
/*
* Mark journal replicas before we submit the write to guarantee
* recovery will find the journal entries after a crash.
@@ -2141,15 +2105,33 @@ CLOSURE_CALLBACK(bch2_journal_write)
if (ret)
goto err;
+ if (c->opts.nochanges)
+ goto no_io;
+
if (!JSET_NO_FLUSH(w->data))
continue_at(cl, journal_write_preflush, j->wq);
else
continue_at(cl, journal_write_submit, j->wq);
return;
-no_io:
- continue_at(cl, journal_write_done, j->wq);
- return;
+err_allocate_write:
+ if (!bch2_journal_error(j)) {
+ struct printbuf buf = PRINTBUF;
+
+ bch2_journal_debug_to_text(&buf, j);
+ prt_printf(&buf, bch2_fmt(c, "Unable to allocate journal write at seq %llu for %zu sectors: %s"),
+ le64_to_cpu(w->data->seq),
+ vstruct_sectors(w->data, c->block_bits),
+ bch2_err_str(ret));
+ bch2_print_str(c, KERN_ERR, buf.buf);
+ printbuf_exit(&buf);
+ }
err:
bch2_fatal_error(c);
+no_io:
+ extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
+ struct bch_dev *ca = bch2_dev_have_ref(c, ptr->dev);
+ enumerated_ref_put(&ca->io_ref[WRITE], BCH_DEV_WRITE_REF_journal_write);
+ }
+
continue_at(cl, journal_write_done, j->wq);
}
diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c
index 90dcf80b..f431586a 100644
--- a/libbcachefs/migrate.c
+++ b/libbcachefs/migrate.c
@@ -4,9 +4,11 @@
*/
#include "bcachefs.h"
+#include "backpointers.h"
#include "bkey_buf.h"
#include "btree_update.h"
#include "btree_update_interior.h"
+#include "btree_write_buffer.h"
#include "buckets.h"
#include "errcode.h"
#include "extents.h"
@@ -20,7 +22,7 @@
#include "super-io.h"
static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
- unsigned dev_idx, int flags, bool metadata)
+ unsigned dev_idx, unsigned flags, bool metadata)
{
unsigned replicas = metadata ? c->opts.metadata_replicas : c->opts.data_replicas;
unsigned lost = metadata ? BCH_FORCE_IF_METADATA_LOST : BCH_FORCE_IF_DATA_LOST;
@@ -37,11 +39,28 @@ static int drop_dev_ptrs(struct bch_fs *c, struct bkey_s k,
return 0;
}
+static int drop_btree_ptrs(struct btree_trans *trans, struct btree_iter *iter,
+ struct btree *b, unsigned dev_idx, unsigned flags)
+{
+ struct bch_fs *c = trans->c;
+ struct bkey_buf k;
+
+ bch2_bkey_buf_init(&k);
+ bch2_bkey_buf_copy(&k, c, &b->key);
+
+ int ret = drop_dev_ptrs(c, bkey_i_to_s(k.k), dev_idx, flags, true) ?:
+ bch2_btree_node_update_key(trans, iter, b, k.k, 0, false);
+
+ bch_err_fn(c, ret);
+ bch2_bkey_buf_exit(&k, c);
+ return ret;
+}
+
static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
unsigned dev_idx,
- int flags)
+ unsigned flags)
{
struct bch_fs *c = trans->c;
struct bkey_i *n;
@@ -77,9 +96,27 @@ static int bch2_dev_usrdata_drop_key(struct btree_trans *trans,
return 0;
}
+static int bch2_dev_btree_drop_key(struct btree_trans *trans,
+ struct bkey_s_c_backpointer bp,
+ unsigned dev_idx,
+ struct bkey_buf *last_flushed,
+ unsigned flags)
+{
+ struct btree_iter iter;
+ struct btree *b = bch2_backpointer_get_node(trans, bp, &iter, last_flushed);
+ int ret = PTR_ERR_OR_ZERO(b);
+ if (ret)
+ return ret == -BCH_ERR_backpointer_to_overwritten_btree_node ? 0 : ret;
+
+ ret = drop_btree_ptrs(trans, &iter, b, dev_idx, flags);
+
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
static int bch2_dev_usrdata_drop(struct bch_fs *c,
struct progress_indicator_state *progress,
- unsigned dev_idx, int flags)
+ unsigned dev_idx, unsigned flags)
{
struct btree_trans *trans = bch2_trans_get(c);
enum btree_id id;
@@ -106,7 +143,7 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c,
static int bch2_dev_metadata_drop(struct bch_fs *c,
struct progress_indicator_state *progress,
- unsigned dev_idx, int flags)
+ unsigned dev_idx, unsigned flags)
{
struct btree_trans *trans;
struct btree_iter iter;
@@ -137,20 +174,12 @@ retry:
if (!bch2_bkey_has_device_c(bkey_i_to_s_c(&b->key), dev_idx))
goto next;
- bch2_bkey_buf_copy(&k, c, &b->key);
-
- ret = drop_dev_ptrs(c, bkey_i_to_s(k.k),
- dev_idx, flags, true);
- if (ret)
- break;
-
- ret = bch2_btree_node_update_key(trans, &iter, b, k.k, 0, false);
+ ret = drop_btree_ptrs(trans, &iter, b, dev_idx, flags);
if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) {
ret = 0;
continue;
}
- bch_err_msg(c, ret, "updating btree node key");
if (ret)
break;
next:
@@ -176,7 +205,57 @@ err:
return ret;
}
-int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, int flags)
+static int data_drop_bp(struct btree_trans *trans, unsigned dev_idx,
+ struct bkey_s_c_backpointer bp, struct bkey_buf *last_flushed,
+ unsigned flags)
+{
+ struct btree_iter iter;
+ struct bkey_s_c k = bch2_backpointer_get_key(trans, bp, &iter, 0, last_flushed);
+ int ret = bkey_err(k);
+ if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
+ return 0;
+ if (ret)
+ return ret;
+
+ if (!bch2_bkey_has_device_c(k, dev_idx))
+ goto out;
+
+ ret = bkey_is_btree_ptr(k.k)
+ ? bch2_dev_btree_drop_key(trans, bp, dev_idx, last_flushed, flags)
+ : bch2_dev_usrdata_drop_key(trans, &iter, k, dev_idx, flags);
+out:
+ bch2_trans_iter_exit(trans, &iter);
+ return ret;
+}
+
+int bch2_dev_data_drop_by_backpointers(struct bch_fs *c, unsigned dev_idx, unsigned flags)
+{
+ struct btree_trans *trans = bch2_trans_get(c);
+
+ struct bkey_buf last_flushed;
+ bch2_bkey_buf_init(&last_flushed);
+ bkey_init(&last_flushed.k->k);
+
+ int ret = bch2_btree_write_buffer_flush_sync(trans) ?:
+ for_each_btree_key_max_commit(trans, iter, BTREE_ID_backpointers,
+ POS(dev_idx, 0),
+ POS(dev_idx, U64_MAX), 0, k,
+ NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ if (k.k->type != KEY_TYPE_backpointer)
+ continue;
+
+ data_drop_bp(trans, dev_idx, bkey_s_c_to_backpointer(k),
+ &last_flushed, flags);
+
+ }));
+
+ bch2_bkey_buf_exit(&last_flushed, trans->c);
+ bch2_trans_put(trans);
+ bch_err_fn(c, ret);
+ return ret;
+}
+
+int bch2_dev_data_drop(struct bch_fs *c, unsigned dev_idx, unsigned flags)
{
struct progress_indicator_state progress;
bch2_progress_init(&progress, c,
diff --git a/libbcachefs/migrate.h b/libbcachefs/migrate.h
index 027efaa0..30018140 100644
--- a/libbcachefs/migrate.h
+++ b/libbcachefs/migrate.h
@@ -2,6 +2,7 @@
#ifndef _BCACHEFS_MIGRATE_H
#define _BCACHEFS_MIGRATE_H
-int bch2_dev_data_drop(struct bch_fs *, unsigned, int);
+int bch2_dev_data_drop_by_backpointers(struct bch_fs *, unsigned, unsigned);
+int bch2_dev_data_drop(struct bch_fs *, unsigned, unsigned);
#endif /* _BCACHEFS_MIGRATE_H */
diff --git a/libbcachefs/rebalance.c b/libbcachefs/rebalance.c
index a81ffff4..a94f66ff 100644
--- a/libbcachefs/rebalance.c
+++ b/libbcachefs/rebalance.c
@@ -80,11 +80,13 @@ static inline unsigned bch2_bkey_ptrs_need_move(struct bch_fs *c,
unsigned ptr_bit = 1;
unsigned rewrite_ptrs = 0;
+ rcu_read_lock();
bkey_for_each_ptr(ptrs, ptr) {
if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, opts->background_target))
rewrite_ptrs |= ptr_bit;
ptr_bit <<= 1;
}
+ rcu_read_unlock();
return rewrite_ptrs;
}
@@ -132,10 +134,14 @@ u64 bch2_bkey_sectors_need_rebalance(struct bch_fs *c, struct bkey_s_c k)
}
}
incompressible:
- if (opts->background_target)
+ if (opts->background_target) {
+ rcu_read_lock();
bkey_for_each_ptr_decode(k.k, ptrs, p, entry)
- if (!p.ptr.cached && !bch2_dev_in_target(c, p.ptr.dev, opts->background_target))
+ if (!p.ptr.cached &&
+ !bch2_dev_in_target(c, p.ptr.dev, opts->background_target))
sectors += p.crc.compressed_size;
+ rcu_read_unlock();
+ }
return sectors;
}
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index 6aae686f..375111b5 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -737,11 +737,6 @@ int bch2_fs_recovery(struct bch_fs *c)
c->opts.read_only = true;
}
- if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) {
- bch_info(c, "filesystem is an unresized image file, mounting ro");
- c->opts.read_only = true;
- }
-
mutex_lock(&c->sb_lock);
struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
bool write_sb = false;
@@ -895,6 +890,17 @@ use_clean:
if (ret)
goto err;
+ ret = bch2_fs_resize_on_mount(c);
+ if (ret) {
+ up_write(&c->state_lock);
+ goto err;
+ }
+
+ if (c->sb.features & BIT_ULL(BCH_FEATURE_small_image)) {
+ bch_info(c, "filesystem is an unresized image file, mounting ro");
+ c->opts.read_only = true;
+ }
+
if (!c->opts.read_only &&
(c->sb.features & BIT_ULL(BCH_FEATURE_no_alloc_info))) {
bch_info(c, "mounting a filesystem with no alloc info read-write; will recreate");
diff --git a/libbcachefs/sb-members.c b/libbcachefs/sb-members.c
index 9c383d9a..e5c68c2f 100644
--- a/libbcachefs/sb-members.c
+++ b/libbcachefs/sb-members.c
@@ -525,6 +525,7 @@ int bch2_sb_member_alloc(struct bch_fs *c)
unsigned u64s;
int best = -1;
u64 best_last_mount = 0;
+ unsigned nr_deleted = 0;
if (dev_idx < BCH_SB_MEMBERS_MAX)
goto have_slot;
@@ -535,7 +536,10 @@ int bch2_sb_member_alloc(struct bch_fs *c)
continue;
struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, dev_idx);
- if (bch2_member_alive(&m))
+
+ nr_deleted += uuid_equal(&m.uuid, &BCH_SB_MEMBER_DELETED_UUID);
+
+ if (!bch2_is_zero(&m.uuid, sizeof(m.uuid)))
continue;
u64 last_mount = le64_to_cpu(m.last_mount);
@@ -549,6 +553,10 @@ int bch2_sb_member_alloc(struct bch_fs *c)
goto have_slot;
}
+ if (nr_deleted)
+ bch_err(c, "unable to allocate new member, but have %u deleted: run fsck",
+ nr_deleted);
+
return -BCH_ERR_ENOSPC_sb_members;
have_slot:
nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices);
@@ -564,3 +572,22 @@ have_slot:
c->disk_sb.sb->nr_devices = nr_devices;
return dev_idx;
}
+
+void bch2_sb_members_clean_deleted(struct bch_fs *c)
+{
+ mutex_lock(&c->sb_lock);
+ bool write_sb = false;
+
+ for (unsigned i = 0; i < c->sb.nr_devices; i++) {
+ struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, i);
+
+ if (uuid_equal(&m->uuid, &BCH_SB_MEMBER_DELETED_UUID)) {
+ memset(&m->uuid, 0, sizeof(m->uuid));
+ write_sb = true;
+ }
+ }
+
+ if (write_sb)
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+}
diff --git a/libbcachefs/sb-members.h b/libbcachefs/sb-members.h
index c9cb8f76..6bd9b86a 100644
--- a/libbcachefs/sb-members.h
+++ b/libbcachefs/sb-members.h
@@ -320,7 +320,8 @@ extern const struct bch_sb_field_ops bch_sb_field_ops_members_v2;
static inline bool bch2_member_alive(struct bch_member *m)
{
- return !bch2_is_zero(&m->uuid, sizeof(m->uuid));
+ return !bch2_is_zero(&m->uuid, sizeof(m->uuid)) &&
+ !uuid_equal(&m->uuid, &BCH_SB_MEMBER_DELETED_UUID);
}
static inline bool bch2_member_exists(struct bch_sb *sb, unsigned dev)
@@ -381,5 +382,6 @@ bool bch2_dev_btree_bitmap_marked(struct bch_fs *, struct bkey_s_c);
void bch2_dev_btree_bitmap_mark(struct bch_fs *, struct bkey_s_c);
int bch2_sb_member_alloc(struct bch_fs *);
+void bch2_sb_members_clean_deleted(struct bch_fs *);
#endif /* _BCACHEFS_SB_MEMBERS_H */
diff --git a/libbcachefs/sb-members_format.h b/libbcachefs/sb-members_format.h
index 472218a5..fb72ad73 100644
--- a/libbcachefs/sb-members_format.h
+++ b/libbcachefs/sb-members_format.h
@@ -13,6 +13,10 @@
*/
#define BCH_SB_MEMBER_INVALID 255
+#define BCH_SB_MEMBER_DELETED_UUID \
+ UUID_INIT(0xffffffff, 0xffff, 0xffff, \
+ 0xd9, 0x6a, 0x60, 0xcf, 0x80, 0x3d, 0xf7, 0xef)
+
#define BCH_MIN_NR_NBUCKETS (1 << 6)
#define BCH_IOPS_MEASUREMENTS() \
diff --git a/libbcachefs/snapshot.c b/libbcachefs/snapshot.c
index f074b9de..fc8fb076 100644
--- a/libbcachefs/snapshot.c
+++ b/libbcachefs/snapshot.c
@@ -1427,6 +1427,12 @@ static unsigned live_child(struct bch_fs *c, u32 id)
return ret;
}
+static bool snapshot_id_dying(struct snapshot_delete *d, unsigned id)
+{
+ return snapshot_list_has_id(&d->delete_leaves, id) ||
+ interior_delete_has_id(&d->delete_interior, id) != 0;
+}
+
static int delete_dead_snapshots_process_key(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k)
@@ -1468,11 +1474,20 @@ static int delete_dead_snapshots_process_key(struct btree_trans *trans,
return 0;
}
-static bool skip_unrelated_snapshot_tree(struct btree_trans *trans, struct btree_iter *iter)
+static bool skip_unrelated_snapshot_tree(struct btree_trans *trans, struct btree_iter *iter, u64 *prev_inum)
{
struct bch_fs *c = trans->c;
struct snapshot_delete *d = &c->snapshot_delete;
+ u64 inum = iter->btree_id != BTREE_ID_inodes
+ ? iter->pos.inode
+ : iter->pos.offset;
+
+ if (*prev_inum == inum)
+ return false;
+
+ *prev_inum = inum;
+
bool ret = !snapshot_list_has_id(&d->deleting_from_trees,
bch2_snapshot_tree(c, iter->pos.snapshot));
if (unlikely(ret)) {
@@ -1486,6 +1501,129 @@ static bool skip_unrelated_snapshot_tree(struct btree_trans *trans, struct btree
return ret;
}
+static int delete_dead_snapshot_keys_v1(struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+ struct snapshot_delete *d = &c->snapshot_delete;
+
+ for (d->pos.btree = 0; d->pos.btree < BTREE_ID_NR; d->pos.btree++) {
+ struct disk_reservation res = { 0 };
+ u64 prev_inum = 0;
+
+ d->pos.pos = POS_MIN;
+
+ if (!btree_type_has_snapshots(d->pos.btree))
+ continue;
+
+ int ret = for_each_btree_key_commit(trans, iter,
+ d->pos.btree, POS_MIN,
+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
+ &res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ d->pos.pos = iter.pos;
+
+ if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum))
+ continue;
+
+ delete_dead_snapshots_process_key(trans, &iter, k);
+ }));
+
+ bch2_disk_reservation_put(c, &res);
+
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int delete_dead_snapshot_keys_range(struct btree_trans *trans, enum btree_id btree,
+ struct bpos start, struct bpos end)
+{
+ struct bch_fs *c = trans->c;
+ struct snapshot_delete *d = &c->snapshot_delete;
+ struct disk_reservation res = { 0 };
+
+ d->pos.btree = btree;
+ d->pos.pos = POS_MIN;
+
+ int ret = for_each_btree_key_max_commit(trans, iter,
+ btree, start, end,
+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
+ &res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ d->pos.pos = iter.pos;
+ delete_dead_snapshots_process_key(trans, &iter, k);
+ }));
+
+ bch2_disk_reservation_put(c, &res);
+ return ret;
+}
+
+static int delete_dead_snapshot_keys_v2(struct btree_trans *trans)
+{
+ struct bch_fs *c = trans->c;
+ struct snapshot_delete *d = &c->snapshot_delete;
+ struct disk_reservation res = { 0 };
+ u64 prev_inum = 0;
+ int ret = 0;
+
+ struct btree_iter iter;
+ bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, POS_MIN,
+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots);
+
+ while (1) {
+ struct bkey_s_c k;
+ ret = lockrestart_do(trans,
+ bkey_err(k = bch2_btree_iter_peek(trans, &iter)));
+ if (ret)
+ break;
+
+ if (!k.k)
+ break;
+
+ d->pos.btree = iter.btree_id;
+ d->pos.pos = iter.pos;
+
+ if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum))
+ continue;
+
+ if (snapshot_id_dying(d, k.k->p.snapshot)) {
+ struct bpos start = POS(k.k->p.offset, 0);
+ struct bpos end = POS(k.k->p.offset, U64_MAX);
+
+ ret = delete_dead_snapshot_keys_range(trans, BTREE_ID_extents, start, end) ?:
+ delete_dead_snapshot_keys_range(trans, BTREE_ID_dirents, start, end) ?:
+ delete_dead_snapshot_keys_range(trans, BTREE_ID_xattrs, start, end);
+ if (ret)
+ break;
+
+ bch2_btree_iter_set_pos(trans, &iter, POS(0, k.k->p.offset + 1));
+ } else {
+ bch2_btree_iter_advance(trans, &iter);
+ }
+ }
+ bch2_trans_iter_exit(trans, &iter);
+
+ if (ret)
+ goto err;
+
+ prev_inum = 0;
+ ret = for_each_btree_key_commit(trans, iter,
+ BTREE_ID_inodes, POS_MIN,
+ BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
+ &res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
+ d->pos.btree = iter.btree_id;
+ d->pos.pos = iter.pos;
+
+ if (skip_unrelated_snapshot_tree(trans, &iter, &prev_inum))
+ continue;
+
+ delete_dead_snapshots_process_key(trans, &iter, k);
+ }));
+err:
+ bch2_disk_reservation_put(c, &res);
+ return ret;
+}
+
/*
* For a given snapshot, if it doesn't have a subvolume that points to it, and
* it doesn't have child snapshot nodes - it's now redundant and we can mark it
@@ -1500,6 +1638,7 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s
struct snapshot_delete *d = &c->snapshot_delete;
struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k);
unsigned live_children = 0;
+ int ret = 0;
if (BCH_SNAPSHOT_SUBVOL(s.v))
return 0;
@@ -1507,6 +1646,7 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s
if (BCH_SNAPSHOT_DELETED(s.v))
return 0;
+ mutex_lock(&d->lock);
for (unsigned i = 0; i < 2; i++) {
u32 child = le32_to_cpu(s.v->children[i]);
@@ -1517,7 +1657,7 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s
u32 tree = bch2_snapshot_tree(c, s.k->p.offset);
if (live_children == 0) {
- return snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
+ ret = snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
snapshot_list_add(c, &d->delete_leaves, s.k->p.offset);
} else if (live_children == 1) {
struct snapshot_interior_delete n = {
@@ -1527,14 +1667,15 @@ static int check_should_delete_snapshot(struct btree_trans *trans, struct bkey_s
if (!n.live_child) {
bch_err(c, "error finding live child of snapshot %u", n.id);
- return -EINVAL;
+ ret = -EINVAL;
+ } else {
+ ret = snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
+ darray_push(&d->delete_interior, n);
}
-
- return snapshot_list_add_nodup(c, &d->deleting_from_trees, tree) ?:
- darray_push(&d->delete_interior, n);
- } else {
- return 0;
}
+ mutex_unlock(&d->lock);
+
+ return ret;
}
static inline u32 bch2_snapshot_nth_parent_skip(struct bch_fs *c, u32 id, u32 n,
@@ -1641,13 +1782,11 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
* For every snapshot node: If we have no live children and it's not
* pointed to by a subvolume, delete it:
*/
- mutex_lock(&d->lock);
d->running = true;
d->pos = BBPOS_MIN;
ret = for_each_btree_key(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k,
check_should_delete_snapshot(trans, k));
- mutex_unlock(&d->lock);
if (!bch2_err_matches(ret, EROFS))
bch_err_msg(c, ret, "walking snapshots");
if (ret)
@@ -1666,33 +1805,13 @@ int bch2_delete_dead_snapshots(struct bch_fs *c)
goto err;
}
- for (d->pos.btree = 0; d->pos.btree < BTREE_ID_NR; d->pos.btree++) {
- struct disk_reservation res = { 0 };
-
- d->pos.pos = POS_MIN;
-
- if (!btree_type_has_snapshots(d->pos.btree))
- continue;
-
- ret = for_each_btree_key_commit(trans, iter,
- d->pos.btree, POS_MIN,
- BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
- &res, NULL, BCH_TRANS_COMMIT_no_enospc, ({
- d->pos.pos = iter.pos;
-
- if (skip_unrelated_snapshot_tree(trans, &iter))
- continue;
-
- delete_dead_snapshots_process_key(trans, &iter, k);
- }));
-
- bch2_disk_reservation_put(c, &res);
-
- if (!bch2_err_matches(ret, EROFS))
- bch_err_msg(c, ret, "deleting keys from dying snapshots");
- if (ret)
- goto err;
- }
+ ret = !bch2_request_incompat_feature(c, bcachefs_metadata_version_snapshot_deletion_v2)
+ ? delete_dead_snapshot_keys_v2(trans)
+ : delete_dead_snapshot_keys_v1(trans);
+ if (!bch2_err_matches(ret, EROFS))
+ bch_err_msg(c, ret, "deleting keys from dying snapshots");
+ if (ret)
+ goto err;
darray_for_each(d->delete_leaves, i) {
ret = commit_do(trans, NULL, NULL, 0,
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index 8730d2e7..6687b923 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -87,7 +87,8 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v
struct printbuf buf = PRINTBUF;
prt_str(&buf, "requested incompat feature ");
bch2_version_to_text(&buf, version);
- prt_str(&buf, " currently not enabled");
+ prt_str(&buf, " currently not enabled, allowed up to ");
+ bch2_version_to_text(&buf, version);
prt_printf(&buf, "\n set version_upgrade=incompat to enable");
bch_notice(c, "%s", buf.buf);
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index 96264686..18d8823c 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -214,7 +214,6 @@ static int bch2_dev_sysfs_online(struct bch_fs *, struct bch_dev *);
static void bch2_dev_io_ref_stop(struct bch_dev *, int);
static void __bch2_dev_read_only(struct bch_fs *, struct bch_dev *);
static int bch2_fs_init_rw(struct bch_fs *);
-static int bch2_fs_resize_on_mount(struct bch_fs *);
struct bch_fs *bch2_dev_to_fs(dev_t dev)
{
@@ -1150,15 +1149,11 @@ int bch2_fs_start(struct bch_fs *c)
cpu_to_le64(now);
rcu_read_unlock();
- bch2_write_super(c);
+ /*
+ * Dno't write superblock yet: recovery might have to downgrade
+ */
mutex_unlock(&c->sb_lock);
- ret = bch2_fs_resize_on_mount(c);
- if (ret) {
- up_write(&c->state_lock);
- goto err;
- }
-
rcu_read_lock();
for_each_online_member_rcu(c, ca)
if (ca->mi.state == BCH_MEMBER_STATE_rw)
@@ -1724,6 +1719,8 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
{
struct bch_member *m;
unsigned dev_idx = ca->dev_idx, data;
+ bool fast_device_removal = !bch2_request_incompat_feature(c,
+ bcachefs_metadata_version_fast_device_removal);
int ret;
down_write(&c->state_lock);
@@ -1742,11 +1739,24 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
__bch2_dev_read_only(c, ca);
- ret = bch2_dev_data_drop(c, ca->dev_idx, flags);
- bch_err_msg(ca, ret, "bch2_dev_data_drop()");
+ ret = fast_device_removal
+ ? bch2_dev_data_drop_by_backpointers(c, ca->dev_idx, flags)
+ : bch2_dev_data_drop(c, ca->dev_idx, flags);
if (ret)
goto err;
+ /* Check if device still has data */
+ struct bch_dev_usage usage = bch2_dev_usage_read(ca);
+ for (unsigned i = 0; i < BCH_DATA_NR; i++)
+ if (!data_type_is_empty(i) &&
+ !data_type_is_hidden(i) &&
+ usage.buckets[i]) {
+ bch_err(ca, "Remove failed: still has data (%s, %llu buckets)",
+ __bch2_data_types[i], usage.buckets[i]);
+ ret = -EBUSY;
+ goto err;
+ }
+
ret = bch2_dev_remove_alloc(c, ca);
bch_err_msg(ca, ret, "bch2_dev_remove_alloc()");
if (ret)
@@ -1810,7 +1820,11 @@ int bch2_dev_remove(struct bch_fs *c, struct bch_dev *ca, int flags)
*/
mutex_lock(&c->sb_lock);
m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx);
- memset(&m->uuid, 0, sizeof(m->uuid));
+
+ if (fast_device_removal)
+ m->uuid = BCH_SB_MEMBER_DELETED_UUID;
+ else
+ memset(&m->uuid, 0, sizeof(m->uuid));
bch2_write_super(c);
@@ -2120,7 +2134,7 @@ err:
return ret;
}
-static int bch2_fs_resize_on_mount(struct bch_fs *c)
+int bch2_fs_resize_on_mount(struct bch_fs *c)
{
for_each_online_member(c, ca, BCH_DEV_READ_REF_fs_resize_on_mount) {
u64 old_nbuckets = ca->mi.nbuckets;
diff --git a/libbcachefs/super.h b/libbcachefs/super.h
index 7252544c..be75603f 100644
--- a/libbcachefs/super.h
+++ b/libbcachefs/super.h
@@ -38,6 +38,8 @@ void bch2_fs_read_only(struct bch_fs *);
int bch2_fs_read_write(struct bch_fs *);
int bch2_fs_read_write_early(struct bch_fs *);
+int bch2_fs_resize_on_mount(struct bch_fs *);
+
void __bch2_fs_stop(struct bch_fs *);
void bch2_fs_free(struct bch_fs *);
void bch2_fs_stop(struct bch_fs *);
diff --git a/libbcachefs/thread_with_file.c b/libbcachefs/thread_with_file.c
index dea73bc1..314a24d1 100644
--- a/libbcachefs/thread_with_file.c
+++ b/libbcachefs/thread_with_file.c
@@ -455,8 +455,10 @@ ssize_t bch2_stdio_redirect_vprintf(struct stdio_redirect *stdio, bool nonblocki
struct stdio_buf *buf = &stdio->output;
unsigned long flags;
ssize_t ret;
-
again:
+ if (stdio->done)
+ return -EPIPE;
+
spin_lock_irqsave(&buf->lock, flags);
ret = bch2_darray_vprintf(&buf->buf, GFP_NOWAIT, fmt, args);
spin_unlock_irqrestore(&buf->lock, flags);