summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.bcachefs_revision2
-rw-r--r--libbcachefs.c25
-rw-r--r--libbcachefs/bcachefs_format.h24
-rw-r--r--libbcachefs/btree_gc.c174
-rw-r--r--libbcachefs/btree_gc.h1
-rw-r--r--libbcachefs/btree_types.h2
-rw-r--r--libbcachefs/btree_update_leaf.c71
-rw-r--r--libbcachefs/buckets.c125
-rw-r--r--libbcachefs/buckets.h1
-rw-r--r--libbcachefs/extents.c85
-rw-r--r--libbcachefs/io.c8
-rw-r--r--libbcachefs/io.h3
-rw-r--r--libbcachefs/journal_io.c2
-rw-r--r--libbcachefs/migrate.c5
-rw-r--r--libbcachefs/move.c8
-rw-r--r--libbcachefs/opts.c15
-rw-r--r--libbcachefs/opts.h1
-rw-r--r--libbcachefs/recovery.c3
-rw-r--r--libbcachefs/replicas.c272
-rw-r--r--libbcachefs/replicas.h14
-rw-r--r--libbcachefs/super-io.c42
-rw-r--r--libbcachefs/super-io.h1
-rw-r--r--libbcachefs/sysfs.c16
-rw-r--r--libbcachefs/xattr.c10
24 files changed, 574 insertions, 336 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index b4ec3188..48cf256f 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-d7dbddc4504c1bf509f8eb5818b5042342dc9ed1
+a9f14c773fb122a4b283fc7b79d9f98703a18890
diff --git a/libbcachefs.c b/libbcachefs.c
index 968748af..98f058d7 100644
--- a/libbcachefs.c
+++ b/libbcachefs.c
@@ -545,6 +545,26 @@ static void bch2_sb_print_crypt(struct bch_sb *sb, struct bch_sb_field *f,
BCH_KDF_SCRYPT_P(crypt));
}
+static void bch2_sb_print_replicas_v0(struct bch_sb *sb, struct bch_sb_field *f,
+ enum units units)
+{
+ struct bch_sb_field_replicas_v0 *replicas = field_to_type(f, replicas_v0);
+ struct bch_replicas_entry_v0 *e;
+ unsigned i;
+
+ for_each_replicas_entry(replicas, e) {
+ printf_pad(32, " %s:", bch2_data_types[e->data_type]);
+
+ putchar('[');
+ for (i = 0; i < e->nr_devs; i++) {
+ if (i)
+ putchar(' ');
+ printf("%u", e->devs[i]);
+ }
+ printf("]\n");
+ }
+}
+
static void bch2_sb_print_replicas(struct bch_sb *sb, struct bch_sb_field *f,
enum units units)
{
@@ -553,7 +573,10 @@ static void bch2_sb_print_replicas(struct bch_sb *sb, struct bch_sb_field *f,
unsigned i;
for_each_replicas_entry(replicas, e) {
- printf_pad(32, " %s:", bch2_data_types[e->data_type]);
+ printf_pad(32, " %s: %u/%u",
+ bch2_data_types[e->data_type],
+ e->nr_required,
+ e->nr_devs);
putchar('[');
for (i = 0; i < e->nr_devs; i++) {
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 7ad080bf..56fef9e4 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -883,10 +883,11 @@ struct bch_sb_field {
x(journal, 0) \
x(members, 1) \
x(crypt, 2) \
- x(replicas, 3) \
+ x(replicas_v0, 3) \
x(quota, 4) \
x(disk_groups, 5) \
- x(clean, 6)
+ x(clean, 6) \
+ x(replicas, 7)
enum bch_sb_field_type {
#define x(f, nr) BCH_SB_FIELD_##f = nr,
@@ -1012,16 +1013,28 @@ enum bch_data_type {
BCH_DATA_NR = 6,
};
+struct bch_replicas_entry_v0 {
+ __u8 data_type;
+ __u8 nr_devs;
+ __u8 devs[0];
+} __attribute__((packed));
+
+struct bch_sb_field_replicas_v0 {
+ struct bch_sb_field field;
+ struct bch_replicas_entry_v0 entries[0];
+} __attribute__((packed, aligned(8)));
+
struct bch_replicas_entry {
__u8 data_type;
__u8 nr_devs;
+ __u8 nr_required;
__u8 devs[0];
-};
+} __attribute__((packed));
struct bch_sb_field_replicas {
struct bch_sb_field field;
struct bch_replicas_entry entries[0];
-};
+} __attribute__((packed, aligned(8)));
/* BCH_SB_FIELD_quota: */
@@ -1227,7 +1240,8 @@ enum bch_sb_features {
BCH_FEATURE_LZ4 = 0,
BCH_FEATURE_GZIP = 1,
BCH_FEATURE_ZSTD = 2,
- BCH_FEATURE_ATOMIC_NLINK = 3,
+ BCH_FEATURE_ATOMIC_NLINK = 3, /* should have gone under compat */
+ BCH_FEATURE_NR,
};
/* options: */
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index b3c69da9..6b67da90 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -119,86 +119,105 @@ static bool bkey_type_needs_gc(enum bkey_type type)
}
}
-u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *c, struct bkey_s_c k)
+static void ptr_gen_recalc_oldest(struct bch_fs *c,
+ const struct bch_extent_ptr *ptr,
+ u8 *max_stale)
{
- const struct bch_extent_ptr *ptr;
- u8 max_stale = 0;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+ size_t b = PTR_BUCKET_NR(ca, ptr);
- if (bkey_extent_is_data(k.k)) {
- struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+ if (gen_after(ca->oldest_gens[b], ptr->gen))
+ ca->oldest_gens[b] = ptr->gen;
- extent_for_each_ptr(e, ptr) {
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
- size_t b = PTR_BUCKET_NR(ca, ptr);
+ *max_stale = max(*max_stale, ptr_stale(ca, ptr));
+}
- if (gen_after(ca->oldest_gens[b], ptr->gen))
- ca->oldest_gens[b] = ptr->gen;
+static u8 ptr_gens_recalc_oldest(struct bch_fs *c,
+ enum bkey_type type,
+ struct bkey_s_c k)
+{
+ const struct bch_extent_ptr *ptr;
+ u8 max_stale = 0;
- max_stale = max(max_stale, ptr_stale(ca, ptr));
+ switch (type) {
+ case BKEY_TYPE_BTREE:
+ case BKEY_TYPE_EXTENTS:
+ switch (k.k->type) {
+ case BCH_EXTENT:
+ case BCH_EXTENT_CACHED: {
+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+
+ extent_for_each_ptr(e, ptr)
+ ptr_gen_recalc_oldest(c, ptr, &max_stale);
+ break;
}
+ }
+ break;
+ default:
+ break;
}
return max_stale;
}
-static int bch2_btree_mark_ptrs_initial(struct bch_fs *c, enum bkey_type type,
- struct bkey_s_c k)
+static int ptr_gen_check(struct bch_fs *c,
+ enum bkey_type type,
+ const struct bch_extent_ptr *ptr)
{
- enum bch_data_type data_type = type == BKEY_TYPE_BTREE
- ? BCH_DATA_BTREE : BCH_DATA_USER;
+ struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
+ size_t b = PTR_BUCKET_NR(ca, ptr);
+ struct bucket *g = PTR_BUCKET(ca, ptr);
int ret = 0;
- BUG_ON(journal_seq_verify(c) &&
- k.k->version.lo > journal_cur_seq(&c->journal));
+ if (mustfix_fsck_err_on(!g->mark.gen_valid, c,
+ "found ptr with missing gen in alloc btree,\n"
+ "type %u gen %u",
+ type, ptr->gen)) {
+ g->_mark.gen = ptr->gen;
+ g->_mark.gen_valid = 1;
+ set_bit(b, ca->buckets_dirty);
+ }
- if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
- fsck_err_on(!bch2_bkey_replicas_marked(c, type, k), c,
- "superblock not marked as containing replicas (type %u)",
- data_type)) {
- ret = bch2_mark_bkey_replicas(c, type, k);
- if (ret)
- return ret;
+ if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
+ "%u ptr gen in the future: %u > %u",
+ type, ptr->gen, g->mark.gen)) {
+ g->_mark.gen = ptr->gen;
+ g->_mark.gen_valid = 1;
+ set_bit(b, ca->buckets_dirty);
+ set_bit(BCH_FS_FIXED_GENS, &c->flags);
}
+fsck_err:
+ return ret;
+}
- switch (k.k->type) {
- case BCH_EXTENT:
- case BCH_EXTENT_CACHED: {
- struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
- const struct bch_extent_ptr *ptr;
-
- extent_for_each_ptr(e, ptr) {
- struct bch_dev *ca = bch_dev_bkey_exists(c, ptr->dev);
- size_t b = PTR_BUCKET_NR(ca, ptr);
- struct bucket *g = PTR_BUCKET(ca, ptr);
-
- if (mustfix_fsck_err_on(!g->mark.gen_valid, c,
- "found ptr with missing gen in alloc btree,\n"
- "type %s gen %u",
- bch2_data_types[data_type],
- ptr->gen)) {
- g->_mark.gen = ptr->gen;
- g->_mark.gen_valid = 1;
- set_bit(b, ca->buckets_dirty);
- }
+static int ptr_gens_check(struct bch_fs *c, enum bkey_type type,
+ struct bkey_s_c k)
+{
+ const struct bch_extent_ptr *ptr;
+ int ret = 0;
- if (mustfix_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
- "%s ptr gen in the future: %u > %u",
- bch2_data_types[data_type],
- ptr->gen, g->mark.gen)) {
- g->_mark.gen = ptr->gen;
- g->_mark.gen_valid = 1;
- set_bit(b, ca->buckets_dirty);
- set_bit(BCH_FS_FIXED_GENS, &c->flags);
- }
+ switch (type) {
+ case BKEY_TYPE_BTREE:
+ case BKEY_TYPE_EXTENTS:
+ switch (k.k->type) {
+ case BCH_EXTENT:
+ case BCH_EXTENT_CACHED: {
+ struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
+ extent_for_each_ptr(e, ptr) {
+ ret = ptr_gen_check(c, type, ptr);
+ if (ret)
+ return ret;
+
+ }
+ break;
+ }
}
break;
- }
+ default:
+ break;
}
- if (k.k->version.lo > atomic64_read(&c->key_version))
- atomic64_set(&c->key_version, k.k->version.lo);
-fsck_err:
return ret;
}
@@ -215,31 +234,32 @@ static int bch2_gc_mark_key(struct bch_fs *c, enum bkey_type type,
(initial ? BCH_BUCKET_MARK_NOATOMIC : 0);
int ret = 0;
- switch (type) {
- case BKEY_TYPE_BTREE:
- case BKEY_TYPE_EXTENTS:
- if (initial) {
- ret = bch2_btree_mark_ptrs_initial(c, type, k);
- if (ret < 0)
+ if (initial) {
+ BUG_ON(journal_seq_verify(c) &&
+ k.k->version.lo > journal_cur_seq(&c->journal));
+
+ if (k.k->version.lo > atomic64_read(&c->key_version))
+ atomic64_set(&c->key_version, k.k->version.lo);
+
+ if (test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
+ fsck_err_on(!bch2_bkey_replicas_marked(c, type, k,
+ false), c,
+ "superblock not marked as containing replicas (type %u)",
+ type)) {
+ ret = bch2_mark_bkey_replicas(c, type, k);
+ if (ret)
return ret;
}
- break;
- default:
- break;
- }
- bch2_mark_key(c, type, k, true, k.k->size,
- pos, NULL, 0, flags);
-
- switch (type) {
- case BKEY_TYPE_BTREE:
- case BKEY_TYPE_EXTENTS:
- ret = bch2_btree_key_recalc_oldest_gen(c, k);
- break;
- default:
- break;
+ ret = ptr_gens_check(c, type, k);
+ if (ret)
+ return ret;
}
+ bch2_mark_key(c, type, k, true, k.k->size, pos, NULL, 0, flags);
+
+ ret = ptr_gens_recalc_oldest(c, type, k);
+fsck_err:
return ret;
}
diff --git a/libbcachefs/btree_gc.h b/libbcachefs/btree_gc.h
index f9225af2..101a6a89 100644
--- a/libbcachefs/btree_gc.h
+++ b/libbcachefs/btree_gc.h
@@ -10,7 +10,6 @@ void bch2_gc(struct bch_fs *);
void bch2_gc_thread_stop(struct bch_fs *);
int bch2_gc_thread_start(struct bch_fs *);
int bch2_initial_gc(struct bch_fs *, struct list_head *);
-u8 bch2_btree_key_recalc_oldest_gen(struct bch_fs *, struct bkey_s_c);
void bch2_mark_dev_superblock(struct bch_fs *, struct bch_dev *, unsigned);
/*
diff --git a/libbcachefs/btree_types.h b/libbcachefs/btree_types.h
index 44349159..a7eda114 100644
--- a/libbcachefs/btree_types.h
+++ b/libbcachefs/btree_types.h
@@ -439,11 +439,11 @@ enum btree_insert_ret {
BTREE_INSERT_OK,
/* extent spanned multiple leaf nodes: have to traverse to next node: */
BTREE_INSERT_NEED_TRAVERSE,
- /* write lock held for too long */
/* leaf node needs to be split */
BTREE_INSERT_BTREE_NODE_FULL,
BTREE_INSERT_ENOSPC,
BTREE_INSERT_NEED_GC_LOCK,
+ BTREE_INSERT_NEED_MARK_REPLICAS,
};
enum btree_gc_coalesce_fail_reason {
diff --git a/libbcachefs/btree_update_leaf.c b/libbcachefs/btree_update_leaf.c
index 33c913f7..288d7ca6 100644
--- a/libbcachefs/btree_update_leaf.c
+++ b/libbcachefs/btree_update_leaf.c
@@ -5,11 +5,13 @@
#include "btree_io.h"
#include "btree_iter.h"
#include "btree_locking.h"
+#include "buckets.h"
#include "debug.h"
#include "extents.h"
#include "journal.h"
#include "journal_reclaim.h"
#include "keylist.h"
+#include "replicas.h"
#include <linux/sort.h>
#include <trace/events/bcachefs.h>
@@ -203,6 +205,8 @@ btree_insert_key_leaf(struct btree_insert *trans,
int old_live_u64s = b->nr.live_u64s;
int live_u64s_added, u64s_added;
+ bch2_mark_update(trans, insert);
+
ret = !btree_node_is_extents(b)
? bch2_insert_fixup_key(trans, insert)
: bch2_insert_fixup_extent(trans, insert);
@@ -297,8 +301,8 @@ static inline int btree_trans_cmp(struct btree_insert_entry l,
static enum btree_insert_ret
btree_key_can_insert(struct btree_insert *trans,
- struct btree_insert_entry *insert,
- unsigned *u64s)
+ struct btree_insert_entry *insert,
+ unsigned *u64s)
{
struct bch_fs *c = trans->c;
struct btree *b = insert->iter->l[0].b;
@@ -307,6 +311,12 @@ btree_key_can_insert(struct btree_insert *trans,
if (unlikely(btree_node_fake(b)))
return BTREE_INSERT_BTREE_NODE_FULL;
+ if (!bch2_bkey_replicas_marked(c,
+ insert->iter->btree_id,
+ bkey_i_to_s_c(insert->k),
+ true))
+ return BTREE_INSERT_NEED_MARK_REPLICAS;
+
ret = !btree_node_is_extents(b)
? BTREE_INSERT_OK
: bch2_extent_can_insert(trans, insert, u64s);
@@ -323,8 +333,7 @@ btree_key_can_insert(struct btree_insert *trans,
* Get journal reservation, take write locks, and attempt to do btree update(s):
*/
static inline int do_btree_insert_at(struct btree_insert *trans,
- struct btree_iter **split,
- bool *cycle_gc_lock)
+ struct btree_insert_entry **stopped_at)
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
@@ -368,22 +377,10 @@ static inline int do_btree_insert_at(struct btree_insert *trans,
u64s = 0;
u64s += i->k->k.u64s;
- switch (btree_key_can_insert(trans, i, &u64s)) {
- case BTREE_INSERT_OK:
- break;
- case BTREE_INSERT_BTREE_NODE_FULL:
- ret = -EINTR;
- *split = i->iter;
+ ret = btree_key_can_insert(trans, i, &u64s);
+ if (ret) {
+ *stopped_at = i;
goto out;
- case BTREE_INSERT_ENOSPC:
- ret = -ENOSPC;
- goto out;
- case BTREE_INSERT_NEED_GC_LOCK:
- ret = -EINTR;
- *cycle_gc_lock = true;
- goto out;
- default:
- BUG();
}
}
@@ -441,8 +438,7 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
{
struct bch_fs *c = trans->c;
struct btree_insert_entry *i;
- struct btree_iter *linked, *split = NULL;
- bool cycle_gc_lock = false;
+ struct btree_iter *linked;
unsigned flags;
int ret;
@@ -462,9 +458,6 @@ int __bch2_btree_insert_at(struct btree_insert *trans)
if (unlikely(!percpu_ref_tryget(&c->writes)))
return -EROFS;
retry:
- split = NULL;
- cycle_gc_lock = false;
-
trans_for_each_entry(trans, i) {
unsigned old_locks_want = i->iter->locks_want;
unsigned old_uptodate = i->iter->uptodate;
@@ -482,7 +475,7 @@ retry:
}
}
- ret = do_btree_insert_at(trans, &split, &cycle_gc_lock);
+ ret = do_btree_insert_at(trans, &i);
if (unlikely(ret))
goto err;
@@ -517,8 +510,9 @@ err:
if (!trans->did_work)
flags &= ~BTREE_INSERT_NOUNLOCK;
- if (split) {
- ret = bch2_btree_split_leaf(c, split, flags);
+ switch (ret) {
+ case BTREE_INSERT_BTREE_NODE_FULL:
+ ret = bch2_btree_split_leaf(c, i->iter, flags);
/*
* if the split succeeded without dropping locks the insert will
@@ -543,9 +537,10 @@ err:
trans_restart(" (split)");
ret = -EINTR;
}
- }
+ break;
+ case BTREE_INSERT_NEED_GC_LOCK:
+ ret = -EINTR;
- if (cycle_gc_lock) {
if (!down_read_trylock(&c->gc_lock)) {
if (flags & BTREE_INSERT_NOUNLOCK)
goto out;
@@ -554,6 +549,24 @@ err:
down_read(&c->gc_lock);
}
up_read(&c->gc_lock);
+ break;
+ case BTREE_INSERT_ENOSPC:
+ ret = -ENOSPC;
+ break;
+ case BTREE_INSERT_NEED_MARK_REPLICAS:
+ if (flags & BTREE_INSERT_NOUNLOCK) {
+ ret = -EINTR;
+ goto out;
+ }
+
+ bch2_btree_iter_unlock(trans->entries[0].iter);
+ ret = bch2_mark_bkey_replicas(c, i->iter->btree_id,
+ bkey_i_to_s_c(i->k))
+ ?: -EINTR;
+ break;
+ default:
+ BUG_ON(ret >= 0);
+ break;
}
if (ret == -EINTR) {
diff --git a/libbcachefs/buckets.c b/libbcachefs/buckets.c
index c68683eb..86d57f3b 100644
--- a/libbcachefs/buckets.c
+++ b/libbcachefs/buckets.c
@@ -64,7 +64,9 @@
#include "bcachefs.h"
#include "alloc_background.h"
+#include "bset.h"
#include "btree_gc.h"
+#include "btree_update.h"
#include "buckets.h"
#include "error.h"
#include "movinggc.h"
@@ -345,7 +347,8 @@ void bch2_fs_usage_apply(struct bch_fs *c,
* reservation:
*/
should_not_have_added = added - (s64) (disk_res ? disk_res->sectors : 0);
- if (WARN_ON(should_not_have_added > 0)) {
+ if (WARN_ONCE(should_not_have_added > 0,
+ "disk usage increased without a reservation")) {
atomic64_sub(should_not_have_added, &c->sectors_available);
added -= should_not_have_added;
}
@@ -636,9 +639,6 @@ static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
struct bch_fs_usage *stats,
u64 journal_seq, unsigned flags)
{
- unsigned replicas = bch2_extent_nr_dirty_ptrs(k);
-
- BUG_ON(replicas && replicas - 1 > ARRAY_SIZE(stats->replicas));
BUG_ON(!sectors);
switch (k.k->type) {
@@ -647,38 +647,43 @@ static void bch2_mark_extent(struct bch_fs *c, struct bkey_s_c k,
struct bkey_s_c_extent e = bkey_s_c_to_extent(k);
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
+ s64 cached_sectors = 0;
+ s64 dirty_sectors = 0;
+ unsigned replicas = 0;
extent_for_each_ptr_decode(e, p, entry) {
s64 disk_sectors = ptr_disk_sectors(e, p, sectors);
- /*
- * fs level usage (which determines free space) is in
- * uncompressed sectors, until copygc + compression is
- * sorted out:
- *
- * note also that we always update @fs_usage, even when
- * we otherwise wouldn't do anything because gc is
- * running - this is because the caller still needs to
- * account w.r.t. its disk reservation. It is caller's
- * responsibility to not apply @fs_usage if gc is in
- * progress.
- */
- stats->replicas
- [!p.ptr.cached && replicas ? replicas - 1 : 0].data
- [!p.ptr.cached ? data_type : BCH_DATA_CACHED] +=
- disk_sectors;
-
bch2_mark_pointer(c, e, p, disk_sectors, data_type,
stats, journal_seq, flags);
+
+ if (!p.ptr.cached)
+ replicas++;
+
+ if (p.ptr.cached)
+ cached_sectors += disk_sectors;
+ else
+ dirty_sectors += disk_sectors;
}
+
+ replicas = clamp_t(unsigned, replicas,
+ 1, ARRAY_SIZE(stats->replicas));
+
+ stats->replicas[0].data[BCH_DATA_CACHED] += cached_sectors;
+ stats->replicas[replicas - 1].data[data_type] += dirty_sectors;
break;
}
- case BCH_RESERVATION:
- if (replicas)
- stats->replicas[replicas - 1].persistent_reserved +=
- sectors * replicas;
+ case BCH_RESERVATION: {
+ unsigned replicas = bkey_s_c_to_reservation(k).v->nr_replicas;
+
+ sectors *= replicas;
+ replicas = clamp_t(unsigned, replicas,
+ 1, ARRAY_SIZE(stats->replicas));
+
+ stats->replicas[replicas - 1].persistent_reserved += sectors;
break;
}
+ }
}
void bch2_mark_key(struct bch_fs *c,
@@ -742,6 +747,76 @@ void bch2_mark_key(struct bch_fs *c,
percpu_up_read_preempt_enable(&c->usage_lock);
}
+void bch2_mark_update(struct btree_insert *trans,
+ struct btree_insert_entry *insert)
+{
+ struct bch_fs *c = trans->c;
+ struct btree_iter *iter = insert->iter;
+ struct btree *b = iter->l[0].b;
+ struct btree_node_iter node_iter = iter->l[0].iter;
+ struct bch_fs_usage stats = { 0 };
+ struct gc_pos pos = gc_pos_btree_node(b);
+ struct bkey_packed *_k;
+
+ if (!(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
+ bch2_mark_key(c, btree_node_type(b), bkey_i_to_s_c(insert->k),
+ true,
+ bpos_min(insert->k->k.p, b->key.k.p).offset -
+ bkey_start_offset(&insert->k->k),
+ pos, &stats, trans->journal_res.seq, 0);
+
+ while ((_k = bch2_btree_node_iter_peek_filter(&node_iter, b,
+ KEY_TYPE_DISCARD))) {
+ struct bkey unpacked;
+ struct bkey_s_c k;
+ s64 sectors = 0;
+
+ k = bkey_disassemble(b, _k, &unpacked);
+
+ if (btree_node_is_extents(b)
+ ? bkey_cmp(insert->k->k.p, bkey_start_pos(k.k)) <= 0
+ : bkey_cmp(insert->k->k.p, k.k->p))
+ break;
+
+ if (btree_node_is_extents(b)) {
+ switch (bch2_extent_overlap(&insert->k->k, k.k)) {
+ case BCH_EXTENT_OVERLAP_ALL:
+ sectors = -((s64) k.k->size);
+ break;
+ case BCH_EXTENT_OVERLAP_BACK:
+ sectors = bkey_start_offset(&insert->k->k) -
+ k.k->p.offset;
+ break;
+ case BCH_EXTENT_OVERLAP_FRONT:
+ sectors = bkey_start_offset(k.k) -
+ insert->k->k.p.offset;
+ break;
+ case BCH_EXTENT_OVERLAP_MIDDLE:
+ sectors = k.k->p.offset - insert->k->k.p.offset;
+ BUG_ON(sectors <= 0);
+
+ bch2_mark_key(c, btree_node_type(b), k,
+ true, sectors,
+ pos, &stats, trans->journal_res.seq, 0);
+
+ sectors = bkey_start_offset(&insert->k->k) -
+ k.k->p.offset;
+ break;
+ }
+
+ BUG_ON(sectors >= 0);
+ }
+
+ bch2_mark_key(c, btree_node_type(b), k,
+ false, sectors,
+ pos, &stats, trans->journal_res.seq, 0);
+
+ bch2_btree_node_iter_advance(&node_iter, b);
+ }
+
+ bch2_fs_usage_apply(c, &stats, trans->disk_res, pos);
+}
+
/* Disk reservations: */
static u64 __recalc_sectors_available(struct bch_fs *c)
diff --git a/libbcachefs/buckets.h b/libbcachefs/buckets.h
index 17b82cd0..e84247d5 100644
--- a/libbcachefs/buckets.h
+++ b/libbcachefs/buckets.h
@@ -212,6 +212,7 @@ void bch2_mark_metadata_bucket(struct bch_fs *, struct bch_dev *,
void bch2_mark_key(struct bch_fs *, enum bkey_type, struct bkey_s_c,
bool, s64, struct gc_pos,
struct bch_fs_usage *, u64, unsigned);
+void bch2_mark_update(struct btree_insert *, struct btree_insert_entry *);
void bch2_recalc_sectors_available(struct bch_fs *);
diff --git a/libbcachefs/extents.c b/libbcachefs/extents.c
index 72301eab..a3ec1cc9 100644
--- a/libbcachefs/extents.c
+++ b/libbcachefs/extents.c
@@ -675,7 +675,8 @@ void bch2_btree_ptr_debugcheck(struct bch_fs *c, struct btree *b,
}
if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
- !bch2_bkey_replicas_marked(c, btree_node_type(b), e.s_c)) {
+ !bch2_bkey_replicas_marked(c, btree_node_type(b),
+ e.s_c, false)) {
bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b), k);
bch2_fs_bug(c,
"btree key bad (replicas not marked in superblock):\n%s",
@@ -1009,7 +1010,6 @@ struct extent_insert_state {
struct btree_insert *trans;
struct btree_insert_entry *insert;
struct bpos committed;
- struct bch_fs_usage stats;
/* for deleting: */
struct bkey_i whiteout;
@@ -1018,54 +1018,6 @@ struct extent_insert_state {
bool deleting;
};
-static void bch2_add_sectors(struct extent_insert_state *s,
- struct bkey_s_c k, u64 offset, s64 sectors)
-{
- struct bch_fs *c = s->trans->c;
- struct btree *b = s->insert->iter->l[0].b;
-
- EBUG_ON(bkey_cmp(bkey_start_pos(k.k), b->data->min_key) < 0);
-
- if (!sectors)
- return;
-
- bch2_mark_key(c, BKEY_TYPE_EXTENTS, k, sectors > 0, sectors,
- gc_pos_btree_node(b), &s->stats,
- s->trans->journal_res.seq, 0);
-}
-
-static void bch2_subtract_sectors(struct extent_insert_state *s,
- struct bkey_s_c k, u64 offset, s64 sectors)
-{
- bch2_add_sectors(s, k, offset, -sectors);
-}
-
-/* These wrappers subtract exactly the sectors that we're removing from @k */
-static void bch2_cut_subtract_back(struct extent_insert_state *s,
- struct bpos where, struct bkey_s k)
-{
- bch2_subtract_sectors(s, k.s_c, where.offset,
- k.k->p.offset - where.offset);
- bch2_cut_back(where, k.k);
-}
-
-static void bch2_cut_subtract_front(struct extent_insert_state *s,
- struct bpos where, struct bkey_s k)
-{
- bch2_subtract_sectors(s, k.s_c, bkey_start_offset(k.k),
- where.offset - bkey_start_offset(k.k));
- __bch2_cut_front(where, k);
-}
-
-static void bch2_drop_subtract(struct extent_insert_state *s, struct bkey_s k)
-{
- if (k.k->size)
- bch2_subtract_sectors(s, k.s_c,
- bkey_start_offset(k.k), k.k->size);
- k.k->size = 0;
- k.k->type = KEY_TYPE_DELETED;
-}
-
static bool bch2_extent_merge_inline(struct bch_fs *,
struct btree_iter *,
struct bkey_packed *,
@@ -1166,11 +1118,7 @@ static void extent_insert_committed(struct extent_insert_state *s)
if (s->deleting)
split.k.k.type = KEY_TYPE_DISCARD;
- if (!(s->trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
- bch2_cut_subtract_back(s, s->committed,
- bkey_i_to_s(&split.k));
- else
- bch2_cut_back(s->committed, &split.k.k);
+ bch2_cut_back(s->committed, &split.k.k);
if (!bkey_cmp(s->committed, iter->pos))
return;
@@ -1290,7 +1238,7 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
switch (overlap) {
case BCH_EXTENT_OVERLAP_FRONT:
/* insert overlaps with start of k: */
- bch2_cut_subtract_front(s, insert->k.p, k);
+ __bch2_cut_front(insert->k.p, k);
BUG_ON(bkey_deleted(k.k));
extent_save(l->b, _k, k.k);
verify_modified_extent(iter, _k);
@@ -1298,7 +1246,7 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
case BCH_EXTENT_OVERLAP_BACK:
/* insert overlaps with end of k: */
- bch2_cut_subtract_back(s, bkey_start_pos(&insert->k), k);
+ bch2_cut_back(bkey_start_pos(&insert->k), k.k);
BUG_ON(bkey_deleted(k.k));
extent_save(l->b, _k, k.k);
@@ -1318,7 +1266,8 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
if (!bkey_whiteout(k.k))
btree_account_key_drop(l->b, _k);
- bch2_drop_subtract(s, k);
+ k.k->size = 0;
+ k.k->type = KEY_TYPE_DELETED;
if (_k >= btree_bset_last(l->b)->start) {
unsigned u64s = _k->u64s;
@@ -1358,14 +1307,11 @@ extent_squash(struct extent_insert_state *s, struct bkey_i *insert,
bch2_cut_back(bkey_start_pos(&insert->k), &split.k.k);
BUG_ON(bkey_deleted(&split.k.k));
- bch2_cut_subtract_front(s, insert->k.p, k);
+ __bch2_cut_front(insert->k.p, k);
BUG_ON(bkey_deleted(k.k));
extent_save(l->b, _k, k.k);
verify_modified_extent(iter, _k);
- bch2_add_sectors(s, bkey_i_to_s_c(&split.k),
- bkey_start_offset(&split.k.k),
- split.k.k.size);
extent_bset_insert(c, iter, &split.k);
break;
}
@@ -1414,8 +1360,6 @@ static void __bch2_insert_fixup_extent(struct extent_insert_state *s)
!bkey_cmp(bkey_start_pos(&insert->k), bkey_start_pos(k.k))) {
if (!bkey_whiteout(k.k)) {
btree_account_key_drop(l->b, _k);
- bch2_subtract_sectors(s, k.s_c,
- bkey_start_offset(k.k), k.k->size);
_k->type = KEY_TYPE_DISCARD;
reserve_whiteout(l->b, _k);
}
@@ -1505,7 +1449,6 @@ enum btree_insert_ret
bch2_insert_fixup_extent(struct btree_insert *trans,
struct btree_insert_entry *insert)
{
- struct bch_fs *c = trans->c;
struct btree_iter *iter = insert->iter;
struct btree *b = iter->l[0].b;
struct extent_insert_state s = {
@@ -1530,19 +1473,10 @@ bch2_insert_fixup_extent(struct btree_insert *trans,
*/
EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
- if (!s.deleting &&
- !(trans->flags & BTREE_INSERT_JOURNAL_REPLAY))
- bch2_add_sectors(&s, bkey_i_to_s_c(insert->k),
- bkey_start_offset(&insert->k->k),
- insert->k->k.size);
-
__bch2_insert_fixup_extent(&s);
extent_insert_committed(&s);
- bch2_fs_usage_apply(c, &s.stats, trans->disk_res,
- gc_pos_btree_node(b));
-
EBUG_ON(bkey_cmp(iter->pos, bkey_start_pos(&insert->k->k)));
EBUG_ON(bkey_cmp(iter->pos, s.committed));
@@ -1702,7 +1636,8 @@ static void bch2_extent_debugcheck_extent(struct bch_fs *c, struct btree *b,
}
if (!test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) &&
- !bch2_bkey_replicas_marked(c, btree_node_type(b), e.s_c)) {
+ !bch2_bkey_replicas_marked(c, btree_node_type(b),
+ e.s_c, false)) {
bch2_bkey_val_to_text(&PBUF(buf), c, btree_node_type(b),
e.s_c);
bch2_fs_bug(c,
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index eceb4865..34cab253 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -22,7 +22,6 @@
#include "keylist.h"
#include "move.h"
#include "rebalance.h"
-#include "replicas.h"
#include "super.h"
#include "super-io.h"
@@ -319,13 +318,6 @@ static void __bch2_write_index(struct bch_write_op *op)
goto err;
}
- if (!(op->flags & BCH_WRITE_NOMARK_REPLICAS)) {
- ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS,
- e.s_c);
- if (ret)
- goto err;
- }
-
dst = bkey_next(dst);
}
diff --git a/libbcachefs/io.h b/libbcachefs/io.h
index 5bd5f846..3ca77974 100644
--- a/libbcachefs/io.h
+++ b/libbcachefs/io.h
@@ -30,10 +30,9 @@ enum bch_write_flags {
BCH_WRITE_PAGES_OWNED = (1 << 5),
BCH_WRITE_ONLY_SPECIFIED_DEVS = (1 << 6),
BCH_WRITE_NOPUT_RESERVATION = (1 << 7),
- BCH_WRITE_NOMARK_REPLICAS = (1 << 8),
/* Internal: */
- BCH_WRITE_JOURNAL_SEQ_PTR = (1 << 9),
+ BCH_WRITE_JOURNAL_SEQ_PTR = (1 << 8),
};
static inline u64 *op_journal_seq(struct bch_write_op *op)
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 961d8d81..c83e8eb8 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -780,7 +780,7 @@ int bch2_journal_read(struct bch_fs *c, struct list_head *list)
if (!degraded &&
(test_bit(BCH_FS_REBUILD_REPLICAS, &c->flags) ||
fsck_err_on(!bch2_replicas_marked(c, BCH_DATA_JOURNAL,
- i->devs), c,
+ i->devs, false), c,
"superblock not marked as containing replicas (type %u)",
BCH_DATA_JOURNAL))) {
ret = bch2_mark_replicas(c, BCH_DATA_JOURNAL, i->devs);
diff --git a/libbcachefs/migrate.c b/libbcachefs/migrate.c
index c0dfe1c6..df4fbae2 100644
--- a/libbcachefs/migrate.c
+++ b/libbcachefs/migrate.c
@@ -71,11 +71,6 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c, unsigned dev_idx, int flags)
*/
bch2_extent_normalize(c, e.s);
- ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS,
- bkey_i_to_s_c(&tmp.key));
- if (ret)
- break;
-
iter.pos = bkey_start_pos(&tmp.key.k);
ret = bch2_btree_insert_at(c, NULL, NULL,
diff --git a/libbcachefs/move.c b/libbcachefs/move.c
index e93725bf..885792bd 100644
--- a/libbcachefs/move.c
+++ b/libbcachefs/move.c
@@ -150,11 +150,6 @@ static int bch2_migrate_index_update(struct bch_write_op *op)
goto next;
}
- ret = bch2_mark_bkey_replicas(c, BKEY_TYPE_EXTENTS,
- extent_i_to_s_c(insert).s_c);
- if (ret)
- break;
-
ret = bch2_btree_insert_at(c, &op->res,
op_journal_seq(op),
BTREE_INSERT_ATOMIC|
@@ -239,8 +234,7 @@ int bch2_migrate_write_init(struct bch_fs *c, struct migrate_write *m,
m->op.flags |= BCH_WRITE_ONLY_SPECIFIED_DEVS|
BCH_WRITE_PAGES_STABLE|
BCH_WRITE_PAGES_OWNED|
- BCH_WRITE_DATA_ENCODED|
- BCH_WRITE_NOMARK_REPLICAS;
+ BCH_WRITE_DATA_ENCODED;
m->op.nr_replicas = 1;
m->op.nr_replicas_required = 1;
diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c
index 1dab991b..775d6a66 100644
--- a/libbcachefs/opts.c
+++ b/libbcachefs/opts.c
@@ -2,6 +2,7 @@
#include <linux/kernel.h>
#include "bcachefs.h"
+#include "compress.h"
#include "disk_groups.h"
#include "opts.h"
#include "super-io.h"
@@ -268,6 +269,20 @@ void bch2_opt_to_text(struct printbuf *out, struct bch_fs *c,
}
}
+int bch2_opt_check_may_set(struct bch_fs *c, int id, u64 v)
+{
+ int ret = 0;
+
+ switch (id) {
+ case Opt_compression:
+ case Opt_background_compression:
+ ret = bch2_check_set_has_compressed_data(c, v);
+ break;
+ }
+
+ return ret;
+}
+
int bch2_parse_mount_opts(struct bch_opts *opts, char *options)
{
char *opt, *name, *val;
diff --git a/libbcachefs/opts.h b/libbcachefs/opts.h
index c65a8d13..bdf1e4fb 100644
--- a/libbcachefs/opts.h
+++ b/libbcachefs/opts.h
@@ -265,6 +265,7 @@ int bch2_opt_parse(struct bch_fs *, const struct bch_option *, const char *, u64
void bch2_opt_to_text(struct printbuf *, struct bch_fs *,
const struct bch_option *, u64, unsigned);
+int bch2_opt_check_may_set(struct bch_fs *, int, u64);
int bch2_parse_mount_opts(struct bch_opts *, char *);
/* inode opts: */
diff --git a/libbcachefs/recovery.c b/libbcachefs/recovery.c
index f530f202..c5d9dc4e 100644
--- a/libbcachefs/recovery.c
+++ b/libbcachefs/recovery.c
@@ -129,7 +129,8 @@ int bch2_fs_recovery(struct bch_fs *c)
int ret;
mutex_lock(&c->sb_lock);
- if (!bch2_sb_get_replicas(c->disk_sb.sb)) {
+ if (!rcu_dereference_protected(c->replicas,
+ lockdep_is_held(&c->sb_lock))->nr) {
bch_info(c, "building replicas info");
set_bit(BCH_FS_REBUILD_REPLICAS, &c->flags);
}
diff --git a/libbcachefs/replicas.c b/libbcachefs/replicas.c
index 4da35d09..a7a4e280 100644
--- a/libbcachefs/replicas.c
+++ b/libbcachefs/replicas.c
@@ -44,7 +44,10 @@ static void replicas_entry_to_text(struct printbuf *out,
{
unsigned i;
- pr_buf(out, "%u: [", e->data_type);
+ pr_buf(out, "%s: %u/%u [",
+ bch2_data_types[e->data_type],
+ e->nr_required,
+ e->nr_devs);
for (i = 0; i < e->nr_devs; i++)
pr_buf(out, i ? " %u" : "%u", e->devs[i]);
@@ -74,6 +77,8 @@ static void extent_to_replicas(struct bkey_s_c k,
const union bch_extent_entry *entry;
struct extent_ptr_decoded p;
+ r->nr_required = 1;
+
extent_for_each_ptr_decode(e, p, entry)
if (!p.ptr.cached)
r->devs[r->nr_devs++] = p.ptr.dev;
@@ -114,6 +119,7 @@ static inline void devlist_to_replicas(struct bch_devs_list devs,
e->data_type = data_type;
e->nr_devs = 0;
+ e->nr_required = 1;
for (i = 0; i < devs.nr; i++)
e->devs[e->nr_devs++] = devs.devs[i];
@@ -153,8 +159,8 @@ cpu_replicas_add_entry(struct bch_replicas_cpu *old,
return new;
}
-static bool replicas_has_entry(struct bch_replicas_cpu *r,
- struct bch_replicas_entry *search)
+static bool __replicas_has_entry(struct bch_replicas_cpu *r,
+ struct bch_replicas_entry *search)
{
return replicas_entry_bytes(search) <= r->entry_size &&
eytzinger0_find(r->entries, r->nr,
@@ -162,6 +168,24 @@ static bool replicas_has_entry(struct bch_replicas_cpu *r,
memcmp, search) < r->nr;
}
+static bool replicas_has_entry(struct bch_fs *c,
+ struct bch_replicas_entry *search,
+ bool check_gc_replicas)
+{
+ struct bch_replicas_cpu *r, *gc_r;
+ bool marked;
+
+ rcu_read_lock();
+ r = rcu_dereference(c->replicas);
+ marked = __replicas_has_entry(r, search) &&
+ (!check_gc_replicas ||
+ likely(!(gc_r = rcu_dereference(c->replicas_gc))) ||
+ __replicas_has_entry(gc_r, search));
+ rcu_read_unlock();
+
+ return marked;
+}
+
noinline
static int bch2_mark_replicas_slowpath(struct bch_fs *c,
struct bch_replicas_entry *new_entry)
@@ -173,7 +197,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
old_gc = rcu_dereference_protected(c->replicas_gc,
lockdep_is_held(&c->sb_lock));
- if (old_gc && !replicas_has_entry(old_gc, new_entry)) {
+ if (old_gc && !__replicas_has_entry(old_gc, new_entry)) {
new_gc = cpu_replicas_add_entry(old_gc, new_entry);
if (!new_gc)
goto err;
@@ -181,7 +205,7 @@ static int bch2_mark_replicas_slowpath(struct bch_fs *c,
old_r = rcu_dereference_protected(c->replicas,
lockdep_is_held(&c->sb_lock));
- if (!replicas_has_entry(old_r, new_entry)) {
+ if (!__replicas_has_entry(old_r, new_entry)) {
new_r = cpu_replicas_add_entry(old_r, new_entry);
if (!new_r)
goto err;
@@ -220,17 +244,8 @@ err:
static int __bch2_mark_replicas(struct bch_fs *c,
struct bch_replicas_entry *devs)
{
- struct bch_replicas_cpu *r, *gc_r;
- bool marked;
-
- rcu_read_lock();
- r = rcu_dereference(c->replicas);
- gc_r = rcu_dereference(c->replicas_gc);
- marked = replicas_has_entry(r, devs) &&
- (!likely(gc_r) || replicas_has_entry(gc_r, devs));
- rcu_read_unlock();
-
- return likely(marked) ? 0
+ return likely(replicas_has_entry(c, devs, true))
+ ? 0
: bch2_mark_replicas_slowpath(c, devs);
}
@@ -358,14 +373,13 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
{
struct bch_replicas_entry *e, *dst;
struct bch_replicas_cpu *cpu_r;
- unsigned nr = 0, entry_size = 0;
+ unsigned nr = 0, entry_size = 0, idx = 0;
- if (sb_r)
- for_each_replicas_entry(sb_r, e) {
- entry_size = max_t(unsigned, entry_size,
- replicas_entry_bytes(e));
- nr++;
- }
+ for_each_replicas_entry(sb_r, e) {
+ entry_size = max_t(unsigned, entry_size,
+ replicas_entry_bytes(e));
+ nr++;
+ }
cpu_r = kzalloc(sizeof(struct bch_replicas_cpu) +
nr * entry_size, GFP_NOIO);
@@ -375,29 +389,71 @@ __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r)
cpu_r->nr = nr;
cpu_r->entry_size = entry_size;
- nr = 0;
+ for_each_replicas_entry(sb_r, e) {
+ dst = cpu_replicas_entry(cpu_r, idx++);
+ memcpy(dst, e, replicas_entry_bytes(e));
+ replicas_entry_sort(dst);
+ }
- if (sb_r)
- for_each_replicas_entry(sb_r, e) {
- dst = cpu_replicas_entry(cpu_r, nr++);
- memcpy(dst, e, replicas_entry_bytes(e));
- replicas_entry_sort(dst);
- }
+ return cpu_r;
+}
+
+static struct bch_replicas_cpu *
+__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r)
+{
+ struct bch_replicas_entry_v0 *e;
+ struct bch_replicas_cpu *cpu_r;
+ unsigned nr = 0, entry_size = 0, idx = 0;
+
+ for_each_replicas_entry(sb_r, e) {
+ entry_size = max_t(unsigned, entry_size,
+ replicas_entry_bytes(e));
+ nr++;
+ }
+
+ entry_size += sizeof(struct bch_replicas_entry) -
+ sizeof(struct bch_replicas_entry_v0);
+
+ cpu_r = kzalloc(sizeof(struct bch_replicas_cpu) +
+ nr * entry_size, GFP_NOIO);
+ if (!cpu_r)
+ return NULL;
+
+ cpu_r->nr = nr;
+ cpu_r->entry_size = entry_size;
+
+ for_each_replicas_entry(sb_r, e) {
+ struct bch_replicas_entry *dst =
+ cpu_replicas_entry(cpu_r, idx++);
+
+ dst->data_type = e->data_type;
+ dst->nr_devs = e->nr_devs;
+ dst->nr_required = 1;
+ memcpy(dst->devs, e->devs, e->nr_devs);
+ replicas_entry_sort(dst);
+ }
- bch2_cpu_replicas_sort(cpu_r);
return cpu_r;
}
int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
{
- struct bch_sb_field_replicas *sb_r;
+ struct bch_sb_field_replicas *sb_v1;
+ struct bch_sb_field_replicas_v0 *sb_v0;
struct bch_replicas_cpu *cpu_r, *old_r;
- sb_r = bch2_sb_get_replicas(c->disk_sb.sb);
- cpu_r = __bch2_sb_replicas_to_cpu_replicas(sb_r);
+ if ((sb_v1 = bch2_sb_get_replicas(c->disk_sb.sb)))
+ cpu_r = __bch2_sb_replicas_to_cpu_replicas(sb_v1);
+ else if ((sb_v0 = bch2_sb_get_replicas_v0(c->disk_sb.sb)))
+ cpu_r = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0);
+ else
+ cpu_r = kzalloc(sizeof(struct bch_replicas_cpu), GFP_NOIO);
+
if (!cpu_r)
return -ENOMEM;
+ bch2_cpu_replicas_sort(cpu_r);
+
old_r = rcu_dereference_check(c->replicas, lockdep_is_held(&c->sb_lock));
rcu_assign_pointer(c->replicas, cpu_r);
if (old_r)
@@ -406,23 +462,72 @@ int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
return 0;
}
+static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
+ struct bch_replicas_cpu *r)
+{
+ struct bch_sb_field_replicas_v0 *sb_r;
+ struct bch_replicas_entry_v0 *dst;
+ struct bch_replicas_entry *src;
+ size_t bytes;
+
+ bytes = sizeof(struct bch_sb_field_replicas);
+
+ for_each_cpu_replicas_entry(r, src)
+ bytes += replicas_entry_bytes(src) - 1;
+
+ sb_r = bch2_sb_resize_replicas_v0(&c->disk_sb,
+ DIV_ROUND_UP(bytes, sizeof(u64)));
+ if (!sb_r)
+ return -ENOSPC;
+
+ bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas);
+ sb_r = bch2_sb_get_replicas_v0(c->disk_sb.sb);
+
+ memset(&sb_r->entries, 0,
+ vstruct_end(&sb_r->field) -
+ (void *) &sb_r->entries);
+
+ dst = sb_r->entries;
+ for_each_cpu_replicas_entry(r, src) {
+ dst->data_type = src->data_type;
+ dst->nr_devs = src->nr_devs;
+ memcpy(dst->devs, src->devs, src->nr_devs);
+
+ dst = replicas_entry_next(dst);
+
+ BUG_ON((void *) dst > vstruct_end(&sb_r->field));
+ }
+
+ return 0;
+}
+
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
struct bch_replicas_cpu *r)
{
struct bch_sb_field_replicas *sb_r;
struct bch_replicas_entry *dst, *src;
+ bool need_v1 = false;
size_t bytes;
bytes = sizeof(struct bch_sb_field_replicas);
- for_each_cpu_replicas_entry(r, src)
+ for_each_cpu_replicas_entry(r, src) {
bytes += replicas_entry_bytes(src);
+ if (src->nr_required != 1)
+ need_v1 = true;
+ }
+
+ if (!need_v1)
+ return bch2_cpu_replicas_to_sb_replicas_v0(c, r);
sb_r = bch2_sb_resize_replicas(&c->disk_sb,
DIV_ROUND_UP(bytes, sizeof(u64)));
if (!sb_r)
return -ENOSPC;
+ bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0);
+ sb_r = bch2_sb_get_replicas(c->disk_sb.sb);
+
memset(&sb_r->entries, 0,
vstruct_end(&sb_r->field) -
(void *) &sb_r->entries);
@@ -481,8 +586,10 @@ static const char *bch2_sb_validate_replicas(struct bch_sb *sb, struct bch_sb_fi
if (!e->nr_devs)
goto err;
- err = "invalid replicas entry: too many devices";
- if (e->nr_devs >= BCH_REPLICAS_MAX)
+ err = "invalid replicas entry: bad nr_required";
+ if (!e->nr_required ||
+ (e->nr_required > 1 &&
+ e->nr_required >= e->nr_devs))
goto err;
err = "invalid replicas entry: invalid device";
@@ -524,14 +631,53 @@ const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
.to_text = bch2_sb_replicas_to_text,
};
+static const char *bch2_sb_validate_replicas_v0(struct bch_sb *sb, struct bch_sb_field *f)
+{
+ struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
+ struct bch_sb_field_members *mi = bch2_sb_get_members(sb);
+ struct bch_replicas_cpu *cpu_r = NULL;
+ struct bch_replicas_entry_v0 *e;
+ const char *err;
+ unsigned i;
+
+ for_each_replicas_entry_v0(sb_r, e) {
+ err = "invalid replicas entry: invalid data type";
+ if (e->data_type >= BCH_DATA_NR)
+ goto err;
+
+ err = "invalid replicas entry: no devices";
+ if (!e->nr_devs)
+ goto err;
+
+ err = "invalid replicas entry: invalid device";
+ for (i = 0; i < e->nr_devs; i++)
+ if (!bch2_dev_exists(sb, mi, e->devs[i]))
+ goto err;
+ }
+
+ err = "cannot allocate memory";
+ cpu_r = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r);
+ if (!cpu_r)
+ goto err;
+
+ err = check_dup_replicas_entries(cpu_r);
+err:
+ kfree(cpu_r);
+ return err;
+}
+
+const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
+ .validate = bch2_sb_validate_replicas_v0,
+};
+
/* Query replicas: */
bool bch2_replicas_marked(struct bch_fs *c,
enum bch_data_type data_type,
- struct bch_devs_list devs)
+ struct bch_devs_list devs,
+ bool check_gc_replicas)
{
struct bch_replicas_entry_padded search;
- bool ret;
if (!devs.nr)
return true;
@@ -540,19 +686,15 @@ bool bch2_replicas_marked(struct bch_fs *c,
devlist_to_replicas(devs, data_type, &search.e);
- rcu_read_lock();
- ret = replicas_has_entry(rcu_dereference(c->replicas), &search.e);
- rcu_read_unlock();
-
- return ret;
+ return replicas_has_entry(c, &search.e, check_gc_replicas);
}
bool bch2_bkey_replicas_marked(struct bch_fs *c,
enum bkey_type type,
- struct bkey_s_c k)
+ struct bkey_s_c k,
+ bool check_gc_replicas)
{
struct bch_replicas_entry_padded search;
- bool ret;
memset(&search, 0, sizeof(search));
@@ -562,20 +704,16 @@ bool bch2_bkey_replicas_marked(struct bch_fs *c,
for (i = 0; i < cached.nr; i++)
if (!bch2_replicas_marked(c, BCH_DATA_CACHED,
- bch2_dev_list_single(cached.devs[i])))
+ bch2_dev_list_single(cached.devs[i]),
+ check_gc_replicas))
return false;
}
bkey_to_replicas(type, k, &search.e);
- if (!search.e.nr_devs)
- return true;
-
- rcu_read_lock();
- ret = replicas_has_entry(rcu_dereference(c->replicas), &search.e);
- rcu_read_unlock();
-
- return ret;
+ return search.e.nr_devs
+ ? replicas_has_entry(c, &search.e, check_gc_replicas)
+ : true;
}
struct replicas_status __bch2_replicas_status(struct bch_fs *c,
@@ -590,7 +728,7 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
memset(&ret, 0, sizeof(ret));
for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
- ret.replicas[i].nr_online = UINT_MAX;
+ ret.replicas[i].redundancy = INT_MAX;
mi = bch2_sb_get_members(c->disk_sb.sb);
rcu_read_lock();
@@ -612,9 +750,9 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
nr_offline++;
}
- ret.replicas[e->data_type].nr_online =
- min(ret.replicas[e->data_type].nr_online,
- nr_online);
+ ret.replicas[e->data_type].redundancy =
+ min(ret.replicas[e->data_type].redundancy,
+ (int) nr_online - (int) e->nr_required);
ret.replicas[e->data_type].nr_offline =
max(ret.replicas[e->data_type].nr_offline,
@@ -623,6 +761,10 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *c,
rcu_read_unlock();
+ for (i = 0; i < ARRAY_SIZE(ret.replicas); i++)
+ if (ret.replicas[i].redundancy == INT_MAX)
+ ret.replicas[i].redundancy = 0;
+
return ret;
}
@@ -637,7 +779,7 @@ static bool have_enough_devs(struct replicas_status s,
bool force_if_lost)
{
return (!s.replicas[type].nr_offline || force_if_degraded) &&
- (s.replicas[type].nr_online || force_if_lost);
+ (s.replicas[type].redundancy >= 0 || force_if_lost);
}
bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
@@ -653,14 +795,14 @@ bool bch2_have_enough_devs(struct replicas_status s, unsigned flags)
flags & BCH_FORCE_IF_DATA_LOST));
}
-unsigned bch2_replicas_online(struct bch_fs *c, bool meta)
+int bch2_replicas_online(struct bch_fs *c, bool meta)
{
struct replicas_status s = bch2_replicas_status(c);
- return meta
- ? min(s.replicas[BCH_DATA_JOURNAL].nr_online,
- s.replicas[BCH_DATA_BTREE].nr_online)
- : s.replicas[BCH_DATA_USER].nr_online;
+ return (meta
+ ? min(s.replicas[BCH_DATA_JOURNAL].redundancy,
+ s.replicas[BCH_DATA_BTREE].redundancy)
+ : s.replicas[BCH_DATA_USER].redundancy) + 1;
}
unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
diff --git a/libbcachefs/replicas.h b/libbcachefs/replicas.h
index 7deca37c..7fee927c 100644
--- a/libbcachefs/replicas.h
+++ b/libbcachefs/replicas.h
@@ -4,9 +4,9 @@
#include "replicas_types.h"
bool bch2_replicas_marked(struct bch_fs *, enum bch_data_type,
- struct bch_devs_list);
+ struct bch_devs_list, bool);
bool bch2_bkey_replicas_marked(struct bch_fs *, enum bkey_type,
- struct bkey_s_c);
+ struct bkey_s_c, bool);
int bch2_mark_replicas(struct bch_fs *, enum bch_data_type,
struct bch_devs_list);
int bch2_mark_bkey_replicas(struct bch_fs *, enum bkey_type,
@@ -16,7 +16,7 @@ void bch2_cpu_replicas_to_text(struct printbuf *, struct bch_replicas_cpu *);
struct replicas_status {
struct {
- unsigned nr_online;
+ int redundancy;
unsigned nr_offline;
} replicas[BCH_DATA_NR];
};
@@ -26,7 +26,7 @@ struct replicas_status __bch2_replicas_status(struct bch_fs *,
struct replicas_status bch2_replicas_status(struct bch_fs *);
bool bch2_have_enough_devs(struct replicas_status, unsigned);
-unsigned bch2_replicas_online(struct bch_fs *, bool);
+int bch2_replicas_online(struct bch_fs *, bool);
unsigned bch2_dev_has_data(struct bch_fs *, struct bch_dev *);
int bch2_replicas_gc_end(struct bch_fs *, int);
@@ -45,8 +45,14 @@ int bch2_replicas_gc_start(struct bch_fs *, unsigned);
(void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
(_i) = replicas_entry_next(_i))
+#define for_each_replicas_entry_v0(_r, _i) \
+ for (_i = (_r)->entries; \
+ (void *) (_i) < vstruct_end(&(_r)->field) && (_i)->data_type;\
+ (_i) = replicas_entry_next(_i))
+
int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *);
extern const struct bch_sb_field_ops bch_sb_field_ops_replicas;
+extern const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0;
#endif /* _BCACHEFS_REPLICAS_H */
diff --git a/libbcachefs/super-io.c b/libbcachefs/super-io.c
index d1fca0d6..83523572 100644
--- a/libbcachefs/super-io.c
+++ b/libbcachefs/super-io.c
@@ -56,8 +56,13 @@ static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb,
void *src, *dst;
src = vstruct_end(f);
- f->u64s = cpu_to_le32(u64s);
- dst = vstruct_end(f);
+
+ if (u64s) {
+ f->u64s = cpu_to_le32(u64s);
+ dst = vstruct_end(f);
+ } else {
+ dst = f;
+ }
memmove(dst, src, vstruct_end(sb->sb) - src);
@@ -67,7 +72,16 @@ static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb,
sb->sb->u64s = cpu_to_le32(sb_u64s);
- return f;
+ return u64s ? f : NULL;
+}
+
+void bch2_sb_field_delete(struct bch_sb_handle *sb,
+ enum bch_sb_field_type type)
+{
+ struct bch_sb_field *f = bch2_sb_field_get(sb->sb, type);
+
+ if (f)
+ __bch2_sb_field_resize(sb, f, 0);
}
/* Superblock realloc/free: */
@@ -167,7 +181,8 @@ struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *sb,
}
f = __bch2_sb_field_resize(sb, f, u64s);
- f->type = cpu_to_le32(type);
+ if (f)
+ f->type = cpu_to_le32(type);
return f;
}
@@ -222,6 +237,10 @@ const char *bch2_sb_validate(struct bch_sb_handle *disk_sb)
le64_to_cpu(sb->version) > BCH_SB_VERSION_MAX)
return"Unsupported superblock version";
+ if (sb->features[1] ||
+ (le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR)))
+ return "Filesystem has incompatible features";
+
if (le64_to_cpu(sb->version) < BCH_SB_VERSION_EXTENT_MAX) {
SET_BCH_SB_ENCODED_EXTENT_MAX_BITS(sb, 7);
SET_BCH_SB_POSIX_ACL(sb, 1);
@@ -354,6 +373,7 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
{
struct bch_sb_field *src_f, *dst_f;
struct bch_sb *dst = dst_handle->sb;
+ unsigned i;
dst->version = src->version;
dst->seq = src->seq;
@@ -372,15 +392,17 @@ static void __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
memcpy(dst->features, src->features, sizeof(dst->features));
memcpy(dst->compat, src->compat, sizeof(dst->compat));
- vstruct_for_each(src, src_f) {
- if (src_f->type == BCH_SB_FIELD_journal)
+ for (i = 0; i < BCH_SB_FIELD_NR; i++) {
+ if (i == BCH_SB_FIELD_journal)
continue;
- dst_f = bch2_sb_field_get(dst, le32_to_cpu(src_f->type));
+ src_f = bch2_sb_field_get(src, i);
+ dst_f = bch2_sb_field_get(dst, i);
dst_f = __bch2_sb_field_resize(dst_handle, dst_f,
- le32_to_cpu(src_f->u64s));
+ src_f ? le32_to_cpu(src_f->u64s) : 0);
- memcpy(dst_f, src_f, vstruct_bytes(src_f));
+ if (src_f)
+ memcpy(dst_f, src_f, vstruct_bytes(src_f));
}
}
@@ -455,7 +477,7 @@ reread:
if (le64_to_cpu(sb->sb->version) < BCH_SB_VERSION_MIN ||
le64_to_cpu(sb->sb->version) > BCH_SB_VERSION_MAX)
- return"Unsupported superblock version";
+ return "Unsupported superblock version";
bytes = vstruct_bytes(sb->sb);
diff --git a/libbcachefs/super-io.h b/libbcachefs/super-io.h
index 6eb193ac..c66fd974 100644
--- a/libbcachefs/super-io.h
+++ b/libbcachefs/super-io.h
@@ -11,6 +11,7 @@
struct bch_sb_field *bch2_sb_field_get(struct bch_sb *, enum bch_sb_field_type);
struct bch_sb_field *bch2_sb_field_resize(struct bch_sb_handle *,
enum bch_sb_field_type, unsigned);
+void bch2_sb_field_delete(struct bch_sb_handle *, enum bch_sb_field_type);
#define field_to_type(_f, _name) \
container_of_or_null(_f, struct bch_sb_field_##_name, field)
diff --git a/libbcachefs/sysfs.c b/libbcachefs/sysfs.c
index c6a653ac..f793cfba 100644
--- a/libbcachefs/sysfs.c
+++ b/libbcachefs/sysfs.c
@@ -9,7 +9,6 @@
#include "bcachefs.h"
#include "alloc_background.h"
-#include "compress.h"
#include "sysfs.h"
#include "btree_cache.h"
#include "btree_io.h"
@@ -346,8 +345,8 @@ SHOW(bch2_fs)
sysfs_print(promote_whole_extents, c->promote_whole_extents);
- sysfs_printf(meta_replicas_have, "%u", bch2_replicas_online(c, true));
- sysfs_printf(data_replicas_have, "%u", bch2_replicas_online(c, false));
+ sysfs_printf(meta_replicas_have, "%i", bch2_replicas_online(c, true));
+ sysfs_printf(data_replicas_have, "%i", bch2_replicas_online(c, false));
/* Debugging: */
@@ -580,14 +579,9 @@ STORE(bch2_fs_opts_dir)
if (ret < 0)
return ret;
- if (id == Opt_compression ||
- id == Opt_background_compression) {
- int ret = bch2_check_set_has_compressed_data(c, v);
- if (ret) {
- mutex_unlock(&c->sb_lock);
- return ret;
- }
- }
+ ret = bch2_opt_check_may_set(c, id, v);
+ if (ret < 0)
+ return ret;
if (opt->set_sb != SET_NO_SB_OPT) {
mutex_lock(&c->sb_lock);
diff --git a/libbcachefs/xattr.c b/libbcachefs/xattr.c
index ed62668b..31f3b981 100644
--- a/libbcachefs/xattr.c
+++ b/libbcachefs/xattr.c
@@ -2,7 +2,6 @@
#include "bcachefs.h"
#include "bkey_methods.h"
#include "btree_update.h"
-#include "compress.h"
#include "extents.h"
#include "fs.h"
#include "rebalance.h"
@@ -430,12 +429,9 @@ static int bch2_xattr_bcachefs_set(const struct xattr_handler *handler,
if (ret < 0)
return ret;
- if (s.id == Opt_compression ||
- s.id == Opt_background_compression) {
- ret = bch2_check_set_has_compressed_data(c, s.v);
- if (ret)
- return ret;
- }
+ ret = bch2_opt_check_may_set(c, s.id, s.v);
+ if (ret < 0)
+ return ret;
s.defined = true;
} else {