summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.bcachefs_revision2
-rw-r--r--Makefile5
-rw-r--r--libbcachefs/alloc.c26
-rw-r--r--libbcachefs/alloc.h1
-rw-r--r--libbcachefs/bcachefs.h3
-rw-r--r--libbcachefs/bcachefs_format.h18
-rw-r--r--libbcachefs/bkey_methods.h2
-rw-r--r--libbcachefs/btree_gc.c116
-rw-r--r--libbcachefs/btree_io.c2
-rw-r--r--libbcachefs/btree_locking.h3
-rw-r--r--libbcachefs/fifo.h29
-rw-r--r--libbcachefs/inode.h2
-rw-r--r--libbcachefs/io.c36
-rw-r--r--libbcachefs/io_types.h3
-rw-r--r--libbcachefs/journal.c51
-rw-r--r--libbcachefs/opts.c6
-rw-r--r--libbcachefs/str_hash.h2
-rw-r--r--libbcachefs/super.c29
-rw-r--r--libbcachefs/super.h5
-rw-r--r--libbcachefs/util.c2
20 files changed, 207 insertions, 136 deletions
diff --git a/.bcachefs_revision b/.bcachefs_revision
index e8975f4c..ad735e5a 100644
--- a/.bcachefs_revision
+++ b/.bcachefs_revision
@@ -1 +1 @@
-c07073eb3b218df0ea107a3e04d2431703f0c07b
+83667254ddf04f558c90f32439e36d7a04ac3a39
diff --git a/Makefile b/Makefile
index 7e00baad..0822433c 100644
--- a/Makefile
+++ b/Makefile
@@ -121,3 +121,8 @@ update-bcachefs-sources:
echo `cd $(LINUX_DIR); git rev-parse HEAD` > .bcachefs_revision
cp $(LINUX_DIR)/fs/bcachefs/*.[ch] libbcachefs/
cp $(LINUX_DIR)/include/trace/events/bcachefs.h include/trace/events/
+
+.PHONY: update-commit-bcachefs-sources
+update-commit-bcachefs-sources: update-bcachefs-sources
+ git commit -m "Update bcachefs sources to `cut -b1-10 .bcachefs_revision`" \
+ .bcachefs_revision libbcachefs/
diff --git a/libbcachefs/alloc.c b/libbcachefs/alloc.c
index fc2a4ab4..d5d2679f 100644
--- a/libbcachefs/alloc.c
+++ b/libbcachefs/alloc.c
@@ -256,19 +256,22 @@ static struct nonce prio_nonce(struct prio_set *p)
}};
}
-static int bch2_prio_write(struct bch_dev *ca)
+int bch2_prio_write(struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;
struct journal *j = &c->journal;
struct journal_res res = { 0 };
bool need_new_journal_entry;
- int i, ret;
+ int i, ret = 0;
if (c->opts.nochanges)
return 0;
+ mutex_lock(&ca->prio_write_lock);
trace_prio_write_start(ca);
+ ca->need_prio_write = false;
+
atomic64_add(ca->mi.bucket_size * prio_buckets(ca),
&ca->meta_sectors_written);
@@ -322,7 +325,7 @@ static int bch2_prio_write(struct bch_dev *ca)
if (bch2_dev_fatal_io_err_on(ret, ca,
"prio write to bucket %zu", r) ||
bch2_meta_write_fault("prio"))
- return ret;
+ goto err;
}
spin_lock(&j->lock);
@@ -340,7 +343,7 @@ static int bch2_prio_write(struct bch_dev *ca)
ret = bch2_journal_res_get(j, &res, u64s, u64s);
if (ret)
- return ret;
+ goto err;
need_new_journal_entry = j->buf[res.idx].nr_prio_buckets <
ca->dev_idx + 1;
@@ -348,7 +351,7 @@ static int bch2_prio_write(struct bch_dev *ca)
ret = bch2_journal_flush_seq(j, res.seq);
if (ret)
- return ret;
+ goto err;
} while (need_new_journal_entry);
/*
@@ -369,7 +372,9 @@ static int bch2_prio_write(struct bch_dev *ca)
spin_unlock(&ca->prio_buckets_lock);
trace_prio_write_end(ca);
- return 0;
+err:
+ mutex_unlock(&ca->prio_write_lock);
+ return ret;
}
int bch2_prio_read(struct bch_dev *ca)
@@ -863,6 +868,7 @@ static int bch2_allocator_thread(void *arg)
{
struct bch_dev *ca = arg;
struct bch_fs *c = ca->fs;
+ long bucket;
int ret;
set_freezable();
@@ -877,7 +883,7 @@ static int bch2_allocator_thread(void *arg)
*/
while (!fifo_empty(&ca->free_inc)) {
- long bucket = fifo_peek(&ca->free_inc);
+ bucket = fifo_peek(&ca->free_inc);
/*
* Don't remove from free_inc until after it's added
@@ -960,12 +966,8 @@ static int bch2_allocator_thread(void *arg)
* consistent-ish:
*/
spin_lock(&ca->freelist_lock);
- while (!fifo_empty(&ca->free_inc)) {
- long bucket;
-
- fifo_pop(&ca->free_inc, bucket);
+ while (fifo_pop(&ca->free_inc, bucket))
bch2_mark_free_bucket(ca, ca->buckets + bucket);
- }
spin_unlock(&ca->freelist_lock);
goto out;
}
diff --git a/libbcachefs/alloc.h b/libbcachefs/alloc.h
index 08638b25..c6b57fa1 100644
--- a/libbcachefs/alloc.h
+++ b/libbcachefs/alloc.h
@@ -24,6 +24,7 @@ void bch2_dev_group_remove(struct dev_group *, struct bch_dev *);
void bch2_dev_group_add(struct dev_group *, struct bch_dev *);
int bch2_prio_read(struct bch_dev *);
+int bch2_prio_write(struct bch_dev *);
size_t bch2_bucket_alloc(struct bch_dev *, enum alloc_reserve);
diff --git a/libbcachefs/bcachefs.h b/libbcachefs/bcachefs.h
index 90d40986..cf1c4bd6 100644
--- a/libbcachefs/bcachefs.h
+++ b/libbcachefs/bcachefs.h
@@ -379,6 +379,8 @@ struct bch_dev {
spinlock_t prio_buckets_lock;
struct bio *bio_prio;
bool prio_read_done;
+ bool need_prio_write;
+ struct mutex prio_write_lock;
/*
* free: Buckets that are ready to be used
@@ -456,6 +458,7 @@ enum {
BCH_FS_BDEV_MOUNTED,
BCH_FS_ERROR,
BCH_FS_FSCK_FIXED_ERRORS,
+ BCH_FS_FIXED_GENS,
};
struct btree_debug {
diff --git a/libbcachefs/bcachefs_format.h b/libbcachefs/bcachefs_format.h
index 0a0dc870..8d780d27 100644
--- a/libbcachefs/bcachefs_format.h
+++ b/libbcachefs/bcachefs_format.h
@@ -617,7 +617,7 @@ struct bch_inode {
__le32 i_flags;
__le16 i_mode;
__u8 fields[0];
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
BKEY_VAL_TYPE(inode, BCH_INODE_FS);
#define BCH_INODE_FIELDS() \
@@ -714,7 +714,7 @@ struct bch_dirent {
__u8 d_type;
__u8 d_name[];
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
BKEY_VAL_TYPE(dirent, BCH_DIRENT);
/* Xattrs */
@@ -736,7 +736,7 @@ struct bch_xattr {
__u8 x_name_len;
__le16 x_val_len;
__u8 x_name[];
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
BKEY_VAL_TYPE(xattr, BCH_XATTR);
/* Superblock */
@@ -811,7 +811,7 @@ struct bch_sb_layout {
__u8 nr_superblocks;
__u8 pad[5];
__u64 sb_offset[61];
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
#define BCH_SB_LAYOUT_SECTOR 7
@@ -1211,7 +1211,7 @@ struct jset {
struct jset_entry start[0];
__u64 _data[0];
};
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
LE32_BITMASK(JSET_CSUM_TYPE, struct jset, flags, 0, 4);
LE32_BITMASK(JSET_BIG_ENDIAN, struct jset, flags, 4, 5);
@@ -1237,7 +1237,7 @@ struct prio_set {
__le16 write_prio;
__u8 gen;
} __attribute__((packed)) data[];
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
LE32_BITMASK(PSET_CSUM_TYPE, struct prio_set, flags, 0, 4);
@@ -1295,7 +1295,7 @@ struct bset {
struct bkey_packed start[0];
__u64 _data[0];
};
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
LE32_BITMASK(BSET_CSUM_TYPE, struct bset, flags, 0, 4);
@@ -1325,7 +1325,7 @@ struct btree_node {
};
};
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
LE64_BITMASK(BTREE_NODE_ID, struct btree_node, flags, 0, 4);
LE64_BITMASK(BTREE_NODE_LEVEL, struct btree_node, flags, 4, 8);
@@ -1342,7 +1342,7 @@ struct btree_node_entry {
};
};
-} __attribute__((packed));
+} __attribute__((packed, aligned(8)));
#ifdef __cplusplus
}
diff --git a/libbcachefs/bkey_methods.h b/libbcachefs/bkey_methods.h
index d372fa61..f795db6d 100644
--- a/libbcachefs/bkey_methods.h
+++ b/libbcachefs/bkey_methods.h
@@ -13,7 +13,7 @@ enum bkey_type {
/* Type of a key in btree @id at level @level: */
static inline enum bkey_type bkey_type(unsigned level, enum btree_id id)
{
- return level ? BKEY_TYPE_BTREE : id;
+ return level ? BKEY_TYPE_BTREE : (enum bkey_type) id;
}
static inline bool btree_type_has_ptrs(enum bkey_type type)
diff --git a/libbcachefs/btree_gc.c b/libbcachefs/btree_gc.c
index 3620c29e..e07a3f97 100644
--- a/libbcachefs/btree_gc.c
+++ b/libbcachefs/btree_gc.c
@@ -129,7 +129,7 @@ static u8 bch2_btree_mark_key(struct bch_fs *c, enum bkey_type type,
int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
struct bkey_s_c k)
{
- int ret;
+ int ret = 0;
switch (k.k->type) {
case BCH_EXTENT:
@@ -140,12 +140,17 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
extent_for_each_ptr(e, ptr) {
struct bch_dev *ca = c->devs[ptr->dev];
struct bucket *g = PTR_BUCKET(ca, ptr);
-
- unfixable_fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
- "%s ptr gen in the future: %u > %u",
- type == BKEY_TYPE_BTREE
- ? "btree" : "data",
- ptr->gen, g->mark.gen);
+ struct bucket_mark new;
+
+ if (fsck_err_on(gen_cmp(ptr->gen, g->mark.gen) > 0, c,
+ "%s ptr gen in the future: %u > %u",
+ type == BKEY_TYPE_BTREE
+ ? "btree" : "data",
+ ptr->gen, g->mark.gen)) {
+ bucket_cmpxchg(g, new, new.gen = ptr->gen);
+ set_bit(BCH_FS_FIXED_GENS, &c->flags);
+ ca->need_prio_write = true;
+ }
}
break;
@@ -157,7 +162,6 @@ int bch2_btree_mark_key_initial(struct bch_fs *c, enum bkey_type type,
atomic64_read(&c->key_version)));
bch2_btree_mark_key(c, type, k);
- return 0;
fsck_err:
return ret;
}
@@ -382,50 +386,14 @@ static void bch2_mark_pending_btree_node_frees(struct bch_fs *c)
mutex_unlock(&c->btree_interior_update_lock);
}
-/**
- * bch_gc - recompute bucket marks and oldest_gen, rewrite btree nodes
- */
-void bch2_gc(struct bch_fs *c)
+void bch2_gc_start(struct bch_fs *c)
{
struct bch_dev *ca;
struct bucket *g;
struct bucket_mark new;
- u64 start_time = local_clock();
unsigned i;
int cpu;
- /*
- * Walk _all_ references to buckets, and recompute them:
- *
- * Order matters here:
- * - Concurrent GC relies on the fact that we have a total ordering for
- * everything that GC walks - see gc_will_visit_node(),
- * gc_will_visit_root()
- *
- * - also, references move around in the course of index updates and
- * various other crap: everything needs to agree on the ordering
- * references are allowed to move around in - e.g., we're allowed to
- * start with a reference owned by an open_bucket (the allocator) and
- * move it to the btree, but not the reverse.
- *
- * This is necessary to ensure that gc doesn't miss references that
- * move around - if references move backwards in the ordering GC
- * uses, GC could skip past them
- */
-
- if (test_bit(BCH_FS_GC_FAILURE, &c->flags))
- return;
-
- trace_gc_start(c);
-
- /*
- * Do this before taking gc_lock - bch2_disk_reservation_get() blocks on
- * gc_lock if sectors_available goes to 0:
- */
- bch2_recalc_sectors_available(c);
-
- down_write(&c->gc_lock);
-
lg_global_lock(&c->usage_lock);
/*
@@ -466,6 +434,50 @@ void bch2_gc(struct bch_fs *c)
}));
ca->oldest_gens[g - ca->buckets] = new.gen;
}
+}
+
+/**
+ * bch_gc - recompute bucket marks and oldest_gen, rewrite btree nodes
+ */
+void bch2_gc(struct bch_fs *c)
+{
+ struct bch_dev *ca;
+ u64 start_time = local_clock();
+ unsigned i;
+
+ /*
+ * Walk _all_ references to buckets, and recompute them:
+ *
+ * Order matters here:
+ * - Concurrent GC relies on the fact that we have a total ordering for
+ * everything that GC walks - see gc_will_visit_node(),
+ * gc_will_visit_root()
+ *
+ * - also, references move around in the course of index updates and
+ * various other crap: everything needs to agree on the ordering
+ * references are allowed to move around in - e.g., we're allowed to
+ * start with a reference owned by an open_bucket (the allocator) and
+ * move it to the btree, but not the reverse.
+ *
+ * This is necessary to ensure that gc doesn't miss references that
+ * move around - if references move backwards in the ordering GC
+ * uses, GC could skip past them
+ */
+
+ if (test_bit(BCH_FS_GC_FAILURE, &c->flags))
+ return;
+
+ trace_gc_start(c);
+
+ /*
+ * Do this before taking gc_lock - bch2_disk_reservation_get() blocks on
+ * gc_lock if sectors_available goes to 0:
+ */
+ bch2_recalc_sectors_available(c);
+
+ down_write(&c->gc_lock);
+
+ bch2_gc_start(c);
/* Walk allocator's references: */
bch2_mark_allocator_buckets(c);
@@ -964,8 +976,11 @@ err:
int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)
{
+ unsigned iter = 0;
enum btree_id id;
int ret;
+again:
+ bch2_gc_start(c);
for (id = 0; id < BTREE_ID_NR; id++) {
ret = bch2_initial_gc_btree(c, id);
@@ -981,6 +996,17 @@ int bch2_initial_gc(struct bch_fs *c, struct list_head *journal)
bch2_mark_metadata(c);
+ if (test_bit(BCH_FS_FIXED_GENS, &c->flags)) {
+ if (iter++ > 2) {
+ bch_info(c, "Unable to fix bucket gens, looping");
+ return -EINVAL;
+ }
+
+ bch_info(c, "Fixed gens, restarting initial mark and sweep:");
+ clear_bit(BCH_FS_FIXED_GENS, &c->flags);
+ goto again;
+ }
+
/*
* Skip past versions that might have possibly been used (as nonces),
* but hadn't had their pointers written:
diff --git a/libbcachefs/btree_io.c b/libbcachefs/btree_io.c
index bb8cee15..46612c10 100644
--- a/libbcachefs/btree_io.c
+++ b/libbcachefs/btree_io.c
@@ -1305,7 +1305,7 @@ static void btree_node_write_endio(struct bio *bio)
closure_put(cl);
}
- if (ca)
+ if (wbio->have_io_ref)
percpu_ref_put(&ca->io_ref);
}
diff --git a/libbcachefs/btree_locking.h b/libbcachefs/btree_locking.h
index 27709d1d..0945ea89 100644
--- a/libbcachefs/btree_locking.h
+++ b/libbcachefs/btree_locking.h
@@ -74,8 +74,7 @@ static inline void mark_btree_node_intent_locked(struct btree_iter *iter,
mark_btree_node_locked(iter, level, SIX_LOCK_intent);
}
-static inline enum six_lock_type
-btree_lock_want(struct btree_iter *iter, int level)
+static inline int btree_lock_want(struct btree_iter *iter, int level)
{
return level < iter->locks_want
? SIX_LOCK_intent
diff --git a/libbcachefs/fifo.h b/libbcachefs/fifo.h
index 2908ca23..a391277e 100644
--- a/libbcachefs/fifo.h
+++ b/libbcachefs/fifo.h
@@ -71,27 +71,33 @@ do { \
#define fifo_entry_idx(fifo, p) (((p) - &fifo_peek_front(fifo)) & (fifo)->mask)
-#define fifo_push_back(fifo, i) \
+#define fifo_push_back_ref(f) \
+ (fifo_full((f)) ? NULL : &(f)->data[(f)->back++ & (f)->mask])
+
+#define fifo_push_front_ref(f) \
+ (fifo_full((f)) ? NULL : &(f)->data[--(f)->front & (f)->mask])
+
+#define fifo_push_back(fifo, new) \
({ \
- bool _r = !fifo_full((fifo)); \
+ typeof((fifo)->data) _r = fifo_push_back_ref(fifo); \
if (_r) \
- (fifo)->data[(fifo)->back++ & (fifo)->mask] = (i); \
- _r; \
+ *_r = (new); \
+ _r != NULL; \
})
-#define fifo_pop_front(fifo, i) \
+#define fifo_push_front(fifo, new) \
({ \
- bool _r = !fifo_empty((fifo)); \
+ typeof((fifo)->data) _r = fifo_push_front_ref(fifo); \
if (_r) \
- (i) = (fifo)->data[(fifo)->front++ & (fifo)->mask]; \
- _r; \
+ *_r = (new); \
+ _r != NULL; \
})
-#define fifo_push_front(fifo, i) \
+#define fifo_pop_front(fifo, i) \
({ \
- bool _r = !fifo_full((fifo)); \
+ bool _r = !fifo_empty((fifo)); \
if (_r) \
- (fifo)->data[--(fifo)->front & (fifo)->mask] = (i); \
+ (i) = (fifo)->data[(fifo)->front++ & (fifo)->mask]; \
_r; \
})
@@ -103,6 +109,7 @@ do { \
_r; \
})
+#define fifo_push_ref(fifo) fifo_push_back_ref(fifo)
#define fifo_push(fifo, i) fifo_push_back(fifo, (i))
#define fifo_pop(fifo, i) fifo_pop_front(fifo, (i))
#define fifo_peek(fifo) fifo_peek_front(fifo)
diff --git a/libbcachefs/inode.h b/libbcachefs/inode.h
index 277d4e42..d1d64a7f 100644
--- a/libbcachefs/inode.h
+++ b/libbcachefs/inode.h
@@ -22,7 +22,7 @@ struct bkey_inode_buf {
#define BCH_INODE_FIELD(_name, _bits) + 8 + _bits / 8
u8 _pad[0 + BCH_INODE_FIELDS()];
#undef BCH_INODE_FIELD
-} __packed;
+};
void bch2_inode_pack(struct bkey_inode_buf *, const struct bch_inode_unpacked *);
int bch2_inode_unpack(struct bkey_s_c_inode, struct bch_inode_unpacked *);
diff --git a/libbcachefs/io.c b/libbcachefs/io.c
index d3494611..44082a0e 100644
--- a/libbcachefs/io.c
+++ b/libbcachefs/io.c
@@ -85,22 +85,6 @@ void bch2_bio_alloc_pages_pool(struct bch_fs *c, struct bio *bio,
/* Bios with headers */
-static void bch2_submit_wbio(struct bch_fs *c, struct bch_write_bio *wbio,
- struct bch_dev *ca, const struct bch_extent_ptr *ptr)
-{
- wbio->ca = ca;
- wbio->submit_time_us = local_clock_us();
- wbio->bio.bi_iter.bi_sector = ptr->offset;
- wbio->bio.bi_bdev = ca ? ca->disk_sb.bdev : NULL;
-
- if (unlikely(!ca)) {
- bcache_io_error(c, &wbio->bio, "device has been removed");
- bio_endio(&wbio->bio);
- } else {
- generic_make_request(&wbio->bio);
- }
-}
-
void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
const struct bkey_i *k)
{
@@ -116,10 +100,6 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
extent_for_each_ptr(e, ptr) {
ca = c->devs[ptr->dev];
- if (!percpu_ref_tryget(&ca->io_ref)) {
- bch2_submit_wbio(c, wbio, NULL, ptr);
- break;
- }
if (ptr + 1 < &extent_entry_last(e)->ptr) {
n = to_wbio(bio_clone_fast(&wbio->bio, GFP_NOIO,
@@ -132,6 +112,7 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
n->bounce = false;
n->split = true;
n->put_bio = true;
+ n->have_io_ref = true;
n->bio.bi_opf = wbio->bio.bi_opf;
__bio_inc_remaining(n->orig);
} else {
@@ -141,7 +122,18 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
if (!journal_flushes_device(ca))
n->bio.bi_opf |= REQ_FUA;
- bch2_submit_wbio(c, n, ca, ptr);
+ n->ca = ca;
+ n->submit_time_us = local_clock_us();
+ n->bio.bi_iter.bi_sector = ptr->offset;
+
+ if (likely(percpu_ref_tryget(&ca->io_ref))) {
+ n->bio.bi_bdev = ca->disk_sb.bdev;
+ generic_make_request(&n->bio);
+ } else {
+ n->have_io_ref = false;
+ bcache_io_error(c, &n->bio, "device has been removed");
+ bio_endio(&n->bio);
+ }
}
}
@@ -327,7 +319,7 @@ static void bch2_write_endio(struct bio *bio)
set_closure_fn(cl, bch2_write_io_error, index_update_wq(op));
}
- if (ca)
+ if (wbio->have_io_ref)
percpu_ref_put(&ca->io_ref);
if (bio->bi_error && orig)
diff --git a/libbcachefs/io_types.h b/libbcachefs/io_types.h
index 07ea67c6..d104cb72 100644
--- a/libbcachefs/io_types.h
+++ b/libbcachefs/io_types.h
@@ -73,7 +73,8 @@ struct bch_write_bio {
unsigned submit_time_us;
unsigned split:1,
bounce:1,
- put_bio:1;
+ put_bio:1,
+ have_io_ref:1;
/* Only for btree writes: */
unsigned used_mempool:1;
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index 76a3b465..7d250df8 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -1122,21 +1122,31 @@ void bch2_journal_buf_put_slowpath(struct journal *j, bool need_write_just_set)
#endif
}
-static void __bch2_journal_next_entry(struct journal *j)
+static struct journal_entry_pin_list *
+__journal_entry_new(struct journal *j, int count)
{
- struct journal_entry_pin_list pin_list, *p;
- struct journal_buf *buf;
+ struct journal_entry_pin_list *p = fifo_push_ref(&j->pin);
/*
* The fifo_push() needs to happen at the same time as j->seq is
* incremented for last_seq() to be calculated correctly
*/
atomic64_inc(&j->seq);
- BUG_ON(!fifo_push(&j->pin, pin_list));
- p = &fifo_peek_back(&j->pin);
+
+ BUG_ON(journal_pin_seq(j, p) != atomic64_read(&j->seq));
INIT_LIST_HEAD(&p->list);
- atomic_set(&p->count, 1);
+ atomic_set(&p->count, count);
+
+ return p;
+}
+
+static void __bch2_journal_next_entry(struct journal *j)
+{
+ struct journal_entry_pin_list *p;
+ struct journal_buf *buf;
+
+ p = __journal_entry_new(j, 1);
if (test_bit(JOURNAL_REPLAY_DONE, &j->flags)) {
smp_wmb();
@@ -1149,8 +1159,6 @@ static void __bch2_journal_next_entry(struct journal *j)
memset(buf->data, 0, sizeof(*buf->data));
buf->data->seq = cpu_to_le64(atomic64_read(&j->seq));
buf->data->u64s = 0;
-
- BUG_ON(journal_pin_seq(j, p) != atomic64_read(&j->seq));
}
static inline size_t journal_entry_u64s_reserve(struct journal_buf *buf)
@@ -1423,16 +1431,8 @@ void bch2_journal_start(struct bch_fs *c)
set_bit(JOURNAL_STARTED, &j->flags);
- while (atomic64_read(&j->seq) < new_seq) {
- struct journal_entry_pin_list pin_list, *p;
-
- BUG_ON(!fifo_push(&j->pin, pin_list));
- p = &fifo_peek_back(&j->pin);
-
- INIT_LIST_HEAD(&p->list);
- atomic_set(&p->count, 0);
- atomic64_inc(&j->seq);
- }
+ while (atomic64_read(&j->seq) < new_seq)
+ __journal_entry_new(j, 0);
/*
* journal_buf_switch() only inits the next journal entry when it
@@ -1494,8 +1494,11 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
BTREE_INSERT_JOURNAL_REPLAY);
bch2_disk_reservation_put(c, &disk_res);
- if (ret)
+ if (ret) {
+ bch_err(c, "journal replay: error %d while replaying key",
+ ret);
goto err;
+ }
cond_resched();
keys++;
@@ -1517,8 +1520,10 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
* entry on disk, if we crash before writing the next journal entry:
*/
ret = bch2_journal_meta(&c->journal);
- if (ret)
+ if (ret) {
+ bch_err(c, "journal replay: error %d flushing journal", ret);
goto err;
+ }
}
bch_info(c, "journal replay done, %i keys in %i entries, seq %llu",
@@ -1526,11 +1531,7 @@ int bch2_journal_replay(struct bch_fs *c, struct list_head *list)
bch2_journal_set_replay_done(&c->journal);
err:
- if (ret)
- bch_err(c, "journal replay error: %d", ret);
-
bch2_journal_entries_free(list);
-
return ret;
}
@@ -2372,7 +2373,7 @@ retry:
switch (journal_buf_switch(j, false)) {
case JOURNAL_ENTRY_ERROR:
spin_unlock(&j->lock);
- return -EIO;
+ return -EROFS;
case JOURNAL_ENTRY_INUSE:
/* haven't finished writing out the previous one: */
spin_unlock(&j->lock);
diff --git a/libbcachefs/opts.c b/libbcachefs/opts.c
index 7c4cf804..1eb27ae1 100644
--- a/libbcachefs/opts.c
+++ b/libbcachefs/opts.c
@@ -72,7 +72,7 @@ const struct bch_option bch2_opt_table[] = {
#undef BCH_OPT
};
-static enum bch_opt_id bch2_opt_lookup(const char *name)
+static int bch2_opt_lookup(const char *name)
{
const struct bch_option *i;
@@ -209,7 +209,7 @@ int bch2_parse_mount_opts(struct bch_opts *opts, char *options)
enum bch_opt_id bch2_parse_sysfs_opt(const char *name, const char *val,
u64 *res)
{
- enum bch_opt_id id = bch2_opt_lookup(name);
+ int id = bch2_opt_lookup(name);
int ret;
if (id < 0)
@@ -225,7 +225,7 @@ enum bch_opt_id bch2_parse_sysfs_opt(const char *name, const char *val,
ssize_t bch2_opt_show(struct bch_opts *opts, const char *name,
char *buf, size_t size)
{
- enum bch_opt_id id = bch2_opt_lookup(name);
+ int id = bch2_opt_lookup(name);
const struct bch_option *opt;
u64 v;
diff --git a/libbcachefs/str_hash.h b/libbcachefs/str_hash.h
index f70fc1a9..6eac6fc0 100644
--- a/libbcachefs/str_hash.h
+++ b/libbcachefs/str_hash.h
@@ -25,7 +25,7 @@ bch2_hash_info_init(struct bch_fs *c,
/* XXX ick */
struct bch_hash_info info = {
.type = (bi->i_flags >> INODE_STR_HASH_OFFSET) &
- ~(~0 << INODE_STR_HASH_BITS)
+ ~(~0U << INODE_STR_HASH_BITS)
};
switch (info.type) {
diff --git a/libbcachefs/super.c b/libbcachefs/super.c
index b8139742..19f96921 100644
--- a/libbcachefs/super.c
+++ b/libbcachefs/super.c
@@ -768,6 +768,15 @@ static const char *__bch2_fs_start(struct bch_fs *c)
if (ret)
goto err;
+ for_each_rw_member(ca, c, i)
+ if (ca->need_prio_write) {
+ ret = bch2_prio_write(ca);
+ if (ret) {
+ percpu_ref_put(&ca->io_ref);
+ goto err;
+ }
+ }
+
bch_verbose(c, "fsck done");
} else {
struct bch_inode_unpacked inode;
@@ -1092,6 +1101,7 @@ static int bch2_dev_alloc(struct bch_fs *c, unsigned dev_idx)
spin_lock_init(&ca->freelist_lock);
spin_lock_init(&ca->prio_buckets_lock);
mutex_init(&ca->heap_lock);
+ mutex_init(&ca->prio_write_lock);
bch2_dev_moving_gc_init(ca);
INIT_WORK(&ca->io_error_work, bch2_nonfatal_io_error_work);
@@ -1265,6 +1275,15 @@ bool bch2_fs_may_start(struct bch_fs *c, int flags)
return true;
}
+/*
+ * Note: this function is also used by the error paths - when a particular
+ * device sees an error, we call it to determine whether we can just set the
+ * device RO, or - if this function returns false - we'll set the whole
+ * filesystem RO:
+ *
+ * XXX: maybe we should be more explicit about whether we're changing state
+ * because we got an error or what have you?
+ */
bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
enum bch_member_state new_state, int flags)
{
@@ -1273,6 +1292,16 @@ bool bch2_dev_state_allowed(struct bch_fs *c, struct bch_dev *ca,
if (new_state == BCH_MEMBER_STATE_RW)
return true;
+ if (ca->mi.state == BCH_MEMBER_STATE_FAILED)
+ return true;
+
+ /*
+ * If the device is already offline - whatever is going on with it can't
+ * possibly make the FS need to go RO:
+ */
+ if (!bch2_dev_is_online(ca))
+ return true;
+
if (ca->mi.has_data &&
!(flags & BCH_FORCE_IF_DATA_DEGRADED))
return false;
diff --git a/libbcachefs/super.h b/libbcachefs/super.h
index 700344a9..e4bb583f 100644
--- a/libbcachefs/super.h
+++ b/libbcachefs/super.h
@@ -32,6 +32,11 @@ static inline struct bch_dev *__bch2_next_dev(struct bch_fs *c, unsigned *iter)
return ca;
}
+static inline bool bch2_dev_is_online(struct bch_dev *ca)
+{
+ return !percpu_ref_is_zero(&ca->io_ref);
+}
+
#define __for_each_member_device(ca, c, iter) \
for ((iter) = 0; ((ca) = __bch2_next_dev((c), &(iter))); (iter)++)
diff --git a/libbcachefs/util.c b/libbcachefs/util.c
index f2e6ec4d..f57224a6 100644
--- a/libbcachefs/util.c
+++ b/libbcachefs/util.c
@@ -85,7 +85,7 @@ ssize_t bch2_hprint(char *buf, s64 v)
int u, t = 0;
for (u = 0; v >= 1024 || v <= -1024; u++) {
- t = v & ~(~0 << 10);
+ t = v & ~(~0U << 10);
v >>= 10;
}