summaryrefslogtreecommitdiff
path: root/fs/bcachefs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs')
-rw-r--r--fs/bcachefs/Kconfig7
-rw-r--r--fs/bcachefs/btree_cache.c9
-rw-r--r--fs/bcachefs/btree_io.c4
-rw-r--r--fs/bcachefs/btree_iter.c33
-rw-r--r--fs/bcachefs/btree_iter.h14
-rw-r--r--fs/bcachefs/btree_key_cache.c2
-rw-r--r--fs/bcachefs/btree_locking.c5
-rw-r--r--fs/bcachefs/btree_locking.h2
-rw-r--r--fs/bcachefs/btree_trans_commit.c4
-rw-r--r--fs/bcachefs/btree_types.h3
-rw-r--r--fs/bcachefs/btree_update.h8
-rw-r--r--fs/bcachefs/btree_update_interior.c2
-rw-r--r--fs/bcachefs/btree_update_interior.h4
-rw-r--r--fs/bcachefs/btree_write_buffer.c21
-rw-r--r--fs/bcachefs/data_update.c1
-rw-r--r--fs/bcachefs/dirent.h5
-rw-r--r--fs/bcachefs/disk_accounting.h2
-rw-r--r--fs/bcachefs/extents.c2
-rw-r--r--fs/bcachefs/extents.h2
-rw-r--r--fs/bcachefs/fs-common.c11
-rw-r--r--fs/bcachefs/fs-io.c1
-rw-r--r--fs/bcachefs/fs-ioctl.c4
-rw-r--r--fs/bcachefs/fs.c8
-rw-r--r--fs/bcachefs/fsck.c57
-rw-r--r--fs/bcachefs/inode.c1
-rw-r--r--fs/bcachefs/io_read.c19
-rw-r--r--fs/bcachefs/io_write.c12
-rw-r--r--fs/bcachefs/journal.c57
-rw-r--r--fs/bcachefs/journal_reclaim.c37
-rw-r--r--fs/bcachefs/journal_types.h5
-rw-r--r--fs/bcachefs/movinggc.c25
-rw-r--r--fs/bcachefs/reflink.c16
-rw-r--r--fs/bcachefs/sb-downgrade.c5
-rw-r--r--fs/bcachefs/sb-errors_format.h4
-rw-r--r--fs/bcachefs/six.c5
-rw-r--r--fs/bcachefs/six.h7
-rw-r--r--fs/bcachefs/super-io.c24
-rw-r--r--fs/bcachefs/super-io.h11
-rw-r--r--fs/bcachefs/super.c11
-rw-r--r--fs/bcachefs/util.c24
-rw-r--r--fs/bcachefs/util.h2
41 files changed, 273 insertions, 203 deletions
diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig
index 85eea7a4dea3..fc7efd0a7525 100644
--- a/fs/bcachefs/Kconfig
+++ b/fs/bcachefs/Kconfig
@@ -61,6 +61,13 @@ config BCACHEFS_DEBUG
The resulting code will be significantly slower than normal; you
probably shouldn't select this option unless you're a developer.
+config BCACHEFS_INJECT_TRANSACTION_RESTARTS
+ bool "Randomly inject transaction restarts"
+ depends on BCACHEFS_DEBUG
+ help
+ Randomly inject transaction restarts in a few core paths - may have a
+ significant performance penalty
+
config BCACHEFS_TESTS
bool "bcachefs unit and performance tests"
depends on BCACHEFS_FS
diff --git a/fs/bcachefs/btree_cache.c b/fs/bcachefs/btree_cache.c
index ca755e8d1a37..1ec1f90e0eb3 100644
--- a/fs/bcachefs/btree_cache.c
+++ b/fs/bcachefs/btree_cache.c
@@ -203,7 +203,7 @@ struct btree *__bch2_btree_node_mem_alloc(struct bch_fs *c)
return NULL;
}
- bch2_btree_lock_init(&b->c, 0);
+ bch2_btree_lock_init(&b->c, 0, GFP_KERNEL);
__bch2_btree_node_to_freelist(bc, b);
return b;
@@ -795,17 +795,18 @@ struct btree *bch2_btree_node_mem_alloc(struct btree_trans *trans, bool pcpu_rea
}
b = __btree_node_mem_alloc(c, GFP_NOWAIT|__GFP_NOWARN);
- if (!b) {
+ if (b) {
+ bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0, GFP_NOWAIT);
+ } else {
mutex_unlock(&bc->lock);
bch2_trans_unlock(trans);
b = __btree_node_mem_alloc(c, GFP_KERNEL);
if (!b)
goto err;
+ bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0, GFP_KERNEL);
mutex_lock(&bc->lock);
}
- bch2_btree_lock_init(&b->c, pcpu_read_locks ? SIX_LOCK_INIT_PCPU : 0);
-
BUG_ON(!six_trylock_intent(&b->c.lock));
BUG_ON(!six_trylock_write(&b->c.lock));
diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c
index e371e60e3133..756736f9243d 100644
--- a/fs/bcachefs/btree_io.c
+++ b/fs/bcachefs/btree_io.c
@@ -996,7 +996,7 @@ drop_this_key:
}
got_good_key:
le16_add_cpu(&i->u64s, -next_good_key);
- memmove_u64s_down(k, bkey_p_next(k), (u64 *) vstruct_end(i) - (u64 *) k);
+ memmove_u64s_down(k, (u64 *) k + next_good_key, (u64 *) vstruct_end(i) - (u64 *) k);
set_btree_node_need_rewrite(b);
}
fsck_err:
@@ -1186,7 +1186,7 @@ int bch2_btree_node_read_done(struct bch_fs *c, struct bch_dev *ca,
le64_to_cpu(i->journal_seq),
b->written, b->written + sectors, ptr_written);
- b->written += sectors;
+ b->written = min(b->written + sectors, btree_sectors(c));
if (blacklisted && !first)
continue;
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 5988219c6908..e32fce4fd258 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -2357,6 +2357,12 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en
bch2_btree_iter_verify_entry_exit(iter);
EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bkey_eq(end, POS_MAX));
+ ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (unlikely(ret)) {
+ k = bkey_s_c_err(ret);
+ goto out_no_locked;
+ }
+
if (iter->update_path) {
bch2_path_put_nokeep(trans, iter->update_path,
iter->flags & BTREE_ITER_intent);
@@ -2622,6 +2628,12 @@ struct bkey_s_c bch2_btree_iter_peek_prev_min(struct btree_iter *iter, struct bp
bch2_btree_iter_verify_entry_exit(iter);
EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bpos_eq(end, POS_MIN));
+ int ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (unlikely(ret)) {
+ k = bkey_s_c_err(ret);
+ goto out_no_locked;
+ }
+
while (1) {
k = __bch2_btree_iter_peek_prev(iter, search_key);
if (unlikely(!k.k))
@@ -2749,6 +2761,12 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_iter *iter)
bch2_btree_iter_verify_entry_exit(iter);
EBUG_ON(btree_iter_path(trans, iter)->level && (iter->flags & BTREE_ITER_with_key_cache));
+ ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (unlikely(ret)) {
+ k = bkey_s_c_err(ret);
+ goto out_no_locked;
+ }
+
/* extents can't span inode numbers: */
if ((iter->flags & BTREE_ITER_is_extents) &&
unlikely(iter->pos.offset == KEY_OFFSET_MAX)) {
@@ -3106,6 +3124,10 @@ void *__bch2_trans_kmalloc(struct btree_trans *trans, size_t size)
WARN_ON_ONCE(new_bytes > BTREE_TRANS_MEM_MAX);
+ ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (ret)
+ return ERR_PTR(ret);
+
struct btree_transaction_stats *s = btree_trans_stats(trans);
s->max_mem = max(s->max_mem, new_bytes);
@@ -3163,7 +3185,8 @@ out_new_mem:
if (old_bytes) {
trace_and_count(c, trans_restart_mem_realloced, trans, _RET_IP_, new_bytes);
- return ERR_PTR(btree_trans_restart(trans, BCH_ERR_transaction_restart_mem_realloced));
+ return ERR_PTR(btree_trans_restart_ip(trans,
+ BCH_ERR_transaction_restart_mem_realloced, _RET_IP_));
}
out_change_top:
p = trans->mem + trans->mem_top;
@@ -3271,6 +3294,14 @@ u32 bch2_trans_begin(struct btree_trans *trans)
trans->last_begin_ip = _RET_IP_;
+#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
+ if (trans->restarted) {
+ trans->restart_count_this_trans++;
+ } else {
+ trans->restart_count_this_trans = 0;
+ }
+#endif
+
trans_set_locked(trans, false);
if (trans->restarted) {
diff --git a/fs/bcachefs/btree_iter.h b/fs/bcachefs/btree_iter.h
index b9538e6e6d65..b96157f3dc9c 100644
--- a/fs/bcachefs/btree_iter.h
+++ b/fs/bcachefs/btree_iter.h
@@ -355,6 +355,18 @@ static int btree_trans_restart(struct btree_trans *trans, int err)
return btree_trans_restart_ip(trans, err, _THIS_IP_);
}
+static inline int trans_maybe_inject_restart(struct btree_trans *trans, unsigned long ip)
+{
+#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
+ if (!(ktime_get_ns() & ~(~0ULL << min(63, (10 + trans->restart_count_this_trans))))) {
+ trace_and_count(trans->c, trans_restart_injected, trans, ip);
+ return btree_trans_restart_ip(trans,
+ BCH_ERR_transaction_restart_fault_inject, ip);
+ }
+#endif
+ return 0;
+}
+
bool bch2_btree_node_upgrade(struct btree_trans *,
struct btree_path *, unsigned);
@@ -739,7 +751,7 @@ transaction_restart: \
if (!_ret2) \
bch2_trans_verify_not_restarted(_trans, _restart_count);\
\
- _ret2 ?: trans_was_restarted(_trans, _restart_count); \
+ _ret2 ?: trans_was_restarted(_trans, _orig_restart_count); \
})
#define for_each_btree_key_max_continue(_trans, _iter, \
diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c
index 1821f40c161a..edce59433375 100644
--- a/fs/bcachefs/btree_key_cache.c
+++ b/fs/bcachefs/btree_key_cache.c
@@ -156,7 +156,7 @@ bkey_cached_alloc(struct btree_trans *trans, struct btree_path *path, unsigned k
}
if (ck) {
- bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0);
+ bch2_btree_lock_init(&ck->c, pcpu_readers ? SIX_LOCK_INIT_PCPU : 0, GFP_KERNEL);
ck->c.cached = true;
goto lock;
}
diff --git a/fs/bcachefs/btree_locking.c b/fs/bcachefs/btree_locking.c
index 10b805a60f52..caef65adeae4 100644
--- a/fs/bcachefs/btree_locking.c
+++ b/fs/bcachefs/btree_locking.c
@@ -7,9 +7,10 @@
static struct lock_class_key bch2_btree_node_lock_key;
void bch2_btree_lock_init(struct btree_bkey_cached_common *b,
- enum six_lock_init_flags flags)
+ enum six_lock_init_flags flags,
+ gfp_t gfp)
{
- __six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags);
+ __six_lock_init(&b->lock, "b->c.lock", &bch2_btree_node_lock_key, flags, gfp);
lockdep_set_notrack_class(&b->lock);
}
diff --git a/fs/bcachefs/btree_locking.h b/fs/bcachefs/btree_locking.h
index b54ef48eb8cc..b33ab7af8440 100644
--- a/fs/bcachefs/btree_locking.h
+++ b/fs/bcachefs/btree_locking.h
@@ -13,7 +13,7 @@
#include "btree_iter.h"
#include "six.h"
-void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags);
+void bch2_btree_lock_init(struct btree_bkey_cached_common *, enum six_lock_init_flags, gfp_t gfp);
void bch2_trans_unlock_noassert(struct btree_trans *);
void bch2_trans_unlock_write(struct btree_trans *);
diff --git a/fs/bcachefs/btree_trans_commit.c b/fs/bcachefs/btree_trans_commit.c
index 2760dd9569ed..c4f524b2ca9a 100644
--- a/fs/bcachefs/btree_trans_commit.c
+++ b/fs/bcachefs/btree_trans_commit.c
@@ -999,6 +999,10 @@ int __bch2_trans_commit(struct btree_trans *trans, unsigned flags)
bch2_trans_verify_not_unlocked_or_in_restart(trans);
+ ret = trans_maybe_inject_restart(trans, _RET_IP_);
+ if (unlikely(ret))
+ goto out_reset;
+
if (!trans->nr_updates &&
!trans->journal_entries_u64s)
goto out_reset;
diff --git a/fs/bcachefs/btree_types.h b/fs/bcachefs/btree_types.h
index a6f251eb4164..a09cbe9cd94f 100644
--- a/fs/bcachefs/btree_types.h
+++ b/fs/bcachefs/btree_types.h
@@ -509,6 +509,9 @@ struct btree_trans {
bool notrace_relock_fail:1;
enum bch_errcode restarted:16;
u32 restart_count;
+#ifdef CONFIG_BCACHEFS_INJECT_TRANSACTION_RESTARTS
+ u32 restart_count_this_trans;
+#endif
u64 last_begin_time;
unsigned long last_begin_ip;
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 8f22ef9a7651..47d8690f01bf 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -126,10 +126,18 @@ bch2_trans_jset_entry_alloc(struct btree_trans *trans, unsigned u64s)
int bch2_btree_insert_clone_trans(struct btree_trans *, enum btree_id, struct bkey_i *);
+int bch2_btree_write_buffer_insert_err(struct btree_trans *,
+ enum btree_id, struct bkey_i *);
+
static inline int __must_check bch2_trans_update_buffered(struct btree_trans *trans,
enum btree_id btree,
struct bkey_i *k)
{
+ if (unlikely(!btree_type_uses_write_buffer(btree))) {
+ int ret = bch2_btree_write_buffer_insert_err(trans, btree, k);
+ dump_stack();
+ return ret;
+ }
/*
* Most updates skip the btree write buffer until journal replay is
* finished because synchronization with journal replay relies on having
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index f4aeadbe53c1..e4e7c804625e 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -681,9 +681,11 @@ static void btree_update_nodes_written(struct btree_update *as)
b = as->old_nodes[i];
+ bch2_trans_begin(trans);
btree_node_lock_nopath_nofail(trans, &b->c, SIX_LOCK_read);
seq = b->data ? b->data->keys.seq : 0;
six_unlock_read(&b->c.lock);
+ bch2_trans_unlock_long(trans);
if (seq == as->old_nodes_seq[i])
wait_on_bit_io(&b->flags, BTREE_NODE_write_in_flight_inner,
diff --git a/fs/bcachefs/btree_update_interior.h b/fs/bcachefs/btree_update_interior.h
index 7930ffea3075..26d646e1275c 100644
--- a/fs/bcachefs/btree_update_interior.h
+++ b/fs/bcachefs/btree_update_interior.h
@@ -278,12 +278,12 @@ static inline struct btree_node_entry *want_new_bset(struct bch_fs *c, struct bt
{
struct bset_tree *t = bset_tree_last(b);
struct btree_node_entry *bne = max(write_block(b),
- (void *) btree_bkey_last(b, bset_tree_last(b)));
+ (void *) btree_bkey_last(b, t));
ssize_t remaining_space =
__bch2_btree_u64s_remaining(b, bne->keys.start);
if (unlikely(bset_written(b, bset(b, t)))) {
- if (remaining_space > (ssize_t) (block_bytes(c) >> 3))
+ if (b->written + block_sectors(c) <= btree_sectors(c))
return bne;
} else {
if (unlikely(bset_u64s(t) * sizeof(u64) > btree_write_set_buffer(b)) &&
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index b56c4987b8c9..2c09d19dd621 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -264,6 +264,22 @@ out:
BUG_ON(wb->sorted.size < wb->flushing.keys.nr);
}
+int bch2_btree_write_buffer_insert_err(struct btree_trans *trans,
+ enum btree_id btree, struct bkey_i *k)
+{
+ struct bch_fs *c = trans->c;
+ struct printbuf buf = PRINTBUF;
+
+ prt_printf(&buf, "attempting to do write buffer update on non wb btree=");
+ bch2_btree_id_to_text(&buf, btree);
+ prt_str(&buf, "\n");
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(k));
+
+ bch2_fs_inconsistent(c, "%s", buf.buf);
+ printbuf_exit(&buf);
+ return -EROFS;
+}
+
static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
{
struct bch_fs *c = trans->c;
@@ -312,7 +328,10 @@ static int bch2_btree_write_buffer_flush_locked(struct btree_trans *trans)
darray_for_each(wb->sorted, i) {
struct btree_write_buffered_key *k = &wb->flushing.keys.data[i->idx];
- BUG_ON(!btree_type_uses_write_buffer(k->btree));
+ if (unlikely(!btree_type_uses_write_buffer(k->btree))) {
+ ret = bch2_btree_write_buffer_insert_err(trans, k->btree, &k->k);
+ goto err;
+ }
for (struct wb_key_ref *n = i + 1; n < min(i + 4, &darray_top(wb->sorted)); n++)
prefetch(&wb->flushing.keys.data[n->idx]);
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 337494facac6..642fbc60ecab 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -340,6 +340,7 @@ restart_drop_extra_replicas:
struct printbuf buf = PRINTBUF;
prt_str(&buf, "about to insert invalid key in data update path");
+ prt_printf(&buf, "\nop.nonce: %u", m->op.nonce);
prt_str(&buf, "\nold: ");
bch2_bkey_val_to_text(&buf, c, old);
prt_str(&buf, "\nk: ");
diff --git a/fs/bcachefs/dirent.h b/fs/bcachefs/dirent.h
index a633f83c1ac7..362b3b2f2f2e 100644
--- a/fs/bcachefs/dirent.h
+++ b/fs/bcachefs/dirent.h
@@ -31,11 +31,6 @@ static inline unsigned dirent_val_u64s(unsigned len)
sizeof(u64));
}
-static inline unsigned int dirent_occupied_size(const struct qstr *name)
-{
- return (BKEY_U64s + dirent_val_u64s(name->len)) * sizeof(u64);
-}
-
int bch2_dirent_read_target(struct btree_trans *, subvol_inum,
struct bkey_s_c_dirent, subvol_inum *);
diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h
index 5360cbb3ec29..f4372cafea2e 100644
--- a/fs/bcachefs/disk_accounting.h
+++ b/fs/bcachefs/disk_accounting.h
@@ -210,11 +210,13 @@ static inline void bch2_accounting_mem_read_counters(struct bch_accounting_mem *
static inline void bch2_accounting_mem_read(struct bch_fs *c, struct bpos p,
u64 *v, unsigned nr)
{
+ percpu_down_read(&c->mark_lock);
struct bch_accounting_mem *acc = &c->accounting;
unsigned idx = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
accounting_pos_cmp, &p);
bch2_accounting_mem_read_counters(acc, idx, v, nr, false);
+ percpu_up_read(&c->mark_lock);
}
static inline struct bversion journal_pos_to_bversion(struct journal_res *res, unsigned offset)
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 05d5f71a7ca9..2d8042f853dc 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -99,7 +99,7 @@ static inline bool ptr_better(struct bch_fs *c,
/* Pick at random, biased in favor of the faster device: */
- return bch2_rand_range(l1 + l2) > l1;
+ return bch2_get_random_u64_below(l1 + l2) > l1;
}
if (bch2_force_reconstruct_read)
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index 620b284aa34f..204d765dd74c 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -704,7 +704,7 @@ static inline bool bch2_extent_ptr_eq(struct bch_extent_ptr ptr1,
ptr1.unwritten == ptr2.unwritten &&
ptr1.offset == ptr2.offset &&
ptr1.dev == ptr2.dev &&
- ptr1.dev == ptr2.dev);
+ ptr1.gen == ptr2.gen);
}
void bch2_ptr_swab(struct bkey_s);
diff --git a/fs/bcachefs/fs-common.c b/fs/bcachefs/fs-common.c
index d70d9f634cea..2c3d46ac70c6 100644
--- a/fs/bcachefs/fs-common.c
+++ b/fs/bcachefs/fs-common.c
@@ -152,7 +152,6 @@ int bch2_create_trans(struct btree_trans *trans,
if (is_subdir_for_nlink(new_inode))
dir_u->bi_nlink++;
dir_u->bi_mtime = dir_u->bi_ctime = now;
- dir_u->bi_size += dirent_occupied_size(name);
ret = bch2_inode_write(trans, &dir_iter, dir_u);
if (ret)
@@ -221,7 +220,6 @@ int bch2_link_trans(struct btree_trans *trans,
}
dir_u->bi_mtime = dir_u->bi_ctime = now;
- dir_u->bi_size += dirent_occupied_size(name);
dir_hash = bch2_hash_info_init(c, dir_u);
@@ -324,7 +322,6 @@ int bch2_unlink_trans(struct btree_trans *trans,
dir_u->bi_mtime = dir_u->bi_ctime = inode_u->bi_ctime = now;
dir_u->bi_nlink -= is_subdir_for_nlink(inode_u);
- dir_u->bi_size -= dirent_occupied_size(name);
ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
&dir_hash, &dirent_iter,
@@ -463,14 +460,6 @@ int bch2_rename_trans(struct btree_trans *trans,
goto err;
}
- if (mode == BCH_RENAME) {
- src_dir_u->bi_size -= dirent_occupied_size(src_name);
- dst_dir_u->bi_size += dirent_occupied_size(dst_name);
- }
-
- if (mode == BCH_RENAME_OVERWRITE)
- src_dir_u->bi_size -= dirent_occupied_size(src_name);
-
if (src_inode_u->bi_parent_subvol)
src_inode_u->bi_parent_subvol = dst_dir.subvol;
diff --git a/fs/bcachefs/fs-io.c b/fs/bcachefs/fs-io.c
index 94bf34b9b65f..717e7b94c66f 100644
--- a/fs/bcachefs/fs-io.c
+++ b/fs/bcachefs/fs-io.c
@@ -466,6 +466,7 @@ int bchfs_truncate(struct mnt_idmap *idmap,
ret = bch2_truncate_folio(inode, iattr->ia_size);
if (unlikely(ret < 0))
goto err;
+ ret = 0;
truncate_setsize(&inode->v, iattr->ia_size);
diff --git a/fs/bcachefs/fs-ioctl.c b/fs/bcachefs/fs-ioctl.c
index 15725b4ce393..595b57fabc9a 100644
--- a/fs/bcachefs/fs-ioctl.c
+++ b/fs/bcachefs/fs-ioctl.c
@@ -511,10 +511,6 @@ static long bch2_ioctl_subvolume_destroy(struct bch_fs *c, struct file *filp,
ret = -EXDEV;
goto err;
}
- if (!d_is_positive(victim)) {
- ret = -ENOENT;
- goto err;
- }
ret = __bch2_unlink(dir, victim, true);
if (!ret) {
fsnotify_rmdir(dir, victim);
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 90ade8f648d9..b2669d7ffec5 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -858,10 +858,10 @@ err:
return bch2_err_class(ret);
}
-static int bch2_mkdir(struct mnt_idmap *idmap,
- struct inode *vdir, struct dentry *dentry, umode_t mode)
+static struct dentry *bch2_mkdir(struct mnt_idmap *idmap,
+ struct inode *vdir, struct dentry *dentry, umode_t mode)
{
- return bch2_mknod(idmap, vdir, dentry, mode|S_IFDIR, 0);
+ return ERR_PTR(bch2_mknod(idmap, vdir, dentry, mode|S_IFDIR, 0));
}
static int bch2_rename2(struct mnt_idmap *idmap,
@@ -2396,7 +2396,7 @@ static struct file_system_type bcache_fs_type = {
.name = "bcachefs",
.init_fs_context = bch2_init_fs_context,
.kill_sb = bch2_kill_sb,
- .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
+ .fs_flags = FS_REQUIRES_DEV | FS_ALLOW_IDMAP | FS_LBS,
};
MODULE_ALIAS_FS("bcachefs");
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 53a421ff136d..0e85131d0af8 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -823,6 +823,7 @@ struct inode_walker_entry {
struct bch_inode_unpacked inode;
u32 snapshot;
u64 count;
+ u64 i_size;
};
struct inode_walker {
@@ -910,8 +911,9 @@ found:
if (k.k->p.snapshot != i->snapshot && !is_whiteout) {
struct inode_walker_entry new = *i;
- new.snapshot = k.k->p.snapshot;
- new.count = 0;
+ new.snapshot = k.k->p.snapshot;
+ new.count = 0;
+ new.i_size = 0;
struct printbuf buf = PRINTBUF;
bch2_bkey_val_to_text(&buf, c, k);
@@ -1116,37 +1118,6 @@ err:
return ret;
}
-static int check_directory_size(struct btree_trans *trans,
- struct bch_inode_unpacked *inode_u,
- struct bkey_s_c inode_k, bool *write_inode)
-{
- struct btree_iter iter;
- struct bkey_s_c k;
- u64 new_size = 0;
- int ret;
-
- for_each_btree_key_max_norestart(trans, iter, BTREE_ID_dirents,
- SPOS(inode_k.k->p.offset, 0, inode_k.k->p.snapshot),
- POS(inode_k.k->p.offset, U64_MAX),
- 0, k, ret) {
- if (k.k->type != KEY_TYPE_dirent)
- continue;
-
- struct bkey_s_c_dirent dirent = bkey_s_c_to_dirent(k);
- struct qstr name = bch2_dirent_get_name(dirent);
-
- new_size += dirent_occupied_size(&name);
- }
- bch2_trans_iter_exit(trans, &iter);
-
- if (!ret && inode_u->bi_size != new_size) {
- inode_u->bi_size = new_size;
- *write_inode = true;
- }
-
- return ret;
-}
-
static int check_inode(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
@@ -1335,16 +1306,6 @@ static int check_inode(struct btree_trans *trans,
u.bi_journal_seq = journal_cur_seq(&c->journal);
do_update = true;
}
-
- if (S_ISDIR(u.bi_mode)) {
- ret = check_directory_size(trans, &u, k, &do_update);
-
- fsck_err_on(ret,
- trans, directory_size_mismatch,
- "directory inode %llu:%u with the mismatch directory size",
- u.bi_inum, k.k->p.snapshot);
- ret = 0;
- }
do_update:
if (do_update) {
ret = __bch2_fsck_write_inode(trans, &u);
@@ -2017,7 +1978,7 @@ fsck_err:
return ret;
}
-static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
+static int check_subdir_dirents_count(struct btree_trans *trans, struct inode_walker *w)
{
u32 restart_count = trans->restart_count;
return check_subdir_count_notnested(trans, w) ?:
@@ -2367,7 +2328,7 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
goto out;
if (dir->last_pos.inode != k.k->p.inode && dir->have_inodes) {
- ret = check_subdir_count(trans, dir);
+ ret = check_subdir_dirents_count(trans, dir);
if (ret)
goto err;
}
@@ -2457,9 +2418,11 @@ static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
if (ret)
goto err;
- if (d.v->d_type == DT_DIR)
- for_each_visible_inode(c, s, dir, d.k->p.snapshot, i)
+ for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) {
+ if (d.v->d_type == DT_DIR)
i->count++;
+ i->i_size += bkey_bytes(d.k);
+ }
out:
err:
fsck_err:
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index 04ec05206f8c..339b80770f1d 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -1198,6 +1198,7 @@ void bch2_inode_opts_get(struct bch_io_opts *opts, struct bch_fs *c,
opts->_name##_from_inode = true; \
} else { \
opts->_name = c->opts._name; \
+ opts->_name##_from_inode = false; \
}
BCH_INODE_OPTS()
#undef x
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c
index 8c7b2d3d779d..aa91fcf51eec 100644
--- a/fs/bcachefs/io_read.c
+++ b/fs/bcachefs/io_read.c
@@ -59,7 +59,7 @@ static bool bch2_target_congested(struct bch_fs *c, u16 target)
}
rcu_read_unlock();
- return bch2_rand_range(nr * CONGESTED_MAX) < total;
+ return get_random_u32_below(nr * CONGESTED_MAX) < total;
}
#else
@@ -951,12 +951,6 @@ retry_pick:
goto retry_pick;
}
- /*
- * Unlock the iterator while the btree node's lock is still in
- * cache, before doing the IO:
- */
- bch2_trans_unlock(trans);
-
if (flags & BCH_READ_NODECODE) {
/*
* can happen if we retry, and the extent we were going to read
@@ -1113,6 +1107,15 @@ get_bio:
trace_and_count(c, read_split, &orig->bio);
}
+ /*
+ * Unlock the iterator while the btree node's lock is still in
+ * cache, before doing the IO:
+ */
+ if (!(flags & BCH_READ_IN_RETRY))
+ bch2_trans_unlock(trans);
+ else
+ bch2_trans_unlock_long(trans);
+
if (!rbio->pick.idx) {
if (unlikely(!rbio->have_ioref)) {
struct printbuf buf = PRINTBUF;
@@ -1160,6 +1163,8 @@ out:
if (likely(!(flags & BCH_READ_IN_RETRY))) {
return 0;
} else {
+ bch2_trans_unlock(trans);
+
int ret;
rbio->context = RBIO_CONTEXT_UNBOUND;
diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c
index dd508d93e9fc..03892388832b 100644
--- a/fs/bcachefs/io_write.c
+++ b/fs/bcachefs/io_write.c
@@ -411,6 +411,16 @@ void bch2_write_op_error(struct printbuf *out, struct bch_write_op *op)
__bch2_write_op_error(out, op, op->pos.offset);
}
+static void bch2_write_op_error_trans(struct btree_trans *trans, struct printbuf *out,
+ struct bch_write_op *op, u64 offset)
+{
+ bch2_inum_offset_err_msg_trans(trans, out,
+ (subvol_inum) { op->subvol, op->pos.inode, },
+ offset << 9);
+ prt_printf(out, "write error%s: ",
+ op->flags & BCH_WRITE_MOVE ? "(internal move)" : "");
+}
+
void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
enum bch_data_type type,
const struct bkey_i *k,
@@ -1193,7 +1203,7 @@ static void bch2_nocow_write_convert_unwritten(struct bch_write_op *op)
struct bkey_i *insert = bch2_keylist_front(&op->insert_keys);
struct printbuf buf = PRINTBUF;
- __bch2_write_op_error(&buf, op, bkey_start_offset(&insert->k));
+ bch2_write_op_error_trans(trans, &buf, op, bkey_start_offset(&insert->k));
prt_printf(&buf, "btree update error: %s", bch2_err_str(ret));
bch_err_ratelimited(c, "%s", buf.buf);
printbuf_exit(&buf);
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index 24c294d4634e..05b1250619ec 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -1021,8 +1021,8 @@ struct journal_buf *bch2_next_write_buffer_flush_journal_buf(struct journal *j,
/* allocate journal on a device: */
-static int __bch2_set_nr_journal_buckets(struct bch_dev *ca, unsigned nr,
- bool new_fs, struct closure *cl)
+static int bch2_set_nr_journal_buckets_iter(struct bch_dev *ca, unsigned nr,
+ bool new_fs, struct closure *cl)
{
struct bch_fs *c = ca->fs;
struct journal_device *ja = &ca->journal;
@@ -1150,26 +1150,20 @@ err_free:
return ret;
}
-/*
- * Allocate more journal space at runtime - not currently making use if it, but
- * the code works:
- */
-int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
- unsigned nr)
+static int bch2_set_nr_journal_buckets_loop(struct bch_fs *c, struct bch_dev *ca,
+ unsigned nr, bool new_fs)
{
struct journal_device *ja = &ca->journal;
- struct closure cl;
int ret = 0;
+ struct closure cl;
closure_init_stack(&cl);
- down_write(&c->state_lock);
-
/* don't handle reducing nr of buckets yet: */
if (nr < ja->nr)
- goto unlock;
+ return 0;
- while (ja->nr < nr) {
+ while (!ret && ja->nr < nr) {
struct disk_reservation disk_res = { 0, 0, 0 };
/*
@@ -1182,25 +1176,38 @@ int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
* filesystem-wide allocation will succeed, this is a device
* specific allocation - we can hang here:
*/
+ if (!new_fs) {
+ ret = bch2_disk_reservation_get(c, &disk_res,
+ bucket_to_sector(ca, nr - ja->nr), 1, 0);
+ if (ret)
+ break;
+ }
- ret = bch2_disk_reservation_get(c, &disk_res,
- bucket_to_sector(ca, nr - ja->nr), 1, 0);
- if (ret)
- break;
+ ret = bch2_set_nr_journal_buckets_iter(ca, nr, new_fs, &cl);
- ret = __bch2_set_nr_journal_buckets(ca, nr, false, &cl);
+ if (ret == -BCH_ERR_bucket_alloc_blocked ||
+ ret == -BCH_ERR_open_buckets_empty)
+ ret = 0; /* wait and retry */
bch2_disk_reservation_put(c, &disk_res);
-
closure_sync(&cl);
-
- if (ret && ret != -BCH_ERR_bucket_alloc_blocked)
- break;
}
- bch_err_fn(c, ret);
-unlock:
+ return ret;
+}
+
+/*
+ * Allocate more journal space at runtime - not currently making use if it, but
+ * the code works:
+ */
+int bch2_set_nr_journal_buckets(struct bch_fs *c, struct bch_dev *ca,
+ unsigned nr)
+{
+ down_write(&c->state_lock);
+ int ret = bch2_set_nr_journal_buckets_loop(c, ca, nr, false);
up_write(&c->state_lock);
+
+ bch_err_fn(c, ret);
return ret;
}
@@ -1226,7 +1233,7 @@ int bch2_dev_journal_alloc(struct bch_dev *ca, bool new_fs)
min(1 << 13,
(1 << 24) / ca->mi.bucket_size));
- ret = __bch2_set_nr_journal_buckets(ca, nr, new_fs, NULL);
+ ret = bch2_set_nr_journal_buckets_loop(ca->fs, ca, nr, new_fs);
err:
bch_err_fn(ca, ret);
return ret;
diff --git a/fs/bcachefs/journal_reclaim.c b/fs/bcachefs/journal_reclaim.c
index 6a9cefb635d6..d373cd181a7f 100644
--- a/fs/bcachefs/journal_reclaim.c
+++ b/fs/bcachefs/journal_reclaim.c
@@ -384,12 +384,16 @@ void bch2_journal_pin_drop(struct journal *j,
spin_unlock(&j->lock);
}
-static enum journal_pin_type journal_pin_type(journal_pin_flush_fn fn)
+static enum journal_pin_type journal_pin_type(struct journal_entry_pin *pin,
+ journal_pin_flush_fn fn)
{
if (fn == bch2_btree_node_flush0 ||
- fn == bch2_btree_node_flush1)
- return JOURNAL_PIN_TYPE_btree;
- else if (fn == bch2_btree_key_cache_journal_flush)
+ fn == bch2_btree_node_flush1) {
+ unsigned idx = fn == bch2_btree_node_flush1;
+ struct btree *b = container_of(pin, struct btree, writes[idx].journal);
+
+ return JOURNAL_PIN_TYPE_btree0 - b->c.level;
+ } else if (fn == bch2_btree_key_cache_journal_flush)
return JOURNAL_PIN_TYPE_key_cache;
else
return JOURNAL_PIN_TYPE_other;
@@ -441,7 +445,7 @@ void bch2_journal_pin_copy(struct journal *j,
bool reclaim = __journal_pin_drop(j, dst);
- bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(flush_fn));
+ bch2_journal_pin_set_locked(j, seq, dst, flush_fn, journal_pin_type(dst, flush_fn));
if (reclaim)
bch2_journal_reclaim_fast(j);
@@ -465,7 +469,7 @@ void bch2_journal_pin_set(struct journal *j, u64 seq,
bool reclaim = __journal_pin_drop(j, pin);
- bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(flush_fn));
+ bch2_journal_pin_set_locked(j, seq, pin, flush_fn, journal_pin_type(pin, flush_fn));
if (reclaim)
bch2_journal_reclaim_fast(j);
@@ -587,7 +591,7 @@ static size_t journal_flush_pins(struct journal *j,
spin_lock(&j->lock);
/* Pin might have been dropped or rearmed: */
if (likely(!err && !j->flush_in_progress_dropped))
- list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(flush_fn)]);
+ list_move(&pin->list, &journal_seq_pin(j, seq)->flushed[journal_pin_type(pin, flush_fn)]);
j->flush_in_progress = NULL;
j->flush_in_progress_dropped = false;
spin_unlock(&j->lock);
@@ -869,18 +873,13 @@ static int journal_flush_done(struct journal *j, u64 seq_to_flush,
mutex_lock(&j->reclaim_lock);
- if (journal_flush_pins_or_still_flushing(j, seq_to_flush,
- BIT(JOURNAL_PIN_TYPE_key_cache)|
- BIT(JOURNAL_PIN_TYPE_other))) {
- *did_work = true;
- goto unlock;
- }
-
- if (journal_flush_pins_or_still_flushing(j, seq_to_flush,
- BIT(JOURNAL_PIN_TYPE_btree))) {
- *did_work = true;
- goto unlock;
- }
+ for (int type = JOURNAL_PIN_TYPE_NR - 1;
+ type >= 0;
+ --type)
+ if (journal_flush_pins_or_still_flushing(j, seq_to_flush, BIT(type))) {
+ *did_work = true;
+ goto unlock;
+ }
if (seq_to_flush > journal_cur_seq(j))
bch2_journal_entry_close(j);
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index a198a81d7478..1ef3a28ed6ab 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -53,7 +53,10 @@ struct journal_buf {
*/
enum journal_pin_type {
- JOURNAL_PIN_TYPE_btree,
+ JOURNAL_PIN_TYPE_btree3,
+ JOURNAL_PIN_TYPE_btree2,
+ JOURNAL_PIN_TYPE_btree1,
+ JOURNAL_PIN_TYPE_btree0,
JOURNAL_PIN_TYPE_key_cache,
JOURNAL_PIN_TYPE_other,
JOURNAL_PIN_TYPE_NR,
diff --git a/fs/bcachefs/movinggc.c b/fs/bcachefs/movinggc.c
index 21805509ab9e..6718dc37c5a3 100644
--- a/fs/bcachefs/movinggc.c
+++ b/fs/bcachefs/movinggc.c
@@ -74,20 +74,14 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
struct move_bucket *b, u64 time)
{
struct bch_fs *c = trans->c;
- struct btree_iter iter;
- struct bkey_s_c k;
- struct bch_alloc_v4 _a;
- const struct bch_alloc_v4 *a;
- int ret;
- if (bch2_bucket_is_open(trans->c,
- b->k.bucket.inode,
- b->k.bucket.offset))
+ if (bch2_bucket_is_open(c, b->k.bucket.inode, b->k.bucket.offset))
return 0;
- k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc,
- b->k.bucket, BTREE_ITER_cached);
- ret = bkey_err(k);
+ struct btree_iter iter;
+ struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_alloc,
+ b->k.bucket, BTREE_ITER_cached);
+ int ret = bkey_err(k);
if (ret)
return ret;
@@ -95,13 +89,18 @@ static int bch2_bucket_is_movable(struct btree_trans *trans,
if (!ca)
goto out;
- a = bch2_alloc_to_v4(k, &_a);
+ if (ca->mi.state != BCH_MEMBER_STATE_rw ||
+ !bch2_dev_is_online(ca))
+ goto out_put;
+
+ struct bch_alloc_v4 _a;
+ const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a);
b->k.gen = a->gen;
b->sectors = bch2_bucket_sectors_dirty(*a);
u64 lru_idx = alloc_lru_idx_fragmentation(*a, ca);
ret = lru_idx && lru_idx <= time;
-
+out_put:
bch2_dev_put(ca);
out:
bch2_trans_iter_exit(trans, &iter);
diff --git a/fs/bcachefs/reflink.c b/fs/bcachefs/reflink.c
index 93ba4f4e47ca..441e648f28b5 100644
--- a/fs/bcachefs/reflink.c
+++ b/fs/bcachefs/reflink.c
@@ -172,7 +172,7 @@ static int bch2_indirect_extent_missing_error(struct btree_trans *trans,
bool should_commit)
{
if (REFLINK_P_ERROR(p.v))
- return -BCH_ERR_missing_indirect_extent;
+ return 0;
struct bch_fs *c = trans->c;
u64 live_start = REFLINK_P_IDX(p.v);
@@ -259,8 +259,6 @@ struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *trans,
return k;
if (unlikely(!bkey_extent_is_reflink_data(k.k))) {
- bch2_trans_iter_exit(trans, iter);
-
unsigned size = min((u64) k.k->size,
REFLINK_P_IDX(p.v) + p.k->size + le32_to_cpu(p.v->back_pad) -
reflink_offset);
@@ -268,14 +266,16 @@ struct bkey_s_c bch2_lookup_indirect_extent(struct btree_trans *trans,
int ret = bch2_indirect_extent_missing_error(trans, p, reflink_offset,
k.k->p.offset, should_commit);
- if (ret)
+ if (ret) {
+ bch2_trans_iter_exit(trans, iter);
return bkey_s_c_err(ret);
+ }
} else if (unlikely(REFLINK_P_ERROR(p.v))) {
- bch2_trans_iter_exit(trans, iter);
-
int ret = bch2_indirect_extent_not_missing(trans, p, should_commit);
- if (ret)
+ if (ret) {
+ bch2_trans_iter_exit(trans, iter);
return bkey_s_c_err(ret);
+ }
}
*offset_into_extent = reflink_offset - bkey_start_offset(k.k);
@@ -300,7 +300,7 @@ static int trans_trigger_reflink_p_segment(struct btree_trans *trans,
if (ret)
return ret;
- if (bkey_deleted(k.k)) {
+ if (!bkey_refcount_c(k)) {
if (!(flags & BTREE_TRIGGER_overwrite))
ret = -BCH_ERR_missing_indirect_extent;
goto next;
diff --git a/fs/bcachefs/sb-downgrade.c b/fs/bcachefs/sb-downgrade.c
index 14f6b6a5fb38..051214fdc735 100644
--- a/fs/bcachefs/sb-downgrade.c
+++ b/fs/bcachefs/sb-downgrade.c
@@ -90,10 +90,7 @@
BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \
BCH_FSCK_ERR_accounting_mismatch, \
BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \
- BCH_FSCK_ERR_accounting_key_junk_at_end) \
- x(directory_size, \
- BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \
- BCH_FSCK_ERR_directory_size_mismatch) \
+ BCH_FSCK_ERR_accounting_key_junk_at_end)
#define DOWNGRADE_TABLE() \
x(bucket_stripe_sectors, \
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index ea0a18364751..b86ec013d7d7 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -180,9 +180,9 @@ enum bch_fsck_flags {
x(ptr_crc_nonce_mismatch, 162, 0) \
x(ptr_stripe_redundant, 163, 0) \
x(reservation_key_nr_replicas_invalid, 164, 0) \
- x(reflink_v_refcount_wrong, 165, 0) \
+ x(reflink_v_refcount_wrong, 165, FSCK_AUTOFIX) \
x(reflink_v_pos_bad, 292, 0) \
- x(reflink_p_to_missing_reflink_v, 166, 0) \
+ x(reflink_p_to_missing_reflink_v, 166, FSCK_AUTOFIX) \
x(reflink_refcount_underflow, 293, 0) \
x(stripe_pos_bad, 167, 0) \
x(stripe_val_size_bad, 168, 0) \
diff --git a/fs/bcachefs/six.c b/fs/bcachefs/six.c
index 7e7c66a1e1a6..7c403427fbdb 100644
--- a/fs/bcachefs/six.c
+++ b/fs/bcachefs/six.c
@@ -850,7 +850,8 @@ void six_lock_exit(struct six_lock *lock)
EXPORT_SYMBOL_GPL(six_lock_exit);
void __six_lock_init(struct six_lock *lock, const char *name,
- struct lock_class_key *key, enum six_lock_init_flags flags)
+ struct lock_class_key *key, enum six_lock_init_flags flags,
+ gfp_t gfp)
{
atomic_set(&lock->state, 0);
raw_spin_lock_init(&lock->wait_lock);
@@ -873,7 +874,7 @@ void __six_lock_init(struct six_lock *lock, const char *name,
* failure if they wish by checking lock->readers, but generally
* will not want to treat it as an error.
*/
- lock->readers = alloc_percpu(unsigned);
+ lock->readers = alloc_percpu_gfp(unsigned, gfp);
}
#endif
}
diff --git a/fs/bcachefs/six.h b/fs/bcachefs/six.h
index c142e06b7a3a..59b851cf8bac 100644
--- a/fs/bcachefs/six.h
+++ b/fs/bcachefs/six.h
@@ -164,18 +164,19 @@ enum six_lock_init_flags {
};
void __six_lock_init(struct six_lock *lock, const char *name,
- struct lock_class_key *key, enum six_lock_init_flags flags);
+ struct lock_class_key *key, enum six_lock_init_flags flags,
+ gfp_t gfp);
/**
* six_lock_init - initialize a six lock
* @lock: lock to initialize
* @flags: optional flags, i.e. SIX_LOCK_INIT_PCPU
*/
-#define six_lock_init(lock, flags) \
+#define six_lock_init(lock, flags, gfp) \
do { \
static struct lock_class_key __key; \
\
- __six_lock_init((lock), #lock, &__key, flags); \
+ __six_lock_init((lock), #lock, &__key, flags, gfp); \
} while (0)
/**
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index 8037ccbacf6a..a81a7b6c0989 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -69,14 +69,20 @@ enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_meta
return v;
}
-void bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version)
+bool bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version)
{
- mutex_lock(&c->sb_lock);
- SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb,
- max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version));
- c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_FEATURE_incompat_version_field);
- bch2_write_super(c);
- mutex_unlock(&c->sb_lock);
+ bool ret = (c->sb.features & BIT_ULL(BCH_FEATURE_incompat_version_field)) &&
+ version <= c->sb.version_incompat_allowed;
+
+ if (ret) {
+ mutex_lock(&c->sb_lock);
+ SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb,
+ max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version));
+ bch2_write_super(c);
+ mutex_unlock(&c->sb_lock);
+ }
+
+ return ret;
}
const char * const bch2_sb_fields[] = {
@@ -1219,9 +1225,11 @@ void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat)
c->disk_sb.sb->version = cpu_to_le16(new_version);
c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
- if (incompat)
+ if (incompat) {
SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb,
max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), new_version));
+ c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_FEATURE_incompat_version_field);
+ }
}
static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f,
diff --git a/fs/bcachefs/super-io.h b/fs/bcachefs/super-io.h
index f1ab4f943720..b4cff9ebdebb 100644
--- a/fs/bcachefs/super-io.h
+++ b/fs/bcachefs/super-io.h
@@ -21,17 +21,14 @@ static inline bool bch2_version_compatible(u16 version)
void bch2_version_to_text(struct printbuf *, enum bcachefs_metadata_version);
enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_metadata_version);
-void bch2_set_version_incompat(struct bch_fs *, enum bcachefs_metadata_version);
+bool bch2_set_version_incompat(struct bch_fs *, enum bcachefs_metadata_version);
static inline bool bch2_request_incompat_feature(struct bch_fs *c,
enum bcachefs_metadata_version version)
{
- if (unlikely(version > c->sb.version_incompat)) {
- if (version > c->sb.version_incompat_allowed)
- return false;
- bch2_set_version_incompat(c, version);
- }
- return true;
+ return likely(version <= c->sb.version_incompat)
+ ? true
+ : bch2_set_version_incompat(c, version);
}
static inline size_t bch2_sb_field_bytes(struct bch_sb_field *f)
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index 6d97d412fed9..0459c875e189 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -1811,7 +1811,11 @@ int bch2_dev_add(struct bch_fs *c, const char *path)
goto err_late;
up_write(&c->state_lock);
- return 0;
+out:
+ printbuf_exit(&label);
+ printbuf_exit(&errbuf);
+ bch_err_fn(c, ret);
+ return ret;
err_unlock:
mutex_unlock(&c->sb_lock);
@@ -1820,10 +1824,7 @@ err:
if (ca)
bch2_dev_free(ca);
bch2_free_super(&sb);
- printbuf_exit(&label);
- printbuf_exit(&errbuf);
- bch_err_fn(c, ret);
- return ret;
+ goto out;
err_late:
up_write(&c->state_lock);
ca = NULL;
diff --git a/fs/bcachefs/util.c b/fs/bcachefs/util.c
index e0a876cbaa6b..da2cd11b3025 100644
--- a/fs/bcachefs/util.c
+++ b/fs/bcachefs/util.c
@@ -653,19 +653,25 @@ int bch2_bio_alloc_pages(struct bio *bio, size_t size, gfp_t gfp_mask)
return 0;
}
-size_t bch2_rand_range(size_t max)
+u64 bch2_get_random_u64_below(u64 ceil)
{
- size_t rand;
+ if (ceil <= U32_MAX)
+ return __get_random_u32_below(ceil);
- if (!max)
- return 0;
+ /* this is the same (clever) algorithm as in __get_random_u32_below() */
+ u64 rand = get_random_u64();
+ u64 mult = ceil * rand;
- do {
- rand = get_random_long();
- rand &= roundup_pow_of_two(max) - 1;
- } while (rand >= max);
+ if (unlikely(mult < ceil)) {
+ u64 bound;
+ div64_u64_rem(-ceil, ceil, &bound);
+ while (unlikely(mult < bound)) {
+ rand = get_random_u64();
+ mult = ceil * rand;
+ }
+ }
- return rand;
+ return mul_u64_u64_shr(ceil, rand, 64);
}
void memcpy_to_bio(struct bio *dst, struct bvec_iter dst_iter, const void *src)
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index e7c3541b38f3..f4a4783219d9 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -401,7 +401,7 @@ do { \
_ret; \
})
-size_t bch2_rand_range(size_t);
+u64 bch2_get_random_u64_below(u64);
void memcpy_to_bio(struct bio *, struct bvec_iter, const void *);
void memcpy_from_bio(void *, struct bio *, struct bvec_iter);