summary refs log tree commit diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/bcachefs/backpointers.c 2
-rw-r--r-- fs/bcachefs/bcachefs.h 17
-rw-r--r-- fs/bcachefs/bcachefs_format.h 37
-rw-r--r-- fs/bcachefs/btree_gc.c 89
-rw-r--r-- fs/bcachefs/btree_iter.c 4
-rw-r--r-- fs/bcachefs/btree_node_scan.c 4
-rw-r--r-- fs/bcachefs/btree_update.c 16
-rw-r--r-- fs/bcachefs/btree_update.h 17
-rw-r--r-- fs/bcachefs/btree_update_interior.c 14
-rw-r--r-- fs/bcachefs/btree_write_buffer.c 10
-rw-r--r-- fs/bcachefs/btree_write_buffer.h 2
-rw-r--r-- fs/bcachefs/data_update.c 11
-rw-r--r-- fs/bcachefs/data_update.h 1
-rw-r--r-- fs/bcachefs/dirent.c 2
-rw-r--r-- fs/bcachefs/disk_accounting.c 3
-rw-r--r-- fs/bcachefs/disk_accounting.h 10
-rw-r--r-- fs/bcachefs/ec.c 45
-rw-r--r-- fs/bcachefs/error.h 3
-rw-r--r-- fs/bcachefs/extent_update.c 56
-rw-r--r-- fs/bcachefs/extent_update.h 2
-rw-r--r-- fs/bcachefs/extents.c 17
-rw-r--r-- fs/bcachefs/extents.h 1
-rw-r--r-- fs/bcachefs/fs-io-buffered.c 30
-rw-r--r-- fs/bcachefs/fs.c 17
-rw-r--r-- fs/bcachefs/fsck.c 5
-rw-r--r-- fs/bcachefs/inode.c 11
-rw-r--r-- fs/bcachefs/inode.h 2
-rw-r--r-- fs/bcachefs/inode_format.h 3
-rw-r--r-- fs/bcachefs/migrate.c 4
-rw-r--r-- fs/bcachefs/move.c 22
-rw-r--r-- fs/bcachefs/namei.c 3
-rw-r--r-- fs/bcachefs/rebalance.c 7
-rw-r--r-- fs/bcachefs/recovery.c 2
-rw-r--r-- fs/bcachefs/sb-counters_format.h 4
-rw-r--r-- fs/bcachefs/snapshot.c 3
-rw-r--r-- fs/bcachefs/snapshot.h 15
-rw-r--r-- fs/bcachefs/str_hash.h 14
-rw-r--r-- fs/bcachefs/super-io.c 2
-rw-r--r-- fs/bcachefs/super.c 5
-rw-r--r-- fs/bcachefs/trace.h 5
-rw-r--r-- fs/bcachefs/two_state_shared_lock.h 31
-rw-r--r-- fs/bcachefs/xattr.c 19
42 files changed, 402 insertions, 165 deletions
diff --git a/fs/bcachefs/backpointers.c b/fs/bcachefs/backpointers.c
index 45d3db41225a..c43aaab4c108 100644
--- a/fs/bcachefs/backpointers.c
+++ b/fs/bcachefs/backpointers.c
@@ -809,6 +809,8 @@ static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
for (enum btree_id btree_id = 0;
btree_id < btree_id_nr_alive(c);
btree_id++) {
+ /* btree_type_has_ptrs should probably include BTREE_ID_stripes,
+ * definitely here... */
int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;
ret = commit_do(trans, NULL, NULL,
diff --git a/fs/bcachefs/bcachefs.h b/fs/bcachefs/bcachefs.h
index cdf593c59922..16d08dfb5f19 100644
--- a/fs/bcachefs/bcachefs.h
+++ b/fs/bcachefs/bcachefs.h
@@ -386,14 +386,6 @@ do { \
##__VA_ARGS__, bch2_err_str(_ret)); \
} while (0)
-static inline int __bch2_err_trace(struct bch_fs *c, int err)
-{
- trace_error_throw(c, err, _THIS_IP_);
- return err;
-}
-
-#define bch_err_throw(_c, _err) __bch2_err_trace(_c, -BCH_ERR_##_err)
-
/* Parameters that are useful for debugging, but should always be compiled in: */
#define BCH_DEBUG_PARAMS_ALWAYS() \
BCH_DEBUG_PARAM(key_merging_disabled, \
@@ -1153,6 +1145,15 @@ struct bch_fs {
struct mutex fsck_error_counts_lock;
};
+static inline int __bch2_err_trace(struct bch_fs *c, int err)
+{
+ this_cpu_inc(c->counters[BCH_COUNTER_error_throw]);
+ trace_error_throw(c, err, _THIS_IP_);
+ return err;
+}
+
+#define bch_err_throw(_c, _err) __bch2_err_trace(_c, -BCH_ERR_##_err)
+
extern struct wait_queue_head bch2_read_only_wait;
static inline bool bch2_ro_ref_tryget(struct bch_fs *c)
diff --git a/fs/bcachefs/bcachefs_format.h b/fs/bcachefs/bcachefs_format.h
index a8f59522e258..b2de993d802b 100644
--- a/fs/bcachefs/bcachefs_format.h
+++ b/fs/bcachefs/bcachefs_format.h
@@ -706,7 +706,8 @@ struct bch_sb_field_ext {
x(snapshot_deletion_v2, BCH_VERSION(1, 26)) \
x(fast_device_removal, BCH_VERSION(1, 27)) \
x(inode_has_case_insensitive, BCH_VERSION(1, 28)) \
- x(extent_snapshot_whiteouts, BCH_VERSION(1, 29))
+ x(extent_snapshot_whiteouts, BCH_VERSION(1, 29)) \
+ x(31bit_dirent_offset, BCH_VERSION(1, 30))
enum bcachefs_metadata_version {
bcachefs_metadata_version_min = 9,
@@ -1378,7 +1379,8 @@ enum btree_id_flags {
BIT_ULL(KEY_TYPE_alloc_v4)) \
x(quotas, 5, 0, \
BIT_ULL(KEY_TYPE_quota)) \
- x(stripes, 6, 0, \
+ x(stripes, 6, \
+ BTREE_IS_data, \
BIT_ULL(KEY_TYPE_stripe)) \
x(reflink, 7, \
BTREE_IS_extents| \
@@ -1438,9 +1440,9 @@ enum btree_id {
*/
#define BTREE_ID_NR_MAX 63
-static inline bool btree_id_is_alloc(enum btree_id id)
+static inline bool btree_id_is_alloc(enum btree_id btree)
{
- switch (id) {
+ switch (btree) {
case BTREE_ID_alloc:
case BTREE_ID_backpointers:
case BTREE_ID_need_discard:
@@ -1454,6 +1456,33 @@ static inline bool btree_id_is_alloc(enum btree_id id)
}
}
+/* We can reconstruct these btrees from information in other btrees */
+static inline bool btree_id_can_reconstruct(enum btree_id btree)
+{
+ if (btree_id_is_alloc(btree))
+ return true;
+
+ switch (btree) {
+ case BTREE_ID_snapshot_trees:
+ case BTREE_ID_deleted_inodes:
+ case BTREE_ID_rebalance_work:
+ case BTREE_ID_subvolume_children:
+ return true;
+ default:
+ return false;
+ }
+}
+
+/*
+ * We can reconstruct BTREE_ID_alloc, but reconstructing it from scratch is not
+ * so cheap and OOMs on huge filesystems (until we have online
+ * check_allocations)
+ */
+static inline bool btree_id_recovers_from_scan(enum btree_id btree)
+{
+ return btree == BTREE_ID_alloc || !btree_id_can_reconstruct(btree);
+}
+
#define BTREE_MAX_DEPTH 4U
/* Btree nodes */
diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c
index 6b91649688da..ae7d260589d8 100644
--- a/fs/bcachefs/btree_gc.c
+++ b/fs/bcachefs/btree_gc.c
@@ -44,27 +44,6 @@
#include <linux/rcupdate.h>
#include <linux/sched/task.h>
-/*
- * Returns true if it's a btree we can easily reconstruct, or otherwise won't
- * cause data loss if it's missing:
- */
-static bool btree_id_important(enum btree_id btree)
-{
- if (btree_id_is_alloc(btree))
- return false;
-
- switch (btree) {
- case BTREE_ID_quotas:
- case BTREE_ID_snapshot_trees:
- case BTREE_ID_logged_ops:
- case BTREE_ID_rebalance_work:
- case BTREE_ID_subvolume_children:
- return false;
- default:
- return true;
- }
-}
-
static const char * const bch2_gc_phase_strs[] = {
#define x(n) #n,
GC_PHASES()
@@ -557,45 +536,55 @@ fsck_err:
return ret;
}
-static int bch2_check_root(struct btree_trans *trans, enum btree_id btree,
+static int bch2_topology_check_root(struct btree_trans *trans, enum btree_id btree,
bool *reconstructed_root)
{
struct bch_fs *c = trans->c;
struct btree_root *r = bch2_btree_id_root(c, btree);
- CLASS(printbuf, buf)();
- int ret = 0;
-
- bch2_btree_id_to_text(&buf, btree);
- if (r->error) {
- bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf);
+ if (!r->error)
+ return 0;
- ret = bch2_btree_has_scanned_nodes(c, btree);
- if (ret < 0)
- goto err;
+ CLASS(printbuf, buf)();
+ int ret = 0;
- if (!ret) {
- __fsck_err(trans,
- FSCK_CAN_FIX|(!btree_id_important(btree) ? FSCK_AUTOFIX : 0),
- btree_root_unreadable_and_scan_found_nothing,
- "no nodes found for btree %s, continue?", buf.buf);
+ if (!btree_id_recovers_from_scan(btree)) {
+ r->alive = false;
+ r->error = 0;
+ bch2_btree_root_alloc_fake_trans(trans, btree, 0);
+ ret = bch2_btree_lost_data(c, &buf, btree);
+ bch2_print_str(c, KERN_NOTICE, buf.buf);
+ goto out;
+ }
- r->alive = false;
- r->error = 0;
- bch2_btree_root_alloc_fake_trans(trans, btree, 0);
- } else {
- r->alive = false;
- r->error = 0;
- bch2_btree_root_alloc_fake_trans(trans, btree, 1);
+ bch2_btree_id_to_text(&buf, btree);
+ bch_info(c, "btree root %s unreadable, must recover from scan", buf.buf);
- bch2_shoot_down_journal_keys(c, btree, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
- ret = bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX);
- if (ret)
- return ret;
- }
+ ret = bch2_btree_has_scanned_nodes(c, btree);
+ if (ret < 0)
+ goto err;
- *reconstructed_root = true;
+ if (!ret) {
+ __fsck_err(trans,
+ FSCK_CAN_FIX|(btree_id_can_reconstruct(btree) ? FSCK_AUTOFIX : 0),
+ btree_root_unreadable_and_scan_found_nothing,
+ "no nodes found for btree %s, continue?", buf.buf);
+
+ r->alive = false;
+ r->error = 0;
+ bch2_btree_root_alloc_fake_trans(trans, btree, 0);
+ } else {
+ r->alive = false;
+ r->error = 0;
+ bch2_btree_root_alloc_fake_trans(trans, btree, 1);
+
+ bch2_shoot_down_journal_keys(c, btree, 1, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
+ ret = bch2_get_scanned_nodes(c, btree, 0, POS_MIN, SPOS_MAX);
+ if (ret)
+ return ret;
}
+out:
+ *reconstructed_root = true;
err:
fsck_err:
bch_err_fn(c, ret);
@@ -613,7 +602,7 @@ int bch2_check_topology(struct bch_fs *c)
for (unsigned i = 0; i < btree_id_nr_alive(c) && !ret; i++) {
bool reconstructed_root = false;
recover:
- ret = lockrestart_do(trans, bch2_check_root(trans, i, &reconstructed_root));
+ ret = lockrestart_do(trans, bch2_topology_check_root(trans, i, &reconstructed_root));
if (ret)
break;
diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c
index 8962c481e310..546b559fe3ce 100644
--- a/fs/bcachefs/btree_iter.c
+++ b/fs/bcachefs/btree_iter.c
@@ -2366,7 +2366,9 @@ struct bkey_s_c bch2_btree_iter_peek_max(struct btree_iter *iter, struct bpos en
bch2_trans_verify_not_unlocked_or_in_restart(trans);
bch2_btree_iter_verify_entry_exit(iter);
- EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) && bkey_eq(end, POS_MAX));
+ EBUG_ON((iter->flags & BTREE_ITER_filter_snapshots) &&
+ !(iter->flags & BTREE_ITER_nofilter_whiteouts) &&
+ bkey_eq(end, POS_MAX));
ret = trans_maybe_inject_restart(trans, _RET_IP_);
if (unlikely(ret)) {
diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c
index 4b7b5ca74ba1..b618a0bd1186 100644
--- a/fs/bcachefs/btree_node_scan.c
+++ b/fs/bcachefs/btree_node_scan.c
@@ -149,7 +149,7 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca,
bch2_encrypt(c, BSET_CSUM_TYPE(&bn->keys), nonce, &bn->flags, bytes);
}
- if (btree_id_is_alloc(BTREE_NODE_ID(bn)))
+ if (btree_id_can_reconstruct(BTREE_NODE_ID(bn)))
return;
if (BTREE_NODE_LEVEL(bn) >= BTREE_MAX_DEPTH)
@@ -534,7 +534,7 @@ int bch2_btree_has_scanned_nodes(struct bch_fs *c, enum btree_id btree)
int bch2_get_scanned_nodes(struct bch_fs *c, enum btree_id btree,
unsigned level, struct bpos node_min, struct bpos node_max)
{
- if (btree_id_is_alloc(btree))
+ if (!btree_id_recovers_from_scan(btree))
return 0;
struct find_btree_nodes *f = &c->found_btree_nodes;
diff --git a/fs/bcachefs/btree_update.c b/fs/bcachefs/btree_update.c
index f59f018fe0d8..053a837cf241 100644
--- a/fs/bcachefs/btree_update.c
+++ b/fs/bcachefs/btree_update.c
@@ -12,7 +12,6 @@
#include "extents.h"
#include "keylist.h"
#include "snapshot.h"
-#include "super-io.h"
#include "trace.h"
#include <linux/string_helpers.h>
@@ -159,21 +158,6 @@ int __bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
return ret;
}
-static inline enum bch_bkey_type extent_whiteout_type(struct bch_fs *c, enum btree_id btree, const struct bkey *k)
-{
- /*
- * KEY_TYPE_extent_whiteout indicates that there isn't a real extent
- * present at that position: key start positions inclusive of
- * KEY_TYPE_extent_whiteout (but not KEY_TYPE_whiteout) are
- * monotonically increasing
- */
- return btree_id_is_extents_snapshots(btree) &&
- bkey_deleted(k) &&
- !bch2_request_incompat_feature(c, bcachefs_metadata_version_extent_snapshot_whiteouts)
- ? KEY_TYPE_extent_whiteout
- : KEY_TYPE_whiteout;
-}
-
int bch2_trans_update_extent_overwrite(struct btree_trans *trans,
struct btree_iter *iter,
enum btree_iter_update_trigger_flags flags,
diff --git a/fs/bcachefs/btree_update.h b/fs/bcachefs/btree_update.h
index 663739db82b1..18560ca80057 100644
--- a/fs/bcachefs/btree_update.h
+++ b/fs/bcachefs/btree_update.h
@@ -5,6 +5,7 @@
#include "btree_iter.h"
#include "journal.h"
#include "snapshot.h"
+#include "super-io.h"
struct bch_fs;
struct btree;
@@ -110,6 +111,22 @@ static inline int bch2_insert_snapshot_whiteouts(struct btree_trans *trans,
: 0;
}
+static inline enum bch_bkey_type extent_whiteout_type(struct bch_fs *c, enum btree_id btree,
+ const struct bkey *k)
+{
+ /*
+ * KEY_TYPE_extent_whiteout indicates that there isn't a real extent
+ * present at that position: key start positions inclusive of
+ * KEY_TYPE_extent_whiteout (but not KEY_TYPE_whiteout) are
+ * monotonically increasing
+ */
+ return btree_id_is_extents_snapshots(btree) &&
+ bkey_deleted(k) &&
+ !bch2_request_incompat_feature(c, bcachefs_metadata_version_extent_snapshot_whiteouts)
+ ? KEY_TYPE_extent_whiteout
+ : KEY_TYPE_whiteout;
+}
+
int bch2_trans_update_extent_overwrite(struct btree_trans *, struct btree_iter *,
enum btree_iter_update_trigger_flags,
struct bkey_s_c, struct bkey_s_c);
diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c
index 76897cf15946..65ca54c5b0ff 100644
--- a/fs/bcachefs/btree_update_interior.c
+++ b/fs/bcachefs/btree_update_interior.c
@@ -336,6 +336,20 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans,
BUG_ON(b->ob.nr);
mutex_lock(&c->btree_reserve_cache_lock);
+ if (unlikely(c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark))) {
+ guard(spinlock)(&c->freelist_lock);
+ if (c->open_buckets_nr_free <= bch2_open_buckets_reserved(watermark)) {
+ if (cl)
+ closure_wait(&c->open_buckets_wait, cl);
+
+ ret = cl
+ ? bch_err_throw(c, bucket_alloc_blocked)
+ : bch_err_throw(c, open_buckets_empty);
+ mutex_unlock(&c->btree_reserve_cache_lock);
+ goto err;
+ }
+ }
+
if (c->btree_reserve_cache_nr > nr_reserve) {
for (struct btree_alloc *a = c->btree_reserve_cache;
a < c->btree_reserve_cache + c->btree_reserve_cache_nr;) {
diff --git a/fs/bcachefs/btree_write_buffer.c b/fs/bcachefs/btree_write_buffer.c
index afad11831e1d..755fb25a8eba 100644
--- a/fs/bcachefs/btree_write_buffer.c
+++ b/fs/bcachefs/btree_write_buffer.c
@@ -701,8 +701,16 @@ int bch2_accounting_key_to_wb_slowpath(struct bch_fs *c, enum btree_id btree,
struct bkey_i_accounting *k)
{
struct btree_write_buffer *wb = &c->btree_write_buffer;
- struct btree_write_buffered_key new = { .btree = btree };
+ if (trace_accounting_key_to_wb_slowpath_enabled()) {
+ CLASS(printbuf, buf)();
+ prt_printf(&buf, "have: %zu\n", wb->accounting.nr);
+ bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&k->k_i));
+ trace_accounting_key_to_wb_slowpath(c, buf.buf);
+ }
+ count_event(c, accounting_key_to_wb_slowpath);
+
+ struct btree_write_buffered_key new = { .btree = btree };
bkey_copy(&new.k, &k->k_i);
int ret = darray_push(&wb->accounting, new);
diff --git a/fs/bcachefs/btree_write_buffer.h b/fs/bcachefs/btree_write_buffer.h
index e484cd6b90b0..b862bdf67f58 100644
--- a/fs/bcachefs/btree_write_buffer.h
+++ b/fs/bcachefs/btree_write_buffer.h
@@ -95,7 +95,7 @@ static inline int bch2_journal_key_to_wb(struct bch_fs *c,
EBUG_ON(!dst->seq);
- return k->k.type == KEY_TYPE_accounting
+ return bch2_bkey_is_accounting_mem(&k->k)
? bch2_accounting_key_to_wb(c, btree, bkey_i_to_accounting(k))
: __bch2_journal_key_to_wb(c, dst, btree, k);
}
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 01838a3a189d..a314d70c6b8e 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -225,7 +225,7 @@ static void trace_io_move_created_rebalance2(struct data_update *m,
trace_io_move_created_rebalance(c, buf.buf);
- this_cpu_inc(c->counters[BCH_COUNTER_io_move_created_rebalance]);
+ count_event(c, io_move_created_rebalance);
}
noinline_for_stack
@@ -693,6 +693,15 @@ int bch2_extent_drop_ptrs(struct btree_trans *trans,
if (ret)
return ret;
+ const union bch_extent_entry *entry;
+ struct extent_ptr_decoded p;
+ unsigned i = 0;
+ bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) {
+ if (data_opts->kill_ec_ptrs & BIT(i))
+ bch2_bkey_drop_ec(n, p.ptr.dev);
+ i++;
+ }
+
while (data_opts->kill_ptrs) {
unsigned i = 0, drop = __fls(data_opts->kill_ptrs);
diff --git a/fs/bcachefs/data_update.h b/fs/bcachefs/data_update.h
index 5e14d13568de..fc12aa65366f 100644
--- a/fs/bcachefs/data_update.h
+++ b/fs/bcachefs/data_update.h
@@ -12,6 +12,7 @@ struct moving_context;
struct data_update_opts {
unsigned rewrite_ptrs;
unsigned kill_ptrs;
+ unsigned kill_ec_ptrs;
u16 target;
u8 extra_replicas;
unsigned btree_insert_flags;
diff --git a/fs/bcachefs/dirent.c b/fs/bcachefs/dirent.c
index cb44b35e0f1d..fe6f3d874a47 100644
--- a/fs/bcachefs/dirent.c
+++ b/fs/bcachefs/dirent.c
@@ -95,7 +95,7 @@ static u64 bch2_dirent_hash(const struct bch_hash_info *info,
bch2_str_hash_update(&ctx, info, name->name, name->len);
/* [0,2) reserved for dots */
- return max_t(u64, bch2_str_hash_end(&ctx, info), 2);
+ return max_t(u64, bch2_str_hash_end(&ctx, info, true), 2);
}
static u64 dirent_hash_key(const struct bch_hash_info *info, const void *key)
diff --git a/fs/bcachefs/disk_accounting.c b/fs/bcachefs/disk_accounting.c
index f96530c70262..5944ad6d0f8d 100644
--- a/fs/bcachefs/disk_accounting.c
+++ b/fs/bcachefs/disk_accounting.c
@@ -184,6 +184,9 @@ int bch2_accounting_validate(struct bch_fs *c, struct bkey_s_c k,
void *end = &acc_k + 1;
int ret = 0;
+ if (acc_k.type >= BCH_DISK_ACCOUNTING_TYPE_NR)
+ return 0;
+
bkey_fsck_err_on((from.flags & BCH_VALIDATE_commit) &&
bversion_zero(k.k->bversion),
c, accounting_key_version_0,
diff --git a/fs/bcachefs/disk_accounting.h b/fs/bcachefs/disk_accounting.h
index 43f4b21d0aab..cc73cce98a44 100644
--- a/fs/bcachefs/disk_accounting.h
+++ b/fs/bcachefs/disk_accounting.h
@@ -145,6 +145,16 @@ static inline bool bch2_accounting_is_mem(struct disk_accounting_pos *acc)
acc->type != BCH_DISK_ACCOUNTING_inum;
}
+static inline bool bch2_bkey_is_accounting_mem(struct bkey *k)
+{
+ if (k->type != KEY_TYPE_accounting)
+ return false;
+
+ struct disk_accounting_pos acc_k;
+ bpos_to_disk_accounting_pos(&acc_k, k->p);
+ return bch2_accounting_is_mem(&acc_k);
+}
+
/*
* Update in memory counters so they match the btree update we're doing; called
* from transaction commit path
diff --git a/fs/bcachefs/ec.c b/fs/bcachefs/ec.c
index c2840cb674b2..15c7d8ff5dea 100644
--- a/fs/bcachefs/ec.c
+++ b/fs/bcachefs/ec.c
@@ -35,6 +35,8 @@
#include <linux/raid/pq.h>
#include <linux/raid/xor.h>
+static bool bch2_stripe_is_open(struct bch_fs *, u64);
+
static void raid5_recov(unsigned disks, unsigned failed_idx,
size_t size, void **data)
{
@@ -386,11 +388,20 @@ int bch2_trigger_stripe(struct btree_trans *trans,
new_s->nr_redundant != old_s->nr_redundant));
if (flags & BTREE_TRIGGER_transactional) {
+ u64 old_lru_pos = stripe_lru_pos(old_s);
+ u64 new_lru_pos = stripe_lru_pos(new_s);
+
+ if (new_lru_pos == STRIPE_LRU_POS_EMPTY &&
+ !bch2_stripe_is_open(c, idx)) {
+ _new.k->type = KEY_TYPE_deleted;
+ set_bkey_val_u64s(_new.k, 0);
+ new_s = NULL;
+ new_lru_pos = 0;
+ }
+
int ret = bch2_lru_change(trans,
- BCH_LRU_STRIPE_FRAGMENTATION,
- idx,
- stripe_lru_pos(old_s),
- stripe_lru_pos(new_s));
+ BCH_LRU_STRIPE_FRAGMENTATION, idx,
+ old_lru_pos, new_lru_pos);
if (ret)
return ret;
}
@@ -954,7 +965,7 @@ static int ec_stripe_delete(struct btree_trans *trans, u64 idx)
*/
if (k.k->type == KEY_TYPE_stripe &&
!bch2_stripe_is_open(trans->c, idx) &&
- stripe_lru_pos(bkey_s_c_to_stripe(k).v) == 1)
+ stripe_lru_pos(bkey_s_c_to_stripe(k).v) == STRIPE_LRU_POS_EMPTY)
return bch2_btree_delete_at(trans, &iter, 0);
return 0;
@@ -1767,7 +1778,14 @@ static int __get_existing_stripe(struct btree_trans *trans,
{
struct bch_fs *c = trans->c;
- CLASS(btree_iter, iter)(trans, BTREE_ID_stripes, POS(0, idx), BTREE_ITER_nopreserve);
+ /*
+ * We require an intent lock here until we have the stripe open, for
+ * exclusion with bch2_trigger_stripe() - which will delete empty
+ * stripes if they're not open, but it can't actually open them:
+ */
+ CLASS(btree_iter, iter)(trans, BTREE_ID_stripes, POS(0, idx),
+ BTREE_ITER_intent|
+ BTREE_ITER_nopreserve);
struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter);
int ret = bkey_err(k);
if (ret)
@@ -1778,8 +1796,19 @@ static int __get_existing_stripe(struct btree_trans *trans,
return 0;
struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
- if (stripe_lru_pos(s.v) <= 1)
- return 0;
+
+ if (stripe_lru_pos(s.v) == STRIPE_LRU_POS_EMPTY) {
+ /*
+ * We can't guarantee that the trigger will always delete
+ * stripes - the stripe might still be open when the last data
+ * in it was deleted
+ */
+ return !bch2_stripe_is_open(c, idx)
+ ? bch2_btree_delete_at(trans, &iter, 0) ?:
+ bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc) ?:
+ bch_err_throw(c, transaction_restart_commit)
+ : 0;
+ }
if (s.v->disk_label == head->disk_label &&
s.v->algorithm == head->algo &&
diff --git a/fs/bcachefs/error.h b/fs/bcachefs/error.h
index 0c3c3a24fc6f..213814787dd6 100644
--- a/fs/bcachefs/error.h
+++ b/fs/bcachefs/error.h
@@ -173,7 +173,8 @@ do { \
if (!bch2_err_matches(_ret, BCH_ERR_fsck_fix) && \
!bch2_err_matches(_ret, BCH_ERR_fsck_ignore)) \
ret = _ret; \
- ret = bch_err_throw(c, fsck_delete_bkey); \
+ else \
+ ret = bch_err_throw(c, fsck_delete_bkey); \
goto fsck_err; \
} while (0)
diff --git a/fs/bcachefs/extent_update.c b/fs/bcachefs/extent_update.c
index c4b0ea1adaa8..7ddb156c765c 100644
--- a/fs/bcachefs/extent_update.c
+++ b/fs/bcachefs/extent_update.c
@@ -98,11 +98,13 @@ static int count_iters_for_insert(struct btree_trans *trans,
return ret2 ?: ret;
}
-int bch2_extent_atomic_end(struct btree_trans *trans,
- struct btree_iter *iter,
- struct bpos *end)
+int bch2_extent_trim_atomic(struct btree_trans *trans,
+ struct btree_iter *iter,
+ struct bkey_i *insert)
{
- unsigned nr_iters = 0;
+ enum bch_bkey_type whiteout_type =
+ extent_whiteout_type(trans->c, iter->btree_id, &insert->k);
+ struct bpos end = insert->k.p;
struct btree_iter copy;
bch2_trans_copy_iter(&copy, iter);
@@ -111,42 +113,54 @@ int bch2_extent_atomic_end(struct btree_trans *trans,
if (ret)
goto err;
+ copy.flags |= BTREE_ITER_nofilter_whiteouts;
+
struct bkey_s_c k;
- for_each_btree_key_max_continue_norestart(copy, *end, 0, k, ret) {
+ unsigned nr_iters = 0;
+ for_each_btree_key_continue_norestart(copy, 0, k, ret) {
unsigned offset = 0;
if (bkey_gt(iter->pos, bkey_start_pos(k.k)))
offset = iter->pos.offset - bkey_start_offset(k.k);
- ret = count_iters_for_insert(trans, k, offset, end, &nr_iters);
- if (ret)
- break;
+ if (bkey_extent_whiteout(k.k)) {
+ if (bpos_gt(k.k->p, insert->k.p)) {
+ if (k.k->type == KEY_TYPE_extent_whiteout)
+ break;
+ else
+ continue;
+ } else if (k.k->type != whiteout_type) {
+ nr_iters += 1;
+ if (nr_iters >= EXTENT_ITERS_MAX) {
+ end = bpos_min(end, k.k->p);
+ break;
+ }
+ }
+ } else {
+ if (bpos_ge(bkey_start_pos(k.k), end))
+ break;
+
+ ret = count_iters_for_insert(trans, k, offset, &end, &nr_iters);
+ if (ret)
+ break;
+ }
}
err:
bch2_trans_iter_exit(&copy);
- return ret < 0 ? ret : 0;
-}
-
-int bch2_extent_trim_atomic(struct btree_trans *trans,
- struct btree_iter *iter,
- struct bkey_i *k)
-{
- struct bpos end = k->k.p;
- int ret = bch2_extent_atomic_end(trans, iter, &end);
- if (ret)
+ if (ret < 0)
return ret;
/* tracepoint */
- if (bpos_lt(end, k->k.p)) {
+ if (bpos_lt(end, insert->k.p)) {
if (trace_extent_trim_atomic_enabled()) {
CLASS(printbuf, buf)();
bch2_bpos_to_text(&buf, end);
prt_newline(&buf);
- bch2_bkey_val_to_text(&buf, trans->c, bkey_i_to_s_c(k));
+ bch2_bkey_val_to_text(&buf, trans->c, bkey_i_to_s_c(insert));
trace_extent_trim_atomic(trans->c, buf.buf);
}
- bch2_cut_back(end, k);
+ bch2_cut_back(end, insert);
}
return 0;
}
diff --git a/fs/bcachefs/extent_update.h b/fs/bcachefs/extent_update.h
index 34467db53f45..2d956d971b11 100644
--- a/fs/bcachefs/extent_update.h
+++ b/fs/bcachefs/extent_update.h
@@ -4,8 +4,6 @@
#include "bcachefs.h"
-int bch2_extent_atomic_end(struct btree_trans *, struct btree_iter *,
- struct bpos *);
int bch2_extent_trim_atomic(struct btree_trans *, struct btree_iter *,
struct bkey_i *);
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index b879a586b7f6..7ab0398707d8 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -995,6 +995,22 @@ void bch2_bkey_drop_device_noerror(struct bkey_s k, unsigned dev)
bch2_bkey_drop_ptrs_noerror(k, ptr, ptr->dev == dev);
}
+void bch2_bkey_drop_ec(struct bkey_i *k, unsigned dev)
+{
+ struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(k));
+ union bch_extent_entry *entry, *ec = NULL;
+
+ bkey_extent_entry_for_each(ptrs, entry) {
+ if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr)
+ ec = entry;
+ else if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_ptr &&
+ entry->ptr.dev == dev) {
+ bch2_bkey_extent_entry_drop(k, ec);
+ return;
+ }
+ }
+}
+
const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c k, unsigned dev)
{
struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
@@ -1757,3 +1773,4 @@ int bch2_cut_back_s(struct bpos where, struct bkey_s k)
memset(bkey_val_end(k), 0, val_u64s_delta * sizeof(u64));
return -val_u64s_delta;
}
+
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index 35ee03cd5065..f6dcb17108cd 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -650,6 +650,7 @@ void bch2_bkey_drop_ptr(struct bkey_s, struct bch_extent_ptr *);
void bch2_bkey_drop_device_noerror(struct bkey_s, unsigned);
void bch2_bkey_drop_device(struct bkey_s, unsigned);
+void bch2_bkey_drop_ec(struct bkey_i *k, unsigned);
#define bch2_bkey_drop_ptrs_noerror(_k, _ptr, _cond) \
do { \
diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
index 0005569ecace..ab542cef96fe 100644
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -42,6 +42,14 @@ struct readpages_iter {
folios folios;
};
+static inline void readpages_iter_folio_revert(struct readahead_control *ractl,
+ struct folio *folio)
+{
+ bch2_folio_release(folio);
+ ractl->_nr_pages += folio_nr_pages(folio);
+ ractl->_index -= folio_nr_pages(folio);
+}
+
static int readpages_iter_init(struct readpages_iter *iter,
struct readahead_control *ractl)
{
@@ -52,9 +60,7 @@ static int readpages_iter_init(struct readpages_iter *iter,
while ((folio = __readahead_folio(ractl))) {
if (!bch2_folio_create(folio, GFP_KERNEL) ||
darray_push(&iter->folios, folio)) {
- bch2_folio_release(folio);
- ractl->_nr_pages += folio_nr_pages(folio);
- ractl->_index -= folio_nr_pages(folio);
+ readpages_iter_folio_revert(ractl, folio);
return iter->folios.nr ? 0 : -ENOMEM;
}
@@ -64,6 +70,15 @@ static int readpages_iter_init(struct readpages_iter *iter,
return 0;
}
+static void readpages_iter_exit(struct readpages_iter *iter,
+ struct readahead_control *ractl)
+{
+ darray_for_each_reverse(iter->folios, folio) {
+ readpages_iter_folio_revert(ractl, *folio);
+ folio_get(*folio);
+ }
+}
+
static inline struct folio *readpage_iter_peek(struct readpages_iter *iter)
{
if (iter->idx >= iter->folios.nr)
@@ -274,6 +289,8 @@ void bch2_readahead(struct readahead_control *ractl)
struct readpages_iter readpages_iter;
struct blk_plug plug;
+ lockdep_assert_held(&inode->ei_pagecache_lock);
+
bch2_inode_opts_get(&opts, c, &inode->ei_inode);
int ret = readpages_iter_init(&readpages_iter, ractl);
@@ -290,7 +307,10 @@ void bch2_readahead(struct readahead_control *ractl)
* scheduling.
*/
blk_start_plug(&plug);
- bch2_pagecache_add_get(inode);
+ if (!bch2_pagecache_add_tryget(inode)) {
+ readpages_iter_exit(&readpages_iter, ractl);
+ goto out;
+ }
struct btree_trans *trans = bch2_trans_get(c);
while ((folio = readpage_iter_peek(&readpages_iter))) {
@@ -317,6 +337,7 @@ void bch2_readahead(struct readahead_control *ractl)
bch2_trans_put(trans);
bch2_pagecache_add_put(inode);
+out:
blk_finish_plug(&plug);
darray_exit(&readpages_iter.folios);
}
@@ -759,7 +780,6 @@ int bch2_write_end(struct file *file, struct address_space *mapping,
struct bch2_folio_reservation *res = fsdata;
unsigned offset = pos - folio_pos(folio);
- lockdep_assert_held(&inode->v.i_rwsem);
BUG_ON(offset + copied > folio_size(folio));
if (unlikely(copied < len && !folio_test_uptodate(folio))) {
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 76d2647d9500..c79b1f6f7db3 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -441,6 +441,12 @@ static struct inode *bch2_alloc_inode(struct super_block *sb)
BUG();
}
+static __maybe_unused int ptrcmp_fn(const struct lockdep_map *l,
+ const struct lockdep_map *r)
+{
+ return cmp_int(l, r);
+}
+
static struct bch_inode_info *__bch2_new_inode(struct bch_fs *c, gfp_t gfp)
{
struct bch_inode_info *inode = alloc_inode_sb(c->vfs_sb,
@@ -452,6 +458,7 @@ static struct bch_inode_info *__bch2_new_inode(struct bch_fs *c, gfp_t gfp)
mutex_init(&inode->ei_update_lock);
two_state_lock_init(&inode->ei_pagecache_lock);
INIT_LIST_HEAD(&inode->ei_vfs_inode_list);
+ lock_set_cmp_fn(&inode->ei_pagecache_lock, ptrcmp_fn, NULL);
inode->ei_flags = 0;
mutex_init(&inode->ei_quota_lock);
memset(&inode->ei_devs_need_flush, 0, sizeof(inode->ei_devs_need_flush));
@@ -826,14 +833,6 @@ int __bch2_unlink(struct inode *vdir, struct dentry *dentry,
bch2_inode_update_after_write(trans, inode, &inode_u,
ATTR_MTIME);
- if (inode_u.bi_subvol) {
- /*
- * Subvolume deletion is asynchronous, but we still want to tell
- * the VFS that it's been deleted here:
- */
- set_nlink(&inode->v, 0);
- }
-
if (IS_CASEFOLDED(vdir))
d_invalidate(dentry);
err:
@@ -865,9 +864,7 @@ static int bch2_symlink(struct mnt_idmap *idmap,
if (IS_ERR(inode))
return bch2_err_class(PTR_ERR(inode));
- inode_lock(&inode->v);
ret = page_symlink(&inode->v, symname, strlen(symname) + 1);
- inode_unlock(&inode->v);
if (unlikely(ret))
goto err;
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 01c1c6372229..ccc44b1fc178 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -266,7 +266,8 @@ create_lostfound:
root_inode.bi_nlink++;
- ret = bch2_inode_create(trans, &lostfound_iter, lostfound, snapshot, cpu);
+ ret = bch2_inode_create(trans, &lostfound_iter, lostfound, snapshot, cpu,
+ inode_opt_get(c, &root_inode, inodes_32bit));
if (ret)
goto err;
@@ -573,7 +574,7 @@ static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 sub
new_inode.bi_subvol = subvolid;
- int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?:
+ int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu, false) ?:
bch2_btree_iter_traverse(&inode_iter) ?:
bch2_inode_write(trans, &inode_iter, &new_inode);
bch2_trans_iter_exit(&inode_iter);
diff --git a/fs/bcachefs/inode.c b/fs/bcachefs/inode.c
index d5e5190f0663..4aa130ff7cf6 100644
--- a/fs/bcachefs/inode.c
+++ b/fs/bcachefs/inode.c
@@ -944,11 +944,12 @@ void bch2_inode_init(struct bch_fs *c, struct bch_inode_unpacked *inode_u,
}
static struct bkey_i_inode_alloc_cursor *
-bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *max)
+bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *max,
+ bool is_32bit)
{
struct bch_fs *c = trans->c;
- u64 cursor_idx = c->opts.inodes_32bit ? 0 : cpu + 1;
+ u64 cursor_idx = is_32bit ? 0 : cpu + 1;
cursor_idx &= ~(~0ULL << c->opts.shard_inode_numbers_bits);
@@ -967,7 +968,7 @@ bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *m
if (IS_ERR(cursor))
return cursor;
- if (c->opts.inodes_32bit) {
+ if (is_32bit) {
*min = BLOCKDEV_INODE_MAX;
*max = INT_MAX;
} else {
@@ -996,11 +997,11 @@ bch2_inode_alloc_cursor_get(struct btree_trans *trans, u64 cpu, u64 *min, u64 *m
int bch2_inode_create(struct btree_trans *trans,
struct btree_iter *iter,
struct bch_inode_unpacked *inode_u,
- u32 snapshot, u64 cpu)
+ u32 snapshot, u64 cpu, bool is_32bit)
{
u64 min, max;
struct bkey_i_inode_alloc_cursor *cursor =
- bch2_inode_alloc_cursor_get(trans, cpu, &min, &max);
+ bch2_inode_alloc_cursor_get(trans, cpu, &min, &max, is_32bit);
int ret = PTR_ERR_OR_ZERO(cursor);
if (ret)
return ret;
diff --git a/fs/bcachefs/inode.h b/fs/bcachefs/inode.h
index b8ec3e628d90..79092ea74844 100644
--- a/fs/bcachefs/inode.h
+++ b/fs/bcachefs/inode.h
@@ -172,7 +172,7 @@ void bch2_inode_init(struct bch_fs *, struct bch_inode_unpacked *,
struct bch_inode_unpacked *);
int bch2_inode_create(struct btree_trans *, struct btree_iter *,
- struct bch_inode_unpacked *, u32, u64);
+ struct bch_inode_unpacked *, u32, u64, bool);
int bch2_inode_rm(struct bch_fs *, subvol_inum);
diff --git a/fs/bcachefs/inode_format.h b/fs/bcachefs/inode_format.h
index 1f00938b1bdc..e07fa6cc99bd 100644
--- a/fs/bcachefs/inode_format.h
+++ b/fs/bcachefs/inode_format.h
@@ -144,7 +144,8 @@ enum inode_opt_id {
x(unlinked, 7) \
x(backptr_untrusted, 8) \
x(has_child_snapshot, 9) \
- x(has_case_insensitive, 10)
+ x(has_case_insensitive, 10) \
+ x(31bit_dirent_offset, 11)
/* bits 20+ reserved for packed fields below: */
diff --git a/fs/bcachefs/migrate.c b/fs/bcachefs/migrate.c
index a66d01d04e57..892990b4a6a6 100644
--- a/fs/bcachefs/migrate.c
+++ b/fs/bcachefs/migrate.c
@@ -125,6 +125,10 @@ static int bch2_dev_usrdata_drop(struct bch_fs *c,
if (!btree_type_has_ptrs(id))
continue;
+ /* Stripe keys have pointers, but are handled separately */
+ if (id == BTREE_ID_stripes)
+ continue;
+
int ret = for_each_btree_key_commit(trans, iter, id, POS_MIN,
BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c
index 30fe269d531d..4f41f1f6ec6c 100644
--- a/fs/bcachefs/move.c
+++ b/fs/bcachefs/move.c
@@ -150,7 +150,7 @@ static void move_write_done(struct bch_write_op *op)
bch2_write_op_to_text(&buf, op);
trace_io_move_write_fail(c, buf.buf);
}
- this_cpu_inc(c->counters[BCH_COUNTER_io_move_write_fail]);
+ count_event(c, io_move_write_fail);
ctxt->write_error = true;
}
@@ -344,7 +344,7 @@ int bch2_move_extent(struct moving_context *ctxt,
if (!data_opts.rewrite_ptrs &&
!data_opts.extra_replicas &&
!data_opts.scrub) {
- if (data_opts.kill_ptrs) {
+ if (data_opts.kill_ptrs|data_opts.kill_ec_ptrs) {
this_cpu_add(c->counters[BCH_COUNTER_io_move_drop_only], k.k->size);
return bch2_extent_drop_ptrs(trans, iter, k, &io_opts, &data_opts);
} else {
@@ -542,7 +542,7 @@ int bch2_move_ratelimit(struct moving_context *ctxt)
if (ctxt->wait_on_copygc && c->copygc_running) {
bch2_moving_ctxt_flush_all(ctxt);
- wait_event_killable(c->copygc_running_wq,
+ wait_event_freezable(c->copygc_running_wq,
!c->copygc_running ||
(is_kthread && kthread_should_stop()));
}
@@ -819,7 +819,9 @@ static int bch2_move_data(struct bch_fs *c,
unsigned min_depth_this_btree = min_depth;
- if (!btree_type_has_ptrs(id))
+ /* Stripe keys have pointers, but are handled separately */
+ if (!btree_type_has_ptrs(id) ||
+ id == BTREE_ID_stripes)
min_depth_this_btree = max(min_depth_this_btree, 1);
for (unsigned level = min_depth_this_btree;
@@ -1280,7 +1282,17 @@ static bool drop_extra_replicas_pred(struct bch_fs *c, void *arg,
i++;
}
- return data_opts->kill_ptrs != 0;
+ i = 0;
+ bkey_for_each_ptr_decode(k.k, bch2_bkey_ptrs_c(k), p, entry) {
+ if (p.has_ec && durability - p.ec.redundancy >= replicas) {
+ data_opts->kill_ec_ptrs |= BIT(i);
+ durability -= p.ec.redundancy;
+ }
+
+ i++;
+ }
+
+ return (data_opts->kill_ptrs|data_opts->kill_ec_ptrs) != 0;
}
static bool scrub_pred(struct bch_fs *c, void *_arg,
diff --git a/fs/bcachefs/namei.c b/fs/bcachefs/namei.c
index d1019052f182..5c321a0d1f89 100644
--- a/fs/bcachefs/namei.c
+++ b/fs/bcachefs/namei.c
@@ -62,7 +62,8 @@ int bch2_create_trans(struct btree_trans *trans,
if (flags & BCH_CREATE_TMPFILE)
new_inode->bi_flags |= BCH_INODE_unlinked;
- ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu);
+ ret = bch2_inode_create(trans, &inode_iter, new_inode, snapshot, cpu,
+ inode_opt_get(c, dir_u, inodes_32bit));
if (ret)
goto err;
diff --git a/fs/bcachefs/rebalance.c b/fs/bcachefs/rebalance.c
index 17ca56b0e2ac..e1db63d75a99 100644
--- a/fs/bcachefs/rebalance.c
+++ b/fs/bcachefs/rebalance.c
@@ -444,8 +444,9 @@ static int do_rebalance_extent(struct moving_context *ctxt,
bch2_bkey_buf_init(&sk);
- ret = bkey_err(k = next_rebalance_extent(trans, work_pos,
- extent_iter, &io_opts, &data_opts));
+ ret = lockrestart_do(trans,
+ bkey_err(k = next_rebalance_extent(trans, work_pos,
+ extent_iter, &io_opts, &data_opts)));
if (ret || !k.k)
goto out;
@@ -587,7 +588,7 @@ static int do_rebalance(struct moving_context *ctxt)
ret = k->k.type == KEY_TYPE_cookie
? do_rebalance_scan(ctxt, k->k.p.inode,
le64_to_cpu(bkey_i_to_cookie(k)->v.cookie))
- : lockrestart_do(trans, do_rebalance_extent(ctxt, k->k.p, &extent_iter));
+ : do_rebalance_extent(ctxt, k->k.p, &extent_iter);
if (ret)
break;
}
diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c
index 21aa2edb13ac..29e81f96db0f 100644
--- a/fs/bcachefs/recovery.c
+++ b/fs/bcachefs/recovery.c
@@ -607,7 +607,7 @@ static int read_btree_roots(struct bch_fs *c)
c, btree_root_read_error,
"error reading btree root %s: %s",
buf.buf, bch2_err_str(ret))) {
- if (btree_id_is_alloc(i))
+ if (btree_id_can_reconstruct(i))
r->error = 0;
ret = 0;
}
diff --git a/fs/bcachefs/sb-counters_format.h b/fs/bcachefs/sb-counters_format.h
index f3ea53a55384..44bc12573a0c 100644
--- a/fs/bcachefs/sb-counters_format.h
+++ b/fs/bcachefs/sb-counters_format.h
@@ -101,7 +101,9 @@ enum counters_flags {
x(trans_restart_write_buffer_flush, 75, TYPE_COUNTER) \
x(trans_restart_split_race, 76, TYPE_COUNTER) \
x(write_buffer_flush_slowpath, 77, TYPE_COUNTER) \
- x(write_buffer_flush_sync, 78, TYPE_COUNTER)
+ x(write_buffer_flush_sync, 78, TYPE_COUNTER) \
+ x(accounting_key_to_wb_slowpath, 94, TYPE_COUNTER) \
+ x(error_throw, 93, TYPE_COUNTER)
enum bch_persistent_counters {
#define x(t, n, ...) BCH_COUNTER_##t,
diff --git a/fs/bcachefs/snapshot.c b/fs/bcachefs/snapshot.c
index 84f987d3a02a..eab0c1e3ff56 100644
--- a/fs/bcachefs/snapshot.c
+++ b/fs/bcachefs/snapshot.c
@@ -1673,7 +1673,8 @@ static int bch2_fix_child_of_deleted_snapshot(struct btree_trans *trans,
return ret;
darray_for_each(*deleted, i)
- nr_deleted_ancestors += bch2_snapshot_is_ancestor(c, s->k.p.offset, i->id);
+ nr_deleted_ancestors += bch2_snapshots_same_tree(c, s->k.p.offset, i->id) &&
+ bch2_snapshot_is_ancestor(c, s->k.p.offset, i->id);
if (!nr_deleted_ancestors)
return 0;
diff --git a/fs/bcachefs/snapshot.h b/fs/bcachefs/snapshot.h
index fef32a0118c4..28d9a29a1fd0 100644
--- a/fs/bcachefs/snapshot.h
+++ b/fs/bcachefs/snapshot.h
@@ -51,6 +51,17 @@ static inline u32 bch2_snapshot_tree(struct bch_fs *c, u32 id)
return s ? s->tree : 0;
}
+static inline bool bch2_snapshots_same_tree(struct bch_fs *c, u32 id1, u32 id2)
+{
+ if (id1 == id2)
+ return true;
+
+ guard(rcu)();
+ const struct snapshot_t *s1 = snapshot_t(c, id1);
+ const struct snapshot_t *s2 = snapshot_t(c, id2);
+ return s1 && s2 && s1->tree == s2->tree;
+}
+
static inline u32 __bch2_snapshot_parent_early(struct bch_fs *c, u32 id)
{
const struct snapshot_t *s = snapshot_t(c, id);
@@ -157,6 +168,10 @@ bool __bch2_snapshot_is_ancestor(struct bch_fs *, u32, u32);
static inline bool bch2_snapshot_is_ancestor(struct bch_fs *c, u32 id, u32 ancestor)
{
+ EBUG_ON(!id);
+ EBUG_ON(!ancestor);
+ EBUG_ON(!bch2_snapshots_same_tree(c, id, ancestor));
+
return id == ancestor
? true
: __bch2_snapshot_is_ancestor(c, id, ancestor);
diff --git a/fs/bcachefs/str_hash.h b/fs/bcachefs/str_hash.h
index 8c0fb44929cc..2a61cc36ddbf 100644
--- a/fs/bcachefs/str_hash.h
+++ b/fs/bcachefs/str_hash.h
@@ -34,6 +34,7 @@ bch2_str_hash_opt_to_type(struct bch_fs *c, enum bch_str_hash_opts opt)
struct bch_hash_info {
u32 inum_snapshot;
u8 type;
+ bool is_31bit;
struct unicode_map *cf_encoding;
/*
* For crc32 or crc64 string hashes the first key value of
@@ -48,6 +49,7 @@ bch2_hash_info_init(struct bch_fs *c, const struct bch_inode_unpacked *bi)
struct bch_hash_info info = {
.inum_snapshot = bi->bi_snapshot,
.type = INODE_STR_HASH(bi),
+ .is_31bit = bi->bi_flags & BCH_INODE_31bit_dirent_offset,
.cf_encoding = bch2_inode_casefold(c, bi) ? c->cf_encoding : NULL,
.siphash_key = { .k0 = bi->bi_hash_seed }
};
@@ -112,8 +114,8 @@ static inline void bch2_str_hash_update(struct bch_str_hash_ctx *ctx,
}
}
-static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx,
- const struct bch_hash_info *info)
+static inline u64 __bch2_str_hash_end(struct bch_str_hash_ctx *ctx,
+ const struct bch_hash_info *info)
{
switch (info->type) {
case BCH_STR_HASH_crc32c:
@@ -128,6 +130,14 @@ static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx,
}
}
+static inline u64 bch2_str_hash_end(struct bch_str_hash_ctx *ctx,
+ const struct bch_hash_info *info,
+ bool maybe_31bit)
+{
+ return __bch2_str_hash_end(ctx, info) &
+ (maybe_31bit && info->is_31bit ? INT_MAX : U64_MAX);
+}
+
struct bch_hash_desc {
enum btree_id btree_id;
u8 key_type;
diff --git a/fs/bcachefs/super-io.c b/fs/bcachefs/super-io.c
index be7ed612d28f..369465a4de77 100644
--- a/fs/bcachefs/super-io.c
+++ b/fs/bcachefs/super-io.c
@@ -89,7 +89,7 @@ int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version v
prt_str(&buf, "requested incompat feature ");
bch2_version_to_text(&buf, version);
prt_str(&buf, " currently not enabled, allowed up to ");
- bch2_version_to_text(&buf, version);
+ bch2_version_to_text(&buf, c->sb.version_incompat_allowed);
prt_printf(&buf, "\n set version_upgrade=incompat to enable");
bch_notice(c, "%s", buf.buf);
diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c
index ef15e614f4f3..09e7f8ae9922 100644
--- a/fs/bcachefs/super.c
+++ b/fs/bcachefs/super.c
@@ -2542,11 +2542,6 @@ struct bch_fs *bch2_fs_open(darray_const_str *devices,
BUG_ON(darray_push(&sbs, sb));
}
- if (opts->nochanges && !opts->read_only) {
- ret = bch_err_throw(c, erofs_nochanges);
- goto err_print;
- }
-
darray_for_each(sbs, sb)
if (!best || sb_cmp(sb->sb, best->sb) > 0)
best = sb;
diff --git a/fs/bcachefs/trace.h b/fs/bcachefs/trace.h
index 3776a1403104..269cdf1a87a4 100644
--- a/fs/bcachefs/trace.h
+++ b/fs/bcachefs/trace.h
@@ -1179,6 +1179,11 @@ DEFINE_EVENT(transaction_event, trans_restart_write_buffer_flush,
TP_ARGS(trans, caller_ip)
);
+DEFINE_EVENT(fs_str, accounting_key_to_wb_slowpath,
+ TP_PROTO(struct bch_fs *c, const char *str),
+ TP_ARGS(c, str)
+);
+
TRACE_EVENT(path_downgrade,
TP_PROTO(struct btree_trans *trans,
unsigned long caller_ip,
diff --git a/fs/bcachefs/two_state_shared_lock.h b/fs/bcachefs/two_state_shared_lock.h
index 7f647846b511..95986f5ef894 100644
--- a/fs/bcachefs/two_state_shared_lock.h
+++ b/fs/bcachefs/two_state_shared_lock.h
@@ -15,14 +15,28 @@
typedef struct {
atomic_long_t v;
wait_queue_head_t wait;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ struct lockdep_map dep_map;
+#endif
} two_state_lock_t;
-static inline void two_state_lock_init(two_state_lock_t *lock)
+static inline void __two_state_lock_init(two_state_lock_t *lock,
+ const char *name, struct lock_class_key *key)
{
atomic_long_set(&lock->v, 0);
init_waitqueue_head(&lock->wait);
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+ debug_check_no_locks_freed((void *) lock, sizeof(*lock));
+ lockdep_init_map(&lock->dep_map, name, key, 0);
+#endif
}
+#define two_state_lock_init(_lock) \
+do { \
+ static struct lock_class_key __key; \
+ __two_state_lock_init((_lock), #_lock, &__key); \
+} while (0)
+
static inline void bch2_two_state_unlock(two_state_lock_t *lock, int s)
{
long i = s ? 1 : -1;
@@ -31,9 +45,11 @@ static inline void bch2_two_state_unlock(two_state_lock_t *lock, int s)
if (atomic_long_sub_return_release(i, &lock->v) == 0)
wake_up_all(&lock->wait);
+
+ lock_release(&lock->dep_map, _THIS_IP_);
}
-static inline bool bch2_two_state_trylock(two_state_lock_t *lock, int s)
+static inline bool __bch2_two_state_trylock(two_state_lock_t *lock, int s)
{
long i = s ? 1 : -1;
long old;
@@ -47,11 +63,20 @@ static inline bool bch2_two_state_trylock(two_state_lock_t *lock, int s)
return true;
}
+static inline bool bch2_two_state_trylock(two_state_lock_t *lock, int s)
+{
+ bool ret = __bch2_two_state_trylock(lock, s);
+ if (ret)
+ lock_acquire_exclusive(&lock->dep_map, 0, true, NULL, _THIS_IP_);
+ return ret;
+}
+
void __bch2_two_state_lock(two_state_lock_t *, int);
static inline void bch2_two_state_lock(two_state_lock_t *lock, int s)
{
- if (!bch2_two_state_trylock(lock, s))
+ lock_acquire_exclusive(&lock->dep_map, 0, 0, NULL, _THIS_IP_);
+ if (!__bch2_two_state_trylock(lock, s))
__bch2_two_state_lock(lock, s);
}
diff --git a/fs/bcachefs/xattr.c b/fs/bcachefs/xattr.c
index 6094b568dd33..6d7303008b19 100644
--- a/fs/bcachefs/xattr.c
+++ b/fs/bcachefs/xattr.c
@@ -4,6 +4,7 @@
#include "acl.h"
#include "bkey_methods.h"
#include "btree_update.h"
+#include "dirent.h"
#include "extents.h"
#include "fs.h"
#include "rebalance.h"
@@ -25,7 +26,7 @@ static u64 bch2_xattr_hash(const struct bch_hash_info *info,
bch2_str_hash_update(&ctx, info, &key->type, sizeof(key->type));
bch2_str_hash_update(&ctx, info, key->name.name, key->name.len);
- return bch2_str_hash_end(&ctx, info);
+ return bch2_str_hash_end(&ctx, info, false);
}
static u64 xattr_hash_key(const struct bch_hash_info *info, const void *key)
@@ -484,6 +485,22 @@ static int inode_opt_set_fn(struct btree_trans *trans,
return ret;
}
+ if (s->id == Inode_opt_inodes_32bit &&
+ !bch2_request_incompat_feature(trans->c, bcachefs_metadata_version_31bit_dirent_offset)) {
+ /*
+ * Make sure the dir is empty, as otherwise we'd need to
+ * rehash everything and update the dirent keys.
+ */
+ int ret = bch2_empty_dir_trans(trans, inode_inum(inode));
+ if (ret < 0)
+ return ret;
+
+ if (s->defined)
+ bi->bi_flags |= BCH_INODE_31bit_dirent_offset;
+ else
+ bi->bi_flags &= ~BCH_INODE_31bit_dirent_offset;
+ }
+
if (s->defined)
bi->bi_fields_set |= 1U << s->id;
else