diff options
-rw-r--r-- | fs/bcachefs/acl.c | 4 | ||||
-rw-r--r-- | fs/bcachefs/alloc_background.c | 18 | ||||
-rw-r--r-- | fs/bcachefs/btree_gc.c | 31 | ||||
-rw-r--r-- | fs/bcachefs/btree_io.c | 50 | ||||
-rw-r--r-- | fs/bcachefs/btree_iter.c | 5 | ||||
-rw-r--r-- | fs/bcachefs/btree_key_cache.c | 8 | ||||
-rw-r--r-- | fs/bcachefs/btree_node_scan.c | 36 | ||||
-rw-r--r-- | fs/bcachefs/btree_update_interior.c | 41 | ||||
-rw-r--r-- | fs/bcachefs/errcode.h | 8 | ||||
-rw-r--r-- | fs/bcachefs/extents.c | 18 | ||||
-rw-r--r-- | fs/bcachefs/extents.h | 2 | ||||
-rw-r--r-- | fs/bcachefs/fs-io-direct.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/fsck.c | 7 | ||||
-rw-r--r-- | fs/bcachefs/io_write.c | 7 | ||||
-rw-r--r-- | fs/bcachefs/journal.c | 3 | ||||
-rw-r--r-- | fs/bcachefs/journal_io.c | 2 | ||||
-rw-r--r-- | fs/bcachefs/move.c | 16 | ||||
-rw-r--r-- | fs/bcachefs/recovery.c | 7 | ||||
-rw-r--r-- | fs/bcachefs/super.c | 2 | ||||
-rw-r--r-- | mm/shrinker.c | 1 |
20 files changed, 176 insertions, 92 deletions
diff --git a/fs/bcachefs/acl.c b/fs/bcachefs/acl.c index 307824d6eccb..8f970dc19dea 100644 --- a/fs/bcachefs/acl.c +++ b/fs/bcachefs/acl.c @@ -138,8 +138,8 @@ static struct posix_acl *bch2_acl_from_disk(struct btree_trans *trans, acl = allocate_dropping_locks(trans, ret, posix_acl_alloc(count, _gfp)); - if (!acl) - return ERR_PTR(-ENOMEM); + if (!acl && !ret) + ret = bch_err_throw(trans->c, ENOMEM_acl); if (ret) { kfree(acl); return ERR_PTR(ret); diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c index 4c1604fd80f9..d5cb07c7f4d6 100644 --- a/fs/bcachefs/alloc_background.c +++ b/fs/bcachefs/alloc_background.c @@ -337,9 +337,10 @@ void bch2_alloc_v4_swab(struct bkey_s k) } static inline void __bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs *c, - unsigned dev, const struct bch_alloc_v4 *a) + struct bkey_s_c k, + const struct bch_alloc_v4 *a) { - struct bch_dev *ca = c ? bch2_dev_tryget_noerror(c, dev) : NULL; + struct bch_dev *ca = c ? bch2_dev_tryget_noerror(c, k.k->p.inode) : NULL; prt_newline(out); printbuf_indent_add(out, 2); @@ -348,11 +349,14 @@ static inline void __bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs * bch2_prt_data_type(out, a->data_type); prt_newline(out); prt_printf(out, "journal_seq_nonempty %llu\n", a->journal_seq_nonempty); - prt_printf(out, "journal_seq_empty %llu\n", a->journal_seq_empty); + if (bkey_val_bytes(k.k) > offsetof(struct bch_alloc_v4, journal_seq_empty)) + prt_printf(out, "journal_seq_empty %llu\n", a->journal_seq_empty); + prt_printf(out, "need_discard %llu\n", BCH_ALLOC_V4_NEED_DISCARD(a)); prt_printf(out, "need_inc_gen %llu\n", BCH_ALLOC_V4_NEED_INC_GEN(a)); prt_printf(out, "dirty_sectors %u\n", a->dirty_sectors); - prt_printf(out, "stripe_sectors %u\n", a->stripe_sectors); + if (bkey_val_bytes(k.k) > offsetof(struct bch_alloc_v4, stripe_sectors)) + prt_printf(out, "stripe_sectors %u\n", a->stripe_sectors); prt_printf(out, "cached_sectors %u\n", a->cached_sectors); prt_printf(out, "stripe %u\n", a->stripe); prt_printf(out, "stripe_redundancy %u\n", a->stripe_redundancy); @@ -372,12 +376,12 @@ void bch2_alloc_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c struct bch_alloc_v4 _a; const struct bch_alloc_v4 *a = bch2_alloc_to_v4(k, &_a); - __bch2_alloc_v4_to_text(out, c, k.k->p.inode, a); + __bch2_alloc_v4_to_text(out, c, k, a); } void bch2_alloc_v4_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) { - __bch2_alloc_v4_to_text(out, c, k.k->p.inode, bkey_s_c_to_alloc_v4(k).v); + __bch2_alloc_v4_to_text(out, c, k, bkey_s_c_to_alloc_v4(k).v); } void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out) @@ -385,7 +389,7 @@ void __bch2_alloc_to_v4(struct bkey_s_c k, struct bch_alloc_v4 *out) if (k.k->type == KEY_TYPE_alloc_v4) { void *src, *dst; - *out = *bkey_s_c_to_alloc_v4(k).v; + bkey_val_copy(out, bkey_s_c_to_alloc_v4(k)); src = alloc_v4_backpointers(out); SET_BCH_ALLOC_V4_BACKPOINTERS_START(out, BCH_ALLOC_V4_U64s); diff --git a/fs/bcachefs/btree_gc.c b/fs/bcachefs/btree_gc.c index 34cb8a4324dc..e95bb6849aef 100644 --- a/fs/bcachefs/btree_gc.c +++ b/fs/bcachefs/btree_gc.c @@ -44,10 +44,6 @@ #include <linux/rcupdate.h> #include <linux/sched/task.h> -#define DROP_THIS_NODE 10 -#define DROP_PREV_NODE 11 -#define DID_FILL_FROM_SCAN 12 - /* * Returns true if it's a btree we can easily reconstruct, or otherwise won't * cause data loss if it's missing: @@ -252,7 +248,7 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * return ret; *pulled_from_scan = cur->data->min_key; - ret = DID_FILL_FROM_SCAN; + ret = bch_err_throw(c, topology_repair_did_fill_from_scan); } else { if (mustfix_fsck_err(trans, btree_node_topology_bad_min_key, "btree node with incorrect min_key%s", buf.buf)) @@ -263,7 +259,7 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * if (bpos_ge(prev->data->min_key, cur->data->min_key)) { /* fully? */ if (mustfix_fsck_err(trans, btree_node_topology_overwritten_by_next_node, "btree node overwritten by next node%s", buf.buf)) - ret = DROP_PREV_NODE; + ret = bch_err_throw(c, topology_repair_drop_prev_node); } else { if (mustfix_fsck_err(trans, btree_node_topology_bad_max_key, "btree node with incorrect max_key%s", buf.buf)) @@ -274,7 +270,7 @@ static int btree_check_node_boundaries(struct btree_trans *trans, struct btree * if (bpos_ge(expected_start, cur->data->max_key)) { /* fully? */ if (mustfix_fsck_err(trans, btree_node_topology_overwritten_by_prev_node, "btree node overwritten by prev node%s", buf.buf)) - ret = DROP_THIS_NODE; + ret = bch_err_throw(c, topology_repair_drop_this_node); } else { if (mustfix_fsck_err(trans, btree_node_topology_bad_min_key, "btree node with incorrect min_key%s", buf.buf)) @@ -314,7 +310,7 @@ static int btree_repair_node_end(struct btree_trans *trans, struct btree *b, return ret; *pulled_from_scan = b->key.k.p; - ret = DID_FILL_FROM_SCAN; + ret = bch_err_throw(c, topology_repair_did_fill_from_scan); } else { ret = set_node_max(c, child, b->key.k.p); } @@ -391,15 +387,15 @@ again: ret = lockrestart_do(trans, btree_check_node_boundaries(trans, b, prev, cur, pulled_from_scan)); - if (ret < 0) + if (ret && !bch2_err_matches(ret, BCH_ERR_topology_repair)) goto err; - if (ret == DID_FILL_FROM_SCAN) { + if (bch2_err_matches(ret, BCH_ERR_topology_repair_did_fill_from_scan)) { new_pass = true; ret = 0; } - if (ret == DROP_THIS_NODE) { + if (bch2_err_matches(ret, BCH_ERR_topology_repair_drop_this_node)) { six_unlock_read(&cur->c.lock); bch2_btree_node_evict(trans, cur_k.k); ret = bch2_journal_key_delete(c, b->c.btree_id, @@ -414,7 +410,7 @@ again: six_unlock_read(&prev->c.lock); prev = NULL; - if (ret == DROP_PREV_NODE) { + if (bch2_err_matches(ret, BCH_ERR_topology_repair_drop_prev_node)) { bch_info(c, "dropped prev node"); bch2_btree_node_evict(trans, prev_k.k); ret = bch2_journal_key_delete(c, b->c.btree_id, @@ -436,7 +432,7 @@ again: BUG_ON(cur); ret = lockrestart_do(trans, btree_repair_node_end(trans, b, prev, pulled_from_scan)); - if (ret == DID_FILL_FROM_SCAN) { + if (bch2_err_matches(ret, BCH_ERR_topology_repair_did_fill_from_scan)) { new_pass = true; ret = 0; } @@ -477,7 +473,7 @@ again: six_unlock_read(&cur->c.lock); cur = NULL; - if (ret == DROP_THIS_NODE) { + if (bch2_err_matches(ret, BCH_ERR_topology_repair_drop_this_node)) { bch2_btree_node_evict(trans, cur_k.k); ret = bch2_journal_key_delete(c, b->c.btree_id, b->c.level, cur_k.k->k.p); @@ -504,7 +500,7 @@ again: if (mustfix_fsck_err_on(!have_child, c, btree_node_topology_interior_node_empty, "empty interior btree node at %s", buf.buf)) - ret = DROP_THIS_NODE; + ret = bch_err_throw(c, topology_repair_drop_this_node); err: fsck_err: if (!IS_ERR_OR_NULL(prev)) @@ -521,7 +517,8 @@ fsck_err: bch2_bkey_buf_exit(&prev_k, c); bch2_bkey_buf_exit(&cur_k, c); - bch_err_fn(c, ret); + if (!bch2_err_matches(ret, BCH_ERR_topology_repair)) + bch_err_fn(c, ret); return ret; } @@ -592,7 +589,7 @@ recover: ret = bch2_btree_repair_topology_recurse(trans, b, &pulled_from_scan); six_unlock_read(&b->c.lock); - if (ret == DROP_THIS_NODE) { + if (bch2_err_matches(ret, BCH_ERR_topology_repair_drop_this_node)) { scoped_guard(mutex, &c->btree_cache.lock) bch2_btree_node_hash_remove(&c->btree_cache, b); diff --git a/fs/bcachefs/btree_io.c b/fs/bcachefs/btree_io.c index bd86dd7151a1..2bd8422eb72c 100644 --- a/fs/bcachefs/btree_io.c +++ b/fs/bcachefs/btree_io.c @@ -563,15 +563,11 @@ static int __btree_err(int ret, struct printbuf *err_msg, const char *fmt, ...) { - if (c->recovery.curr_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes) - return ret == -BCH_ERR_btree_node_read_err_fixable - ? bch_err_throw(c, fsck_fix) - : ret; - + bool in_scan = c->recovery.curr_pass == BCH_RECOVERY_PASS_scan_for_btree_nodes; bool have_retry = false; int ret2; - if (ca) { + if (ca && !in_scan) { bch2_mark_btree_validate_failure(failed, ca->dev_idx); struct extent_ptr_decoded pick; @@ -585,12 +581,14 @@ static int __btree_err(int ret, if (!have_retry && ret == -BCH_ERR_btree_node_read_err_must_retry) ret = bch_err_throw(c, btree_node_read_err_bad_node); - bch2_sb_error_count(c, err_type); + if (!in_scan) + bch2_sb_error_count(c, err_type); bool print_deferred = err_msg && rw == READ && - !(test_bit(BCH_FS_in_fsck, &c->flags) && - c->opts.fix_errors == FSCK_FIX_ask); + (!(test_bit(BCH_FS_in_fsck, &c->flags) && + c->opts.fix_errors == FSCK_FIX_ask) || + in_scan); CLASS(printbuf, out)(); bch2_log_msg_start(c, &out); @@ -603,11 +601,17 @@ static int __btree_err(int ret, va_list args; va_start(args, fmt); prt_vprintf(err_msg, fmt, args); - va_end(args); + va_end(args);; - if (print_deferred) { + if (print_deferred) prt_newline(err_msg); + if (in_scan) + return ret == -BCH_ERR_btree_node_read_err_fixable + ? bch_err_throw(c, fsck_fix) + : ret; + + if (print_deferred) { switch (ret) { case -BCH_ERR_btree_node_read_err_fixable: ret2 = bch2_fsck_err_opt(c, FSCK_CAN_FIX, err_type); @@ -1405,10 +1409,8 @@ static void btree_node_read_work(struct work_struct *work) ret = bch2_bkey_pick_read_device(c, bkey_i_to_s_c(&b->key), &failed, &rb->pick, -1); - if (ret <= 0) { - set_btree_node_read_error(b); + if (ret <= 0) break; - } ca = bch2_dev_get_ioref(c, rb->pick.ptr.dev, READ, BCH_DEV_READ_REF_btree_node_read); rb->have_ioref = ca != NULL; @@ -1442,27 +1444,21 @@ start: bch2_maybe_corrupt_bio(bio, bch2_btree_read_corrupt_ratio); ret = bch2_btree_node_read_done(c, ca, b, &failed, &buf); - if (ret == -BCH_ERR_btree_node_read_err_want_retry || - ret == -BCH_ERR_btree_node_read_err_must_retry) - continue; - - if (ret) - set_btree_node_read_error(b); - - break; + if (ret != -BCH_ERR_btree_node_read_err_want_retry && + ret != -BCH_ERR_btree_node_read_err_must_retry) + break; } bch2_io_failures_to_text(&buf, c, &failed); - if (btree_node_read_error(b)) - bch2_btree_lost_data(c, &buf, b->c.btree_id); - /* * only print retry success if we read from a replica with no errors */ - if (btree_node_read_error(b)) + if (ret) { + set_btree_node_read_error(b); + bch2_btree_lost_data(c, &buf, b->c.btree_id); prt_printf(&buf, "ret %s", bch2_err_str(ret)); - else if (failed.nr) { + } else if (failed.nr) { if (!bch2_dev_io_failures(&failed, rb->pick.ptr.dev)) prt_printf(&buf, "retry success"); else diff --git a/fs/bcachefs/btree_iter.c b/fs/bcachefs/btree_iter.c index cc771affa511..a282c3886168 100644 --- a/fs/bcachefs/btree_iter.c +++ b/fs/bcachefs/btree_iter.c @@ -2860,8 +2860,9 @@ struct bkey_s_c bch2_btree_iter_peek_slot(struct btree_trans *trans, struct btre !bkey_deleted(k.k) && (k2 = btree_trans_peek_key_cache(trans, iter, iter->pos)).k) { k = k2; - if (!bkey_err(k)) - iter->k = *k.k; + if (bkey_err(k)) + goto out; + iter->k = *k.k; } if (unlikely(k.k->type == KEY_TYPE_whiteout && diff --git a/fs/bcachefs/btree_key_cache.c b/fs/bcachefs/btree_key_cache.c index ebba14da92b4..d61b782087ce 100644 --- a/fs/bcachefs/btree_key_cache.c +++ b/fs/bcachefs/btree_key_cache.c @@ -254,11 +254,13 @@ static int btree_key_cache_create(struct btree_trans *trans, struct bkey_i *new_k = allocate_dropping_locks(trans, ret, kmalloc(key_u64s * sizeof(u64), _gfp)); - if (unlikely(!new_k)) { + if (unlikely(!new_k && !ret)) { bch_err(trans->c, "error allocating memory for key cache key, btree %s u64s %u", bch2_btree_id_str(ck->key.btree_id), key_u64s); ret = bch_err_throw(c, ENOMEM_btree_key_cache_fill); - } else if (ret) { + } + + if (unlikely(ret)) { kfree(new_k); goto err; } @@ -407,7 +409,7 @@ int bch2_btree_path_traverse_cached(struct btree_trans *trans, btree_node_unlock(trans, path, 0); path->l[0].b = ERR_PTR(ret); } - } else { + } else if (!(flags & BTREE_ITER_cached_nofill)) { BUG_ON(path->uptodate); BUG_ON(!path->nodes_locked); } diff --git a/fs/bcachefs/btree_node_scan.c b/fs/bcachefs/btree_node_scan.c index d997e3818c30..c518e18ce5dc 100644 --- a/fs/bcachefs/btree_node_scan.c +++ b/fs/bcachefs/btree_node_scan.c @@ -155,17 +155,6 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, if (BTREE_NODE_LEVEL(bn) >= BTREE_MAX_DEPTH) return; - if (BTREE_NODE_ID(bn) >= BTREE_ID_NR_MAX) - return; - - bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ); - bio->bi_iter.bi_sector = offset; - bch2_bio_map(bio, b->data, c->opts.btree_node_size); - - submit_time = local_clock(); - submit_bio_wait(bio); - bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status); - rcu_read_lock(); struct found_btree_node n = { .btree_id = BTREE_NODE_ID(bn), @@ -182,10 +171,26 @@ static void try_read_btree_node(struct find_btree_nodes *f, struct bch_dev *ca, }; rcu_read_unlock(); + bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ); + bio->bi_iter.bi_sector = offset; + bch2_bio_map(bio, b->data, c->opts.btree_node_size); + + submit_time = local_clock(); + submit_bio_wait(bio); + bch2_account_io_completion(ca, BCH_MEMBER_ERROR_read, submit_time, !bio->bi_status); + found_btree_node_to_key(&b->key, &n); CLASS(printbuf, buf)(); - if (!bch2_btree_node_read_done(c, ca, b, NULL, &buf)) { + + found_btree_node_to_text(&buf, c, &n); + prt_newline(&buf); + + int ret = bch2_btree_node_read_done(c, ca, b, NULL, &buf); + + bch_verbose(ca, "attempted to read, ret %s\n%s", bch2_err_str(ret), buf.buf); + + if (!ret) { /* read_done will swap out b->data for another buffer */ bn = b->data; /* @@ -270,6 +275,9 @@ static int read_btree_nodes(struct find_btree_nodes *f) int ret = 0; closure_init_stack(&cl); + CLASS(printbuf, buf)(); + + prt_printf(&buf, "scanning for btree nodes on"); for_each_online_member(c, ca, BCH_DEV_READ_REF_btree_node_scan) { if (!(ca->mi.data_allowed & BIT(BCH_DATA_btree))) @@ -295,10 +303,14 @@ static int read_btree_nodes(struct find_btree_nodes *f) break; } + prt_printf(&buf, " %s", ca->name); + closure_get(&cl); enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_btree_node_scan); wake_up_process(t); } + + bch_notice(c, "%s", buf.buf); err: while (closure_sync_timeout(&cl, sysctl_hung_task_timeout_secs * HZ / 2)) ; diff --git a/fs/bcachefs/btree_update_interior.c b/fs/bcachefs/btree_update_interior.c index 312ef203b27b..e4aa4fa749bc 100644 --- a/fs/bcachefs/btree_update_interior.c +++ b/fs/bcachefs/btree_update_interior.c @@ -14,6 +14,7 @@ #include "btree_locking.h" #include "buckets.h" #include "clock.h" +#include "disk_groups.h" #include "enumerated_ref.h" #include "error.h" #include "extents.h" @@ -277,6 +278,36 @@ static void bch2_btree_node_free_never_used(struct btree_update *as, bch2_trans_node_drop(trans, b); } +static bool can_use_btree_node(struct bch_fs *c, + struct disk_reservation *res, + unsigned target, + struct bkey_s_c k) +{ + if (!bch2_bkey_devs_rw(c, k)) + return false; + + if (target && !bch2_bkey_in_target(c, k, target)) + return false; + + unsigned durability = bch2_bkey_durability(c, k); + + if (durability >= res->nr_replicas) + return true; + + struct bch_devs_mask devs = target_rw_devs(c, BCH_DATA_btree, target); + + guard(rcu)(); + + unsigned durability_available = 0, i; + for_each_set_bit(i, devs.d, BCH_SB_MEMBERS_MAX) { + struct bch_dev *ca = bch2_dev_rcu_noerror(c, i); + if (ca) + durability_available += ca->mi.durability; + } + + return durability >= durability_available; +} + static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, struct disk_reservation *res, struct closure *cl, @@ -303,10 +334,14 @@ static struct btree *__bch2_btree_node_alloc(struct btree_trans *trans, mutex_lock(&c->btree_reserve_cache_lock); if (c->btree_reserve_cache_nr > nr_reserve) { for (struct btree_alloc *a = c->btree_reserve_cache; - a < c->btree_reserve_cache + c->btree_reserve_cache_nr; - a++) { - if (target && !bch2_bkey_in_target(c, bkey_i_to_s_c(&a->k), target)) + a < c->btree_reserve_cache + c->btree_reserve_cache_nr;) { + /* check if it has sufficient durability */ + + if (!can_use_btree_node(c, res, target, bkey_i_to_s_c(&a->k))) { + bch2_open_buckets_put(c, &a->ob); + *a = c->btree_reserve_cache[--c->btree_reserve_cache_nr]; continue; + } bkey_copy(&b->key, &a->k); b->ob = a->ob; diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h index 2de0dc91a69e..cec8b0f47d3d 100644 --- a/fs/bcachefs/errcode.h +++ b/fs/bcachefs/errcode.h @@ -90,6 +90,8 @@ x(ENOMEM, ENOMEM_disk_accounting) \ x(ENOMEM, ENOMEM_stripe_head_alloc) \ x(ENOMEM, ENOMEM_journal_read_bucket) \ + x(ENOMEM, ENOMEM_acl) \ + x(ENOMEM, ENOMEM_move_extent) \ x(ENOSPC, ENOSPC_disk_reservation) \ x(ENOSPC, ENOSPC_bucket_alloc) \ x(ENOSPC, ENOSPC_disk_label_add) \ @@ -216,9 +218,13 @@ x(EINVAL, varint_decode_error) \ x(EINVAL, erasure_coding_found_btree_node) \ x(EINVAL, option_negative) \ + x(EINVAL, topology_repair) \ + x(BCH_ERR_topology_repair, topology_repair_drop_this_node) \ + x(BCH_ERR_topology_repair, topology_repair_drop_prev_node) \ + x(BCH_ERR_topology_repair, topology_repair_did_fill_from_scan) \ x(EOPNOTSUPP, may_not_use_incompat_feature) \ x(EOPNOTSUPP, no_casefolding_without_utf8) \ - x(EOPNOTSUPP, casefolding_disabled) \ + x(EOPNOTSUPP, casefolding_disabled) \ x(EOPNOTSUPP, casefold_opt_is_dir_only) \ x(EOPNOTSUPP, unsupported_fsx_flag) \ x(EOPNOTSUPP, unsupported_fa_flag) \ diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c index b36ecfc0ab9d..b879a586b7f6 100644 --- a/fs/bcachefs/extents.c +++ b/fs/bcachefs/extents.c @@ -282,9 +282,9 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k, if (have_pick) return 1; - if (!have_dirty_ptrs) + if (!have_dirty_ptrs && !bkey_is_btree_ptr(k.k)) return 0; - if (have_missing_devs) + if (have_missing_devs || !have_dirty_ptrs) return bch_err_throw(c, no_device_to_read_from); if (have_csum_errors) return bch_err_throw(c, data_read_csum_err); @@ -1006,6 +1006,20 @@ const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c k, unsigned return NULL; } +bool bch2_bkey_devs_rw(struct bch_fs *c, struct bkey_s_c k) +{ + struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); + + guard(rcu)(); + bkey_for_each_ptr(ptrs, ptr) { + CLASS(bch2_dev_tryget, ca)(c, ptr->dev); + if (!ca || ca->mi.state != BCH_MEMBER_STATE_rw) + return false; + } + + return true; +} + bool bch2_bkey_has_target(struct bch_fs *c, struct bkey_s_c k, unsigned target) { struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h index f212f91c278d..35ee03cd5065 100644 --- a/fs/bcachefs/extents.h +++ b/fs/bcachefs/extents.h @@ -614,6 +614,8 @@ static inline struct bch_extent_ptr *bch2_bkey_has_device(struct bkey_s k, unsig return (void *) bch2_bkey_has_device_c(k.s_c, dev); } +bool bch2_bkey_devs_rw(struct bch_fs *, struct bkey_s_c); + bool bch2_bkey_has_target(struct bch_fs *, struct bkey_s_c, unsigned); bool bch2_bkey_in_target(struct bch_fs *, struct bkey_s_c, unsigned); diff --git a/fs/bcachefs/fs-io-direct.c b/fs/bcachefs/fs-io-direct.c index 73d44875faf2..e53fee0513fd 100644 --- a/fs/bcachefs/fs-io-direct.c +++ b/fs/bcachefs/fs-io-direct.c @@ -127,7 +127,7 @@ static int bch2_direct_IO_read(struct kiocb *req, struct iov_iter *iter) * the dirtying of requests that are internal from the kernel (i.e. from * loopback), because we'll deadlock on page_lock. */ - dio->should_dirty = iter_is_iovec(iter); + dio->should_dirty = user_backed_iter(iter); blk_start_plug(&plug); diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c index df0aa2522b18..183b88bbd402 100644 --- a/fs/bcachefs/fsck.c +++ b/fs/bcachefs/fsck.c @@ -1975,6 +1975,10 @@ static int check_extent(struct btree_trans *trans, struct btree_iter *iter, } } + ret = check_extent_overbig(trans, iter, k); + if (ret) + goto err; + ret = bch2_trans_commit(trans, res, NULL, BCH_TRANS_COMMIT_no_enospc); if (ret) goto err; @@ -2021,8 +2025,7 @@ int bch2_check_extents(struct bch_fs *c) POS(BCACHEFS_ROOT_INO, 0), BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({ bch2_disk_reservation_put(c, &res); - check_extent(trans, &iter, k, &w, &s, &extent_ends, &res) ?: - check_extent_overbig(trans, &iter, k); + check_extent(trans, &iter, k, &w, &s, &extent_ends, &res); })) ?: check_i_sectors_notnested(trans, &w); diff --git a/fs/bcachefs/io_write.c b/fs/bcachefs/io_write.c index d7620138e038..44b02d4b6502 100644 --- a/fs/bcachefs/io_write.c +++ b/fs/bcachefs/io_write.c @@ -89,7 +89,12 @@ void bch2_latency_acct(struct bch_dev *ca, u64 submit_time, int rw) new = ewma_add(old, io_latency, 5); } while (!atomic64_try_cmpxchg(latency, &old, new)); - bch2_congested_acct(ca, io_latency, now, rw); + /* + * Only track read latency for congestion accounting: writes are subject + * to heavy queuing delays from page cache writeback: + */ + if (rw == READ) + bch2_congested_acct(ca, io_latency, now, rw); __bch2_time_stats_update(&ca->io_latency[rw].stats, submit_time, now); } diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c index de03e20f6e30..97760e89e5a3 100644 --- a/fs/bcachefs/journal.c +++ b/fs/bcachefs/journal.c @@ -182,6 +182,8 @@ journal_error_check_stuck(struct journal *j, int error, unsigned flags) void bch2_journal_do_writes(struct journal *j) { + struct bch_fs *c = container_of(j, struct bch_fs, journal); + for (u64 seq = journal_last_unwritten_seq(j); seq <= journal_cur_seq(j); seq++) { @@ -196,6 +198,7 @@ void bch2_journal_do_writes(struct journal *j) if (!journal_state_seq_count(j, j->reservations, seq)) { j->seq_write_started = seq; w->write_started = true; + closure_get(&c->cl); closure_call(&w->io, bch2_journal_write, j->wq, NULL); } diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c index 2835250a14c4..47224666d07e 100644 --- a/fs/bcachefs/journal_io.c +++ b/fs/bcachefs/journal_io.c @@ -1820,6 +1820,8 @@ static CLOSURE_CALLBACK(journal_write_done) if (do_discards) bch2_do_discards(c); + + closure_put(&c->cl); } static void journal_write_endio(struct bio *bio) diff --git a/fs/bcachefs/move.c b/fs/bcachefs/move.c index 54dd6fec81db..84a228c42f06 100644 --- a/fs/bcachefs/move.c +++ b/fs/bcachefs/move.c @@ -330,7 +330,7 @@ int bch2_move_extent(struct moving_context *ctxt, { struct btree_trans *trans = ctxt->trans; struct bch_fs *c = trans->c; - int ret = -ENOMEM; + int ret = 0; if (trace_io_move_enabled()) trace_io_move2(c, k, &io_opts, &data_opts); @@ -351,11 +351,10 @@ int bch2_move_extent(struct moving_context *ctxt, struct moving_io *io = allocate_dropping_locks(trans, ret, kzalloc(sizeof(struct moving_io), _gfp)); - if (!io) - goto err; - + if (!io && !ret) + ret = bch_err_throw(c, ENOMEM_move_extent); if (ret) - goto err_free; + goto err; INIT_LIST_HEAD(&io->io_list); io->write.ctxt = ctxt; @@ -366,7 +365,7 @@ int bch2_move_extent(struct moving_context *ctxt, ret = bch2_data_update_init(trans, iter, ctxt, &io->write, ctxt->wp, &io_opts, data_opts, iter->btree_id, k); if (ret) - goto err_free; + goto err; io->write.op.end_io = move_write_done; } else { @@ -380,7 +379,7 @@ int bch2_move_extent(struct moving_context *ctxt, ret = bch2_data_update_bios_init(&io->write, c, &io_opts); if (ret) - goto err_free; + goto err; } io->write.rbio.bio.bi_end_io = move_read_endio; @@ -423,9 +422,8 @@ int bch2_move_extent(struct moving_context *ctxt, BCH_READ_last_fragment, data_opts.scrub ? data_opts.read_dev : -1); return 0; -err_free: - kfree(io); err: + kfree(io); if (bch2_err_matches(ret, EROFS) || bch2_err_matches(ret, BCH_ERR_transaction_restart)) return ret; diff --git a/fs/bcachefs/recovery.c b/fs/bcachefs/recovery.c index 58c159e5f10d..304473dac268 100644 --- a/fs/bcachefs/recovery.c +++ b/fs/bcachefs/recovery.c @@ -67,13 +67,16 @@ int bch2_btree_lost_data(struct bch_fs *c, ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_backpointers_to_extents, 0, &write_sb) ?: ret; #endif + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_lru_entry_bad, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_backpointer_to_missing_ptr, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); + write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); + switch (btree) { case BTREE_ID_alloc: ret = __bch2_run_explicit_recovery_pass(c, msg, BCH_RECOVERY_PASS_check_alloc_info, 0, &write_sb) ?: ret; - write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent); write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent); - write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_cached_sectors_wrong, ext->errors_silent); write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent); write_sb |= !__test_and_set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent); diff --git a/fs/bcachefs/super.c b/fs/bcachefs/super.c index 0fc0b2221036..b3b2d8353a36 100644 --- a/fs/bcachefs/super.c +++ b/fs/bcachefs/super.c @@ -729,6 +729,8 @@ void __bch2_fs_stop(struct bch_fs *c) cancel_work_sync(&ca->io_error_work); cancel_work_sync(&c->read_only_work); + + flush_work(&c->btree_interior_update_work); } void bch2_fs_free(struct bch_fs *c) diff --git a/mm/shrinker.c b/mm/shrinker.c index cbadd4d4437c..c94eedf2cfd8 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -833,7 +833,6 @@ void shrinker_to_text(struct seq_buf *out, struct shrinker *shrinker) }; unsigned long nr_freed = atomic_long_read(&shrinker->objects_freed); - seq_buf_puts(out, shrinker->name); seq_buf_putc(out, '\n'); |