diff options
Diffstat (limited to 'fs/bcachefs/io_read.c')
-rw-r--r-- | fs/bcachefs/io_read.c | 240 |
1 files changed, 147 insertions, 93 deletions
diff --git a/fs/bcachefs/io_read.c b/fs/bcachefs/io_read.c index 210b6adc359f..e7d53ab1cf55 100644 --- a/fs/bcachefs/io_read.c +++ b/fs/bcachefs/io_read.c @@ -39,38 +39,73 @@ MODULE_PARM_DESC(read_corrupt_ratio, ""); #ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT +static inline u32 bch2_dev_congested_read(struct bch_dev *ca, u64 now) +{ + s64 congested = atomic_read(&ca->congested); + u64 last = READ_ONCE(ca->congested_last); + if (time_after64(now, last)) + congested -= (now - last) >> 12; + + return clamp(congested, 0LL, CONGESTED_MAX); +} + static bool bch2_target_congested(struct bch_fs *c, u16 target) { const struct bch_devs_mask *devs; unsigned d, nr = 0, total = 0; - u64 now = local_clock(), last; - s64 congested; - struct bch_dev *ca; - - if (!target) - return false; + u64 now = local_clock(); guard(rcu)(); devs = bch2_target_to_mask(c, target) ?: &c->rw_devs[BCH_DATA_user]; for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) { - ca = rcu_dereference(c->devs[d]); + struct bch_dev *ca = rcu_dereference(c->devs[d]); if (!ca) continue; - congested = atomic_read(&ca->congested); - last = READ_ONCE(ca->congested_last); - if (time_after64(now, last)) - congested -= (now - last) >> 12; - - total += max(congested, 0LL); + total += bch2_dev_congested_read(ca, now); nr++; } return get_random_u32_below(nr * CONGESTED_MAX) < total; } +void bch2_dev_congested_to_text(struct printbuf *out, struct bch_dev *ca) +{ + printbuf_tabstop_push(out, 32); + + prt_printf(out, "current:\t%u%%\n", + bch2_dev_congested_read(ca, local_clock()) * + 100 / CONGESTED_MAX); + + prt_printf(out, "raw:\t%i/%u\n", atomic_read(&ca->congested), CONGESTED_MAX); + + prt_printf(out, "last io over threshold:\t"); + bch2_pr_time_units(out, local_clock() - ca->congested_last); + prt_newline(out); + + prt_printf(out, "read latency threshold:\t"); + bch2_pr_time_units(out, + ca->io_latency[READ].quantiles.entries[QUANTILE_IDX(1)].m << 2); + prt_newline(out); + + prt_printf(out, "median read latency:\t"); + bch2_pr_time_units(out, + ca->io_latency[READ].quantiles.entries[QUANTILE_IDX(7)].m); + prt_newline(out); + + prt_printf(out, "write latency threshold:\t"); + bch2_pr_time_units(out, + ca->io_latency[WRITE].quantiles.entries[QUANTILE_IDX(1)].m << 3); + prt_newline(out); + + prt_printf(out, "median write latency:\t"); + bch2_pr_time_units(out, + ca->io_latency[WRITE].quantiles.entries[QUANTILE_IDX(7)].m); + prt_newline(out); +} + #else static bool bch2_target_congested(struct bch_fs *c, u16 target) @@ -130,22 +165,32 @@ static inline int should_promote(struct bch_fs *c, struct bkey_s_c k, if (!have_io_error(failed)) { BUG_ON(!opts.promote_target); - if (!(flags & BCH_READ_may_promote)) + if (!(flags & BCH_READ_may_promote)) { + count_event(c, io_read_nopromote_may_not); return bch_err_throw(c, nopromote_may_not); + } - if (bch2_bkey_has_target(c, k, opts.promote_target)) + if (bch2_bkey_has_target(c, k, opts.promote_target)) { + count_event(c, io_read_nopromote_already_promoted); return bch_err_throw(c, nopromote_already_promoted); + } - if (bkey_extent_is_unwritten(k)) + if (bkey_extent_is_unwritten(k)) { + count_event(c, io_read_nopromote_unwritten); return bch_err_throw(c, nopromote_unwritten); + } - if (bch2_target_congested(c, opts.promote_target)) + if (bch2_target_congested(c, opts.promote_target)) { + count_event(c, io_read_nopromote_congested); return bch_err_throw(c, nopromote_congested); + } } if (rhashtable_lookup_fast(&c->promote_table, &pos, - bch_promote_params)) + bch_promote_params)) { + count_event(c, io_read_nopromote_in_flight); return bch_err_throw(c, nopromote_in_flight); + } return 0; } @@ -160,6 +205,7 @@ static noinline void promote_free(struct bch_read_bio *rbio) BUG_ON(ret); async_object_list_del(c, promote, op->list_idx); + async_object_list_del(c, rbio, rbio->list_idx); bch2_data_update_exit(&op->write); @@ -343,16 +389,27 @@ static struct bch_read_bio *promote_alloc(struct btree_trans *trans, return promote; nopromote: - trace_io_read_nopromote(c, ret); + if (trace_io_read_nopromote_enabled()) { + CLASS(printbuf, buf)(); + printbuf_indent_add_nextline(&buf, 2); + prt_printf(&buf, "%s\n", bch2_err_str(ret)); + bch2_bkey_val_to_text(&buf, c, k); + + trace_io_read_nopromote(c, buf.buf); + } + count_event(c, io_read_nopromote); + return NULL; } -void bch2_promote_op_to_text(struct printbuf *out, struct promote_op *op) +void bch2_promote_op_to_text(struct printbuf *out, + struct bch_fs *c, + struct promote_op *op) { if (!op->write.read_done) { prt_printf(out, "parent read: %px\n", op->write.rbio.parent); printbuf_indent_add(out, 2); - bch2_read_bio_to_text(out, op->write.rbio.parent); + bch2_read_bio_to_text(out, c, op->write.rbio.parent); printbuf_indent_sub(out, 2); } @@ -380,7 +437,8 @@ static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *o static void bch2_read_err_msg(struct bch_fs *c, struct printbuf *out, struct bch_read_bio *rbio, struct bpos read_pos) { - bch2_trans_run(c, bch2_read_err_msg_trans(trans, out, rbio, read_pos)); + CLASS(btree_trans, trans)(c); + bch2_read_err_msg_trans(trans, out, rbio, read_pos); } enum rbio_context { @@ -450,6 +508,10 @@ static void bch2_rbio_done(struct bch_read_bio *rbio) if (rbio->start_time) bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read], rbio->start_time); +#ifdef CONFIG_BCACHEFS_ASYNC_OBJECT_LISTS + if (rbio->list_idx) + async_object_list_del(rbio->c, rbio, rbio->list_idx); +#endif bio_endio(&rbio->bio); } @@ -472,7 +534,7 @@ static void get_rbio_extent(struct btree_trans *trans, break; } - bch2_trans_iter_exit(trans, &iter); + bch2_trans_iter_exit(&iter); } static noinline int maybe_poison_extent(struct btree_trans *trans, struct bch_read_bio *rbio, @@ -488,15 +550,14 @@ static noinline int maybe_poison_extent(struct btree_trans *trans, struct bch_re if (flags & BIT_ULL(BCH_EXTENT_FLAG_poisoned)) return 0; - struct btree_iter iter; - struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, btree, bkey_start_pos(read_k.k), - BTREE_ITER_intent); + CLASS(btree_iter, iter)(trans, btree, bkey_start_pos(read_k.k), BTREE_ITER_intent); + struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); int ret = bkey_err(k); if (ret) return ret; if (!bkey_and_val_eq(k, read_k)) - goto out; + return 0; struct bkey_i *new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + sizeof(struct bch_extent_flags)); @@ -505,17 +566,17 @@ static noinline int maybe_poison_extent(struct btree_trans *trans, struct bch_re bch2_bkey_extent_flags_set(c, new, flags|BIT_ULL(BCH_EXTENT_FLAG_poisoned)) ?: bch2_trans_update(trans, &iter, new, BTREE_UPDATE_internal_snapshot_node) ?: bch2_trans_commit(trans, NULL, NULL, 0); + if (ret) + return ret; /* * Propagate key change back to data update path, in particular so it * knows the extent has been poisoned and it's safe to change the * checksum */ - if (u && !ret) + if (u) bch2_bkey_buf_copy(&u->k, c, new); -out: - bch2_trans_iter_exit(trans, &iter); - return ret; + return 0; } static noinline int bch2_read_retry_nodecode(struct btree_trans *trans, @@ -549,7 +610,7 @@ retry: bkey_i_to_s_c(u->k.k), 0, failed, flags, -1); err: - bch2_trans_iter_exit(trans, &iter); + bch2_trans_iter_exit(&iter); if (bch2_err_matches(ret, BCH_ERR_transaction_restart) || bch2_err_matches(ret, BCH_ERR_data_read_retry)) @@ -577,7 +638,7 @@ static void bch2_rbio_retry(struct work_struct *work) }; struct bch_io_failures failed = { .nr = 0 }; - struct btree_trans *trans = bch2_trans_get(c); + CLASS(btree_trans, trans)(c); struct bkey_buf sk; bch2_bkey_buf_init(&sk); @@ -619,7 +680,7 @@ static void bch2_rbio_retry(struct work_struct *work) } if (failed.nr || ret) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_log_msg_start(c, &buf); lockrestart_do(trans, @@ -647,12 +708,10 @@ static void bch2_rbio_retry(struct work_struct *work) bch2_io_failures_to_text(&buf, c, &failed); bch2_print_str_ratelimited(c, KERN_ERR, buf.buf); - printbuf_exit(&buf); } bch2_rbio_done(rbio); bch2_bkey_buf_exit(&sk, c); - bch2_trans_put(trans); } static void bch2_rbio_error(struct bch_read_bio *rbio, @@ -686,62 +745,55 @@ static int __bch2_rbio_narrow_crcs(struct btree_trans *trans, { struct bch_fs *c = rbio->c; u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset; - struct bch_extent_crc_unpacked new_crc; - struct btree_iter iter; - struct bkey_i *new; - struct bkey_s_c k; int ret = 0; if (crc_is_compressed(rbio->pick.crc)) return 0; - k = bch2_bkey_get_iter(trans, &iter, rbio->data_btree, rbio->data_pos, - BTREE_ITER_slots|BTREE_ITER_intent); + CLASS(btree_iter, iter)(trans, rbio->data_btree, rbio->data_pos, BTREE_ITER_intent); + struct bkey_s_c k = bch2_btree_iter_peek_slot(&iter); if ((ret = bkey_err(k))) - goto out; + return ret; if (bversion_cmp(k.k->bversion, rbio->version) || !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset)) - goto out; + return 0; /* Extent was merged? */ if (bkey_start_offset(k.k) < data_offset || k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size) - goto out; + return 0; + struct bch_extent_crc_unpacked new_crc; if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version, rbio->pick.crc, NULL, &new_crc, bkey_start_offset(k.k) - data_offset, k.k->size, rbio->pick.crc.csum_type)) { bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)"); - ret = 0; - goto out; + return 0; } /* * going to be temporarily appending another checksum entry: */ - new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + - sizeof(struct bch_extent_crc128)); + struct bkey_i *new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) + + sizeof(struct bch_extent_crc128)); if ((ret = PTR_ERR_OR_ZERO(new))) - goto out; + return ret; bkey_reassemble(new, k); if (!bch2_bkey_narrow_crcs(new, new_crc)) - goto out; + return 0; - ret = bch2_trans_update(trans, &iter, new, - BTREE_UPDATE_internal_snapshot_node); -out: - bch2_trans_iter_exit(trans, &iter); - return ret; + return bch2_trans_update(trans, &iter, new, BTREE_UPDATE_internal_snapshot_node); } static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio) { - bch2_trans_commit_do(rbio->c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, - __bch2_rbio_narrow_crcs(trans, rbio)); + CLASS(btree_trans, trans)(rbio->c); + commit_do(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, + __bch2_rbio_narrow_crcs(trans, rbio)); } static void bch2_read_decompress_err(struct work_struct *work) @@ -749,7 +801,7 @@ static void bch2_read_decompress_err(struct work_struct *work) struct bch_read_bio *rbio = container_of(work, struct bch_read_bio, work); struct bch_fs *c = rbio->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); prt_str(&buf, "decompression error"); @@ -761,7 +813,6 @@ static void bch2_read_decompress_err(struct work_struct *work) bch_err_ratelimited(c, "%s", buf.buf); bch2_rbio_error(rbio, -BCH_ERR_data_read_decompress_err, BLK_STS_IOERR); - printbuf_exit(&buf); } static void bch2_read_decrypt_err(struct work_struct *work) @@ -769,7 +820,7 @@ static void bch2_read_decrypt_err(struct work_struct *work) struct bch_read_bio *rbio = container_of(work, struct bch_read_bio, work); struct bch_fs *c = rbio->c; - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_read_err_msg(c, &buf, rbio, rbio->read_pos); prt_str(&buf, "decrypt error"); @@ -781,7 +832,6 @@ static void bch2_read_decrypt_err(struct work_struct *work) bch_err_ratelimited(c, "%s", buf.buf); bch2_rbio_error(rbio, -BCH_ERR_data_read_decrypt_err, BLK_STS_IOERR); - printbuf_exit(&buf); } /* Inner part that may run in process context */ @@ -962,13 +1012,10 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, struct bch_extent_ptr ptr) { struct bch_fs *c = trans->c; - struct btree_iter iter; - struct printbuf buf = PRINTBUF; - int ret; - - bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc, - PTR_BUCKET_POS(ca, &ptr), - BTREE_ITER_cached); + CLASS(printbuf, buf)(); + CLASS(btree_iter, iter)(trans, BTREE_ID_alloc, + PTR_BUCKET_POS(ca, &ptr), + BTREE_ITER_cached); int gen = bucket_gen_get(ca, iter.pos.offset); if (gen >= 0) { @@ -980,7 +1027,7 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, prt_printf(&buf, "memory gen: %u", gen); - ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(trans, &iter))); + int ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter))); if (!ret) { prt_newline(&buf); bch2_bkey_val_to_text(&buf, c, k); @@ -998,9 +1045,6 @@ static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans, } bch2_fs_inconsistent(c, "%s", buf.buf); - - bch2_trans_iter_exit(trans, &iter); - printbuf_exit(&buf); } int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig, @@ -1052,25 +1096,22 @@ retry_pick: trace_and_count(c, io_read_fail_and_poison, &orig->bio); } - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_read_err_msg_trans(trans, &buf, orig, read_pos); prt_printf(&buf, "%s\n ", bch2_err_str(ret)); bch2_bkey_val_to_text(&buf, c, k); - bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); goto err; } if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && !c->chacha20_key_set) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); bch2_read_err_msg_trans(trans, &buf, orig, read_pos); prt_printf(&buf, "attempting to read encrypted data without encryption key\n "); bch2_bkey_val_to_text(&buf, c, k); bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); ret = bch_err_throw(c, data_read_no_encryption_key); goto err; } @@ -1351,7 +1392,6 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, unsigned flags) { struct bch_fs *c = trans->c; - struct btree_iter iter; struct bkey_buf sk; struct bkey_s_c k; enum btree_id data_btree; @@ -1360,9 +1400,9 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, EBUG_ON(rbio->data_update); bch2_bkey_buf_init(&sk); - bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, - POS(inum.inum, bvec_iter.bi_sector), - BTREE_ITER_slots); + CLASS(btree_iter, iter)(trans, BTREE_ID_extents, + POS(inum.inum, bvec_iter.bi_sector), + BTREE_ITER_slots); while (1) { data_btree = BTREE_ID_extents; @@ -1374,12 +1414,12 @@ int __bch2_read(struct btree_trans *trans, struct bch_read_bio *rbio, if (ret) goto err; - bch2_btree_iter_set_snapshot(trans, &iter, snapshot); + bch2_btree_iter_set_snapshot(&iter, snapshot); - bch2_btree_iter_set_pos(trans, &iter, + bch2_btree_iter_set_pos(&iter, POS(inum.inum, bvec_iter.bi_sector)); - k = bch2_btree_iter_peek_slot(trans, &iter); + k = bch2_btree_iter_peek_slot(&iter); ret = bkey_err(k); if (ret) goto err; @@ -1439,13 +1479,12 @@ err: if (unlikely(ret)) { if (ret != -BCH_ERR_extent_poisoned) { - struct printbuf buf = PRINTBUF; + CLASS(printbuf, buf)(); lockrestart_do(trans, bch2_inum_offset_err_msg_trans(trans, &buf, inum, bvec_iter.bi_sector << 9)); prt_printf(&buf, "data read error: %s", bch2_err_str(ret)); bch_err_ratelimited(c, "%s", buf.buf); - printbuf_exit(&buf); } rbio->bio.bi_status = BLK_STS_IOERR; @@ -1455,7 +1494,6 @@ err: bch2_rbio_done(rbio); } - bch2_trans_iter_exit(trans, &iter); bch2_bkey_buf_exit(&sk, c); return ret; } @@ -1467,19 +1505,34 @@ static const char * const bch2_read_bio_flags[] = { NULL }; -void bch2_read_bio_to_text(struct printbuf *out, struct bch_read_bio *rbio) +void bch2_read_bio_to_text(struct printbuf *out, + struct bch_fs *c, + struct bch_read_bio *rbio) { + if (!out->nr_tabstops) + printbuf_tabstop_push(out, 20); + + bch2_read_err_msg(c, out, rbio, rbio->read_pos); + prt_newline(out); + + /* Are we in a retry? */ + + printbuf_indent_add(out, 2); + u64 now = local_clock(); - prt_printf(out, "start_time:\t%llu\n", rbio->start_time ? now - rbio->start_time : 0); - prt_printf(out, "submit_time:\t%llu\n", rbio->submit_time ? now - rbio->submit_time : 0); + prt_printf(out, "start_time:\t"); + bch2_pr_time_units(out, max_t(s64, 0, now - rbio->start_time)); + prt_newline(out); + + prt_printf(out, "submit_time:\t"); + bch2_pr_time_units(out, max_t(s64, 0, now - rbio->submit_time)); + prt_newline(out); if (!rbio->split) prt_printf(out, "end_io:\t%ps\n", rbio->end_io); else prt_printf(out, "parent:\t%px\n", rbio->parent); - prt_printf(out, "bi_end_io:\t%ps\n", rbio->bio.bi_end_io); - prt_printf(out, "promote:\t%u\n", rbio->promote); prt_printf(out, "bounce:\t%u\n", rbio->bounce); prt_printf(out, "split:\t%u\n", rbio->split); @@ -1498,6 +1551,7 @@ void bch2_read_bio_to_text(struct printbuf *out, struct bch_read_bio *rbio) prt_newline(out); bch2_bio_to_text(out, &rbio->bio); + printbuf_indent_sub(out, 2); } void bch2_fs_io_read_exit(struct bch_fs *c) |