diff options
Diffstat (limited to 'libbcachefs/journal_io.c')
-rw-r--r-- | libbcachefs/journal_io.c | 65 |
1 files changed, 34 insertions, 31 deletions
diff --git a/libbcachefs/journal_io.c b/libbcachefs/journal_io.c
index 179361be..2b1974a9 100644
--- a/libbcachefs/journal_io.c
+++ b/libbcachefs/journal_io.c
@@ -987,7 +987,6 @@ static void bch2_journal_read_device(struct closure *cl)
 	struct journal_replay *r, **_r;
 	struct genradix_iter iter;
 	struct journal_read_buf buf = { NULL, 0 };
-	u64 min_seq = U64_MAX;
 	unsigned i;
 	int ret = 0;
 
@@ -1006,45 +1005,27 @@ static void bch2_journal_read_device(struct closure *cl)
 		goto err;
 	}
 
-	/* Find the journal bucket with the highest sequence number: */
-	for (i = 0; i < ja->nr; i++) {
-		if (ja->bucket_seq[i] > ja->bucket_seq[ja->cur_idx])
-			ja->cur_idx = i;
-
-		min_seq = min(ja->bucket_seq[i], min_seq);
-	}
-
-	/*
-	 * If there's duplicate journal entries in multiple buckets (which
-	 * definitely isn't supposed to happen, but...) - make sure to start
-	 * cur_idx at the last of those buckets, so we don't deadlock trying to
-	 * allocate
-	 */
-	while (ja->bucket_seq[ja->cur_idx] > min_seq &&
-	       ja->bucket_seq[ja->cur_idx] ==
-	       ja->bucket_seq[(ja->cur_idx + 1) % ja->nr])
-		ja->cur_idx = (ja->cur_idx + 1) % ja->nr;
-
 	ja->sectors_free = ca->mi.bucket_size;
 
 	mutex_lock(&jlist->lock);
-	genradix_for_each(&c->journal_entries, iter, _r) {
+	genradix_for_each_reverse(&c->journal_entries, iter, _r) {
 		r = *_r;
 
 		if (!r)
 			continue;
 
 		for (i = 0; i < r->nr_ptrs; i++) {
-			if (r->ptrs[i].dev == ca->dev_idx &&
-			    sector_to_bucket(ca, r->ptrs[i].sector) == ja->buckets[ja->cur_idx]) {
+			if (r->ptrs[i].dev == ca->dev_idx) {
 				unsigned wrote = bucket_remainder(ca, r->ptrs[i].sector) +
 					vstruct_sectors(&r->j, c->block_bits);
 
-				ja->sectors_free = min(ja->sectors_free,
-						       ca->mi.bucket_size - wrote);
+				ja->cur_idx = r->ptrs[i].bucket;
+				ja->sectors_free = ca->mi.bucket_size - wrote;
+				goto found;
 			}
 		}
 	}
+found:
 	mutex_unlock(&jlist->lock);
 
 	if (ja->bucket_seq[ja->cur_idx] &&
@@ -1660,20 +1641,42 @@ void bch2_journal_write(struct closure *cl)
 	j->write_start_time = local_clock();
 
 	spin_lock(&j->lock);
-	if (bch2_journal_error(j) ||
-	    w->noflush ||
-	    (!w->must_flush &&
-	     (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) &&
-	     test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags))) {
+
+	/*
+	 * If the journal is in an error state - we did an emergency shutdown -
+	 * we prefer to continue doing journal writes. We just mark them as
+	 * noflush so they'll never be used, but they'll still be visible by the
+	 * list_journal tool - this helps in debugging.
+	 *
+	 * There's a caveat: the first journal write after marking the
+	 * superblock dirty must always be a flush write, because on startup
+	 * from a clean shutdown we didn't necessarily read the journal and the
+	 * new journal write might overwrite whatever was in the journal
+	 * previously - we can't leave the journal without any flush writes in
+	 * it.
+	 *
+	 * So if we're in an error state, and we're still starting up, we don't
+	 * write anything at all.
+	 */
+	if (!test_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags) &&
+	    (bch2_journal_error(j) ||
+	     w->noflush ||
+	     (!w->must_flush &&
+	      (jiffies - j->last_flush_write) < msecs_to_jiffies(c->opts.journal_flush_delay) &&
+	      test_bit(JOURNAL_MAY_SKIP_FLUSH, &j->flags)))) {
 		w->noflush = true;
 		SET_JSET_NO_FLUSH(jset, true);
 		jset->last_seq = 0;
 		w->last_seq = 0;
 
 		j->nr_noflush_writes++;
-	} else {
+	} else if (!bch2_journal_error(j)) {
 		j->last_flush_write = jiffies;
 		j->nr_flush_writes++;
+		clear_bit(JOURNAL_NEED_FLUSH_WRITE, &j->flags);
+	} else {
+		spin_unlock(&j->lock);
+		goto err;
 	}
 	spin_unlock(&j->lock);
 