summaryrefslogtreecommitdiff
path: root/libbcachefs/journal.c
diff options
context:
space:
mode:
Diffstat (limited to 'libbcachefs/journal.c')
-rw-r--r--libbcachefs/journal.c92
1 files changed, 70 insertions, 22 deletions
diff --git a/libbcachefs/journal.c b/libbcachefs/journal.c
index c9c2ee9c..3f0e6d71 100644
--- a/libbcachefs/journal.c
+++ b/libbcachefs/journal.c
@@ -76,6 +76,67 @@ static void journal_pin_list_init(struct journal_entry_pin_list *p, int count)
p->devs.nr = 0;
}
+/*
+ * Detect stuck journal conditions and trigger shutdown. Technically the journal
+ * can end up stuck for a variety of reasons, such as a blocked I/O, journal
+ * reservation lockup, etc. Since this is a fatal error with potentially
+ * unpredictable characteristics, we want to be fairly conservative before we
+ * decide to shut things down.
+ *
+ * Consider the journal stuck when it appears full with no ability to commit
+ * btree transactions, to discard journal buckets, nor acquire priority
+ * (reserved watermark) reservation.
+ */
+static inline bool
+journal_error_check_stuck(struct journal *j, int error, unsigned flags)
+{
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ bool stuck = false;
+ struct printbuf buf = PRINTBUF;
+
+ if (!(error == JOURNAL_ERR_journal_full ||
+ error == JOURNAL_ERR_journal_pin_full) ||
+ nr_unwritten_journal_entries(j) ||
+ (flags & JOURNAL_WATERMARK_MASK) != JOURNAL_WATERMARK_reserved)
+ return stuck;
+
+ spin_lock(&j->lock);
+
+ if (j->can_discard) {
+ spin_unlock(&j->lock);
+ return stuck;
+ }
+
+ stuck = true;
+
+ /*
+ * The journal shutdown path will set ->err_seq, but do it here first to
+ * serialize against concurrent failures and avoid duplicate error
+ * reports.
+ */
+ if (j->err_seq) {
+ spin_unlock(&j->lock);
+ return stuck;
+ }
+ j->err_seq = journal_cur_seq(j);
+ spin_unlock(&j->lock);
+
+ bch_err(c, "Journal stuck! Hava a pre-reservation but journal full (error %s)",
+ bch2_journal_errors[error]);
+ bch2_journal_debug_to_text(&buf, j);
+ bch_err(c, "%s", buf.buf);
+
+ printbuf_reset(&buf);
+ bch2_journal_pins_to_text(&buf, j);
+ bch_err(c, "Journal pins:\n%s", buf.buf);
+ printbuf_exit(&buf);
+
+ bch2_fatal_error(c);
+ dump_stack();
+
+ return stuck;
+}
+
/* journal entry close/open: */
void __bch2_journal_buf_put(struct journal *j)
@@ -163,6 +224,7 @@ void bch2_journal_halt(struct journal *j)
__journal_entry_close(j, JOURNAL_ENTRY_ERROR_VAL);
if (!j->err_seq)
j->err_seq = journal_cur_seq(j);
+ journal_wake(j);
spin_unlock(&j->lock);
}
@@ -363,6 +425,12 @@ retry:
spin_lock(&j->lock);
+ /* check once more in case somebody else shut things down... */
+ if (bch2_journal_error(j)) {
+ spin_unlock(&j->lock);
+ return -BCH_ERR_erofs_journal_err;
+ }
+
/*
* Recheck after taking the lock, so we don't race with another thread
* that just did journal_entry_open() and call journal_entry_close()
@@ -410,28 +478,8 @@ unlock:
if (!ret)
goto retry;
-
- if ((ret == JOURNAL_ERR_journal_full ||
- ret == JOURNAL_ERR_journal_pin_full) &&
- !can_discard &&
- !nr_unwritten_journal_entries(j) &&
- (flags & JOURNAL_WATERMARK_MASK) == JOURNAL_WATERMARK_reserved) {
- struct printbuf buf = PRINTBUF;
-
- bch_err(c, "Journal stuck! Hava a pre-reservation but journal full (ret %s)",
- bch2_journal_errors[ret]);
-
- bch2_journal_debug_to_text(&buf, j);
- bch_err(c, "%s", buf.buf);
-
- printbuf_reset(&buf);
- bch2_journal_pins_to_text(&buf, j);
- bch_err(c, "Journal pins:\n%s", buf.buf);
-
- printbuf_exit(&buf);
- bch2_fatal_error(c);
- dump_stack();
- }
+ if (journal_error_check_stuck(j, ret, flags))
+ ret = -BCH_ERR_journal_res_get_blocked;
/*
* Journal is full - can't rely on reclaim from work item due to