summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/xfs_log.c36
-rw-r--r--fs/xfs/xfs_log_cil.c13
-rw-r--r--fs/xfs/xfs_log_priv.h3
3 files changed, 39 insertions, 13 deletions
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 82f5996d3889..e8c6c96d4f7c 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -489,12 +489,17 @@ out_error:
/*
* Flush iclog to disk if this is the last reference to the given iclog and the
- * it is in the WANT_SYNC state.
+ * it is in the WANT_SYNC state. If the caller passes in a non-zero
+ * @old_tail_lsn and the current log tail does not match, there may be metadata
+ * on disk that must be persisted before this iclog is written. To satisfy that
+ * requirement, set the XLOG_ICL_NEED_FLUSH flag as a condition for writing this
+ * iclog with the new log tail value.
*/
int
xlog_state_release_iclog(
struct xlog *log,
- struct xlog_in_core *iclog)
+ struct xlog_in_core *iclog,
+ xfs_lsn_t old_tail_lsn)
{
xfs_lsn_t tail_lsn;
lockdep_assert_held(&log->l_icloglock);
@@ -503,6 +508,19 @@ xlog_state_release_iclog(
if (iclog->ic_state == XLOG_STATE_IOERROR)
return -EIO;
+ /*
+ * Grabbing the current log tail needs to be atomic w.r.t. the writing
+ * of the tail LSN into the iclog so we guarantee that the log tail does
+ * not move between deciding if a cache flush is required and writing
+ * the LSN into the iclog below.
+ */
+ if (old_tail_lsn || iclog->ic_state == XLOG_STATE_WANT_SYNC) {
+ tail_lsn = xlog_assign_tail_lsn(log->l_mp);
+
+ if (old_tail_lsn && tail_lsn != old_tail_lsn)
+ iclog->ic_flags |= XLOG_ICL_NEED_FLUSH;
+ }
+
if (!atomic_dec_and_test(&iclog->ic_refcnt))
return 0;
@@ -511,8 +529,6 @@ xlog_state_release_iclog(
return 0;
}
- /* update tail before writing to iclog */
- tail_lsn = xlog_assign_tail_lsn(log->l_mp);
iclog->ic_state = XLOG_STATE_SYNCING;
iclog->ic_header.h_tail_lsn = cpu_to_be64(tail_lsn);
xlog_verify_tail_lsn(log, iclog, tail_lsn);
@@ -858,7 +874,7 @@ out_err:
* iclog containing the unmount record is written.
*/
iclog->ic_flags |= (XLOG_ICL_NEED_FLUSH | XLOG_ICL_NEED_FUA);
- error = xlog_state_release_iclog(log, iclog);
+ error = xlog_state_release_iclog(log, iclog, 0);
xlog_wait_on_iclog(iclog);
if (tic) {
@@ -2302,7 +2318,7 @@ xlog_write_copy_finish(
return 0;
release_iclog:
- error = xlog_state_release_iclog(log, iclog);
+ error = xlog_state_release_iclog(log, iclog, 0);
spin_unlock(&log->l_icloglock);
return error;
}
@@ -2521,7 +2537,7 @@ next_lv:
ASSERT(optype & XLOG_COMMIT_TRANS);
*commit_iclog = iclog;
} else {
- error = xlog_state_release_iclog(log, iclog);
+ error = xlog_state_release_iclog(log, iclog, 0);
}
spin_unlock(&log->l_icloglock);
@@ -2959,7 +2975,7 @@ restart:
* reference to the iclog.
*/
if (!atomic_add_unless(&iclog->ic_refcnt, -1, 1))
- error = xlog_state_release_iclog(log, iclog);
+ error = xlog_state_release_iclog(log, iclog, 0);
spin_unlock(&log->l_icloglock);
if (error)
return error;
@@ -3195,7 +3211,7 @@ xfs_log_force(
atomic_inc(&iclog->ic_refcnt);
lsn = be64_to_cpu(iclog->ic_header.h_lsn);
xlog_state_switch_iclogs(log, iclog, 0);
- if (xlog_state_release_iclog(log, iclog))
+ if (xlog_state_release_iclog(log, iclog, 0))
goto out_error;
if (be64_to_cpu(iclog->ic_header.h_lsn) != lsn)
@@ -3275,7 +3291,7 @@ xlog_force_lsn(
}
atomic_inc(&iclog->ic_refcnt);
xlog_state_switch_iclogs(log, iclog, 0);
- if (xlog_state_release_iclog(log, iclog))
+ if (xlog_state_release_iclog(log, iclog, 0))
goto out_error;
if (log_flushed)
*log_flushed = 1;
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index b128aaa9b870..4c44bc3786c0 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -654,8 +654,9 @@ xlog_cil_push_work(
struct xfs_trans_header thdr;
struct xfs_log_iovec lhdr;
struct xfs_log_vec lvhdr = { NULL };
+ xfs_lsn_t preflush_tail_lsn;
xfs_lsn_t commit_lsn;
- xfs_lsn_t push_seq;
+ xfs_csn_t push_seq;
struct bio bio;
DECLARE_COMPLETION_ONSTACK(bdev_flush);
@@ -730,7 +731,15 @@ xlog_cil_push_work(
* because we hold the flush lock exclusively. Hence we can now issue
* a cache flush to ensure all the completed metadata in the journal we
* are about to overwrite is on stable storage.
+ *
+ * Because we are issuing this cache flush before we've written the
+ * tail lsn to the iclog, we can have metadata IO completions move the
+ * tail forwards between the completion of this flush and the iclog
+ * being written. In this case, we need to re-issue the cache flush
+ * before the iclog write. To detect whether the log tail moves, sample
+ * the tail LSN *before* we issue the flush.
*/
+ preflush_tail_lsn = atomic64_read(&log->l_tail_lsn);
xfs_flush_bdev_async(&bio, log->l_mp->m_ddev_targp->bt_bdev,
&bdev_flush);
@@ -941,7 +950,7 @@ restart:
* storage.
*/
commit_iclog->ic_flags |= XLOG_ICL_NEED_FUA;
- xlog_state_release_iclog(log, commit_iclog);
+ xlog_state_release_iclog(log, commit_iclog, preflush_tail_lsn);
spin_unlock(&log->l_icloglock);
return;
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 4c41bbfa33b0..7cbde0b4f990 100644
--- a/fs/xfs/xfs_log_priv.h
+++ b/fs/xfs/xfs_log_priv.h
@@ -497,7 +497,8 @@ int xlog_commit_record(struct xlog *log, struct xlog_ticket *ticket,
void xfs_log_ticket_ungrant(struct xlog *log, struct xlog_ticket *ticket);
void xfs_log_ticket_regrant(struct xlog *log, struct xlog_ticket *ticket);
-int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog);
+int xlog_state_release_iclog(struct xlog *log, struct xlog_in_core *iclog,
+ xfs_lsn_t log_tail_lsn);
/*
* When we crack an atomic LSN, we sample it first so that the value will not