summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_log_recover.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs/xfs_log_recover.c')
-rw-r--r--fs/xfs/xfs_log_recover.c223
1 files changed, 133 insertions, 90 deletions
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 9549188f5a36..87b1c331f9eb 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -85,17 +85,21 @@ struct xfs_buf_cancel {
*/
/*
- * Verify the given count of basic blocks is valid number of blocks
- * to specify for an operation involving the given XFS log buffer.
- * Returns nonzero if the count is valid, 0 otherwise.
+ * Verify the log-relative block number and length in basic blocks are valid for
+ * an operation involving the given XFS log buffer. Returns true if the fields
+ * are valid, false otherwise.
*/
-
-static inline int
-xlog_buf_bbcount_valid(
+static inline bool
+xlog_verify_bp(
struct xlog *log,
+ xfs_daddr_t blk_no,
int bbcount)
{
- return bbcount > 0 && bbcount <= log->l_logBBsize;
+ if (blk_no < 0 || blk_no >= log->l_logBBsize)
+ return false;
+ if (bbcount <= 0 || (blk_no + bbcount) > log->l_logBBsize)
+ return false;
+ return true;
}
/*
@@ -110,7 +114,11 @@ xlog_get_bp(
{
struct xfs_buf *bp;
- if (!xlog_buf_bbcount_valid(log, nbblks)) {
+ /*
+ * Pass log block 0 since we don't have an addr yet, buffer will be
+ * verified on read.
+ */
+ if (!xlog_verify_bp(log, 0, nbblks)) {
xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
nbblks);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
@@ -180,9 +188,10 @@ xlog_bread_noalign(
{
int error;
- if (!xlog_buf_bbcount_valid(log, nbblks)) {
- xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
- nbblks);
+ if (!xlog_verify_bp(log, blk_no, nbblks)) {
+ xfs_warn(log->l_mp,
+ "Invalid log block/length (0x%llx, 0x%x) for buffer",
+ blk_no, nbblks);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
return -EFSCORRUPTED;
}
@@ -265,9 +274,10 @@ xlog_bwrite(
{
int error;
- if (!xlog_buf_bbcount_valid(log, nbblks)) {
- xfs_warn(log->l_mp, "Invalid block length (0x%x) for buffer",
- nbblks);
+ if (!xlog_verify_bp(log, blk_no, nbblks)) {
+ xfs_warn(log->l_mp,
+ "Invalid log block/length (0x%llx, 0x%x) for buffer",
+ blk_no, nbblks);
XFS_ERROR_REPORT(__func__, XFS_ERRLEVEL_HIGH, log->l_mp);
return -EFSCORRUPTED;
}
@@ -753,7 +763,7 @@ xlog_find_head(
* in the in-core log. The following number can be made tighter if
* we actually look at the block size of the filesystem.
*/
- num_scan_bblks = XLOG_TOTAL_REC_SHIFT(log);
+ num_scan_bblks = min_t(int, log_bbnum, XLOG_TOTAL_REC_SHIFT(log));
if (head_blk >= num_scan_bblks) {
/*
* We are guaranteed that the entire check can be performed
@@ -1029,61 +1039,106 @@ out_error:
}
/*
- * Check the log tail for torn writes. This is required when torn writes are
- * detected at the head and the head had to be walked back to a previous record.
- * The tail of the previous record must now be verified to ensure the torn
- * writes didn't corrupt the previous tail.
+ * Calculate distance from head to tail (i.e., unused space in the log).
+ */
+static inline int
+xlog_tail_distance(
+ struct xlog *log,
+ xfs_daddr_t head_blk,
+ xfs_daddr_t tail_blk)
+{
+ if (head_blk < tail_blk)
+ return tail_blk - head_blk;
+
+ return tail_blk + (log->l_logBBsize - head_blk);
+}
+
+/*
+ * Verify the log tail. This is particularly important when torn or incomplete
+ * writes have been detected near the front of the log and the head has been
+ * walked back accordingly.
+ *
+ * We also have to handle the case where the tail was pinned and the head
+ * blocked behind the tail right before a crash. If the tail had been pushed
+ * immediately prior to the crash and the subsequent checkpoint was only
+ * partially written, it's possible it overwrote the last referenced tail in the
+ * log with garbage. This is not a coherency problem because the tail must have
+ * been pushed before it can be overwritten, but appears as log corruption to
+ * recovery because we have no way to know the tail was updated if the
+ * subsequent checkpoint didn't write successfully.
*
- * Return an error if CRC verification fails as recovery cannot proceed.
+ * Therefore, CRC check the log from tail to head. If a failure occurs and the
+ * offending record is within max iclog bufs from the head, walk the tail
+ * forward and retry until a valid tail is found or corruption is detected out
+ * of the range of a possible overwrite.
*/
STATIC int
xlog_verify_tail(
struct xlog *log,
xfs_daddr_t head_blk,
- xfs_daddr_t tail_blk)
+ xfs_daddr_t *tail_blk,
+ int hsize)
{
struct xlog_rec_header *thead;
struct xfs_buf *bp;
xfs_daddr_t first_bad;
- int count;
int error = 0;
bool wrapped;
- xfs_daddr_t tmp_head;
+ xfs_daddr_t tmp_tail;
+ xfs_daddr_t orig_tail = *tail_blk;
bp = xlog_get_bp(log, 1);
if (!bp)
return -ENOMEM;
/*
- * Seek XLOG_MAX_ICLOGS + 1 records past the current tail record to get
- * a temporary head block that points after the last possible
- * concurrently written record of the tail.
+ * Make sure the tail points to a record (returns positive count on
+ * success).
*/
- count = xlog_seek_logrec_hdr(log, head_blk, tail_blk,
- XLOG_MAX_ICLOGS + 1, bp, &tmp_head, &thead,
- &wrapped);
- if (count < 0) {
- error = count;
+ error = xlog_seek_logrec_hdr(log, head_blk, *tail_blk, 1, bp,
+ &tmp_tail, &thead, &wrapped);
+ if (error < 0)
goto out;
- }
-
- /*
- * If the call above didn't find XLOG_MAX_ICLOGS + 1 records, we ran
- * into the actual log head. tmp_head points to the start of the record
- * so update it to the actual head block.
- */
- if (count < XLOG_MAX_ICLOGS + 1)
- tmp_head = head_blk;
+ if (*tail_blk != tmp_tail)
+ *tail_blk = tmp_tail;
/*
- * We now have a tail and temporary head block that covers at least
- * XLOG_MAX_ICLOGS records from the tail. We need to verify that these
- * records were completely written. Run a CRC verification pass from
- * tail to head and return the result.
+ * Run a CRC check from the tail to the head. We can't just check
+ * MAX_ICLOGS records past the tail because the tail may point to stale
+ * blocks cleared during the search for the head/tail. These blocks are
+ * overwritten with zero-length records and thus record count is not a
+ * reliable indicator of the iclog state before a crash.
*/
- error = xlog_do_recovery_pass(log, tmp_head, tail_blk,
+ first_bad = 0;
+ error = xlog_do_recovery_pass(log, head_blk, *tail_blk,
XLOG_RECOVER_CRCPASS, &first_bad);
+ while ((error == -EFSBADCRC || error == -EFSCORRUPTED) && first_bad) {
+ int tail_distance;
+
+ /*
+ * Is corruption within range of the head? If so, retry from
+ * the next record. Otherwise return an error.
+ */
+ tail_distance = xlog_tail_distance(log, head_blk, first_bad);
+ if (tail_distance > BTOBB(XLOG_MAX_ICLOGS * hsize))
+ break;
+
+ /* skip to the next record; returns positive count on success */
+ error = xlog_seek_logrec_hdr(log, head_blk, first_bad, 2, bp,
+ &tmp_tail, &thead, &wrapped);
+ if (error < 0)
+ goto out;
+ *tail_blk = tmp_tail;
+ first_bad = 0;
+ error = xlog_do_recovery_pass(log, head_blk, *tail_blk,
+ XLOG_RECOVER_CRCPASS, &first_bad);
+ }
+
+ if (!error && *tail_blk != orig_tail)
+ xfs_warn(log->l_mp,
+ "Tail block (0x%llx) overwrite detected. Updated to 0x%llx",
+ orig_tail, *tail_blk);
out:
xlog_put_bp(bp);
return error;
@@ -1143,7 +1198,7 @@ xlog_verify_head(
*/
error = xlog_do_recovery_pass(log, *head_blk, tmp_rhead_blk,
XLOG_RECOVER_CRCPASS, &first_bad);
- if (error == -EFSBADCRC) {
+ if ((error == -EFSBADCRC || error == -EFSCORRUPTED) && first_bad) {
/*
* We've hit a potential torn write. Reset the error and warn
* about it.
@@ -1183,31 +1238,12 @@ xlog_verify_head(
ASSERT(0);
return 0;
}
-
- /*
- * Now verify the tail based on the updated head. This is
- * required because the torn writes trimmed from the head could
- * have been written over the tail of a previous record. Return
- * any errors since recovery cannot proceed if the tail is
- * corrupt.
- *
- * XXX: This leaves a gap in truly robust protection from torn
- * writes in the log. If the head is behind the tail, the tail
- * pushes forward to create some space and then a crash occurs
- * causing the writes into the previous record's tail region to
- * tear, log recovery isn't able to recover.
- *
- * How likely is this to occur? If possible, can we do something
- * more intelligent here? Is it safe to push the tail forward if
- * we can determine that the tail is within the range of the
- * torn write (e.g., the kernel can only overwrite the tail if
- * it has actually been pushed forward)? Alternatively, could we
- * somehow prevent this condition at runtime?
- */
- error = xlog_verify_tail(log, *head_blk, *tail_blk);
}
+ if (error)
+ return error;
- return error;
+ return xlog_verify_tail(log, *head_blk, tail_blk,
+ be32_to_cpu((*rhead)->h_size));
}
/*
@@ -2949,7 +2985,7 @@ xlog_recover_inode_pass2(
struct xlog_recover_item *item,
xfs_lsn_t current_lsn)
{
- xfs_inode_log_format_t *in_f;
+ struct xfs_inode_log_format *in_f;
xfs_mount_t *mp = log->l_mp;
xfs_buf_t *bp;
xfs_dinode_t *dip;
@@ -2963,10 +2999,10 @@ xlog_recover_inode_pass2(
uint isize;
int need_free = 0;
- if (item->ri_buf[0].i_len == sizeof(xfs_inode_log_format_t)) {
+ if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
in_f = item->ri_buf[0].i_addr;
} else {
- in_f = kmem_alloc(sizeof(xfs_inode_log_format_t), KM_SLEEP);
+ in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), KM_SLEEP);
need_free = 1;
error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
if (error)
@@ -3137,16 +3173,8 @@ xlog_recover_inode_pass2(
}
fields = in_f->ilf_fields;
- switch (fields & (XFS_ILOG_DEV | XFS_ILOG_UUID)) {
- case XFS_ILOG_DEV:
+ if (fields & XFS_ILOG_DEV)
xfs_dinode_put_rdev(dip, in_f->ilf_u.ilfu_rdev);
- break;
- case XFS_ILOG_UUID:
- memcpy(XFS_DFORK_DPTR(dip),
- &in_f->ilf_u.ilfu_uuid,
- sizeof(uuid_t));
- break;
- }
if (in_f->ilf_size == 2)
goto out_owner_change;
@@ -4271,7 +4299,7 @@ xlog_recover_add_to_trans(
char *dp,
int len)
{
- xfs_inode_log_format_t *in_f; /* any will do */
+ struct xfs_inode_log_format *in_f; /* any will do */
xlog_recover_item_t *item;
char *ptr;
@@ -4305,7 +4333,7 @@ xlog_recover_add_to_trans(
ptr = kmem_alloc(len, KM_SLEEP);
memcpy(ptr, dp, len);
- in_f = (xfs_inode_log_format_t *)ptr;
+ in_f = (struct xfs_inode_log_format *)ptr;
/* take the tail entry */
item = list_entry(trans->r_itemq.prev, xlog_recover_item_t, ri_list);
@@ -4801,12 +4829,16 @@ xlog_recover_process_intents(
int error = 0;
struct xfs_ail_cursor cur;
struct xfs_ail *ailp;
+#if defined(DEBUG) || defined(XFS_WARN)
xfs_lsn_t last_lsn;
+#endif
ailp = log->l_ailp;
spin_lock(&ailp->xa_lock);
lip = xfs_trans_ail_cursor_first(ailp, &cur, 0);
+#if defined(DEBUG) || defined(XFS_WARN)
last_lsn = xlog_assign_lsn(log->l_curr_cycle, log->l_curr_block);
+#endif
while (lip != NULL) {
/*
* We're done when we see something other than an intent.
@@ -5218,7 +5250,7 @@ xlog_do_recovery_pass(
xfs_daddr_t *first_bad) /* out: first bad log rec */
{
xlog_rec_header_t *rhead;
- xfs_daddr_t blk_no;
+ xfs_daddr_t blk_no, rblk_no;
xfs_daddr_t rhead_blk;
char *offset;
xfs_buf_t *hbp, *dbp;
@@ -5231,7 +5263,7 @@ xlog_do_recovery_pass(
LIST_HEAD (buffer_list);
ASSERT(head_blk != tail_blk);
- rhead_blk = 0;
+ blk_no = rhead_blk = tail_blk;
for (i = 0; i < XLOG_RHASH_SIZE; i++)
INIT_HLIST_HEAD(&rhash[i]);
@@ -5309,7 +5341,6 @@ xlog_do_recovery_pass(
}
memset(rhash, 0, sizeof(rhash));
- blk_no = rhead_blk = tail_blk;
if (tail_blk > head_blk) {
/*
* Perform recovery around the end of the physical log.
@@ -5371,9 +5402,19 @@ xlog_do_recovery_pass(
bblks = (int)BTOBB(be32_to_cpu(rhead->h_len));
blk_no += hblks;
- /* Read in data for log record */
- if (blk_no + bblks <= log->l_logBBsize) {
- error = xlog_bread(log, blk_no, bblks, dbp,
+ /*
+ * Read the log record data in multiple reads if it
+ * wraps around the end of the log. Note that if the
+ * header already wrapped, blk_no could point past the
+ * end of the log. The record data is contiguous in
+ * that case.
+ */
+ if (blk_no + bblks <= log->l_logBBsize ||
+ blk_no >= log->l_logBBsize) {
+ /* mod blk_no in case the header wrapped and
+ * pushed it beyond the end of the log */
+ rblk_no = do_mod(blk_no, log->l_logBBsize);
+ error = xlog_bread(log, rblk_no, bblks, dbp,
&offset);
if (error)
goto bread_err2;
@@ -5563,6 +5604,8 @@ xlog_do_recover(
xfs_buf_t *bp;
xfs_sb_t *sbp;
+ trace_xfs_log_recover(log, head_blk, tail_blk);
+
/*
* First replay the images in the log.
*/
@@ -5782,7 +5825,7 @@ xlog_recover_cancel(
* Read all of the agf and agi counters and check that they
* are consistent with the superblock counters.
*/
-void
+STATIC void
xlog_recover_check_summary(
struct xlog *log)
{