summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2021-09-01 10:45:53 -0700
committerDarrick J. Wong <djwong@kernel.org>2021-09-17 18:54:52 -0700
commit88687a1baa6621ac2509b2d0e1d3d29ed54de536 (patch)
tree9584e6fbc8f1481bd95b6160353adfefbff46e2d
parent98618b35d753c64477255a188350520959926dcc (diff)
xfs: repair obviously broken inode modes
Building off the rmap scanner that we added in the previous patch, we can now find data block 0 and use the information inside it to guess the mode of an inode whose current mode is obviously invalid.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r--fs/xfs/scrub/inode_repair.c169
-rw-r--r--fs/xfs/scrub/trace.h11
2 files changed, 171 insertions, 9 deletions
diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c
index 63f5c68b8444..2dc2d6657c75 100644
--- a/fs/xfs/scrub/inode_repair.c
+++ b/fs/xfs/scrub/inode_repair.c
@@ -66,6 +66,9 @@ struct xrep_dinode_stats {
/* Blocks in use by the attr fork. */
xfs_rfsblock_t attr_blocks;
+ /* Physical block containing data block 0. */
+ xfs_fsblock_t block0;
+
/* Number of data device extents for the data fork. */
xfs_extnum_t data_extents;
@@ -149,11 +152,157 @@ xrep_dinode_header(
dip->di_gen = cpu_to_be32(sc->sm->sm_gen);
}
+/*
+ * Inspect the start of a buffer and decide whether it plausibly holds the
+ * first block of a directory owned by @ino and located at @daddr.  Only the
+ * dir3 block header and the three bestfree entries are examined.
+ *
+ * Returns true if every check passes, false as soon as one fails.
+ */
+static inline bool
+xrep_dinode_is_dir(
+	xfs_ino_t			ino,
+	xfs_daddr_t			daddr,
+	struct xfs_buf			*bp)
+{
+	struct xfs_mount		*mp = bp->b_mount;
+	struct xfs_dir3_blk_hdr		*hdr3 = bp->b_addr;
+	struct xfs_dir2_data_free	*bf;
+	xfs_lsn_t			lsn;
+
+	/* The magic must be one of the two dir3 values we recognize. */
+	if (!(hdr3->magic == cpu_to_be32(XFS_DIR3_BLOCK_MAGIC) ||
+	      hdr3->magic == cpu_to_be32(XFS_DIR3_DATA_MAGIC)))
+		return false;
+
+	/* Owner, metadata uuid, and disk address must all match. */
+	if (be64_to_cpu(hdr3->owner) != ino ||
+	    !uuid_equal(&hdr3->uuid, &mp->m_sb.sb_meta_uuid) ||
+	    be64_to_cpu(hdr3->blkno) != daddr)
+		return false;
+
+	/*
+	 * Directory blocks are always logged, so a null LSN or one that the
+	 * log does not consider valid disqualifies the block.
+	 */
+	lsn = be64_to_cpu(hdr3->lsn);
+	if (lsn == NULLCOMMITLSN || !xlog_valid_lsn(mp->m_log, lsn))
+		return false;
+
+	/*
+	 * The bestfree array sits right behind the header, so looking at it
+	 * won't run off the end of the buffer.  A zero-length entry must not
+	 * carry a nonzero offset.
+	 */
+	bf = xfs_dir2_data_bestfree_p(mp, bp->b_addr);
+	if ((!bf[0].length && bf[0].offset) ||
+	    (!bf[1].length && bf[1].offset) ||
+	    (!bf[2].length && bf[2].offset))
+		return false;
+
+	/* Entry lengths must not increase from one slot to the next. */
+	return be16_to_cpu(bf[0].length) >= be16_to_cpu(bf[1].length) &&
+	       be16_to_cpu(bf[1].length) >= be16_to_cpu(bf[2].length);
+}
+
+/*
+ * Choose a file type for an inode by looking at the data fork format, the
+ * recorded file size, and (for extents/btree format) the contents of the
+ * block mapped at file offset zero.
+ *
+ * @dip: on-disk inode being repaired.
+ * @dis: rmap scan results; block0, rt_extents, and sc are read here.
+ *
+ * Returns one of S_IFLNK, S_IFDIR, S_IFBLK, S_IFCHR, or S_IFREG.  Regular
+ * file is the answer whenever nothing more specific can be inferred.
+ */
+STATIC uint16_t
+xrep_dinode_guess_mode(
+	struct xfs_dinode		*dip,
+	struct xrep_dinode_stats	*dis)
+{
+	struct xfs_mount		*mp = dis->sc->mp;
+	struct xfs_buf			*bp;
+	xfs_daddr_t			daddr;
+	uint64_t			isize = be64_to_cpu(dip->di_size);
+	unsigned int			dsize = XFS_DFORK_DSIZE(dip, mp);
+	bool				dsize_ok = dsize <= XFS_LITINO(mp);
+	uint16_t			guess;
+
+	if (dip->di_format == XFS_DINODE_FMT_LOCAL) {
+		/*
+		 * Local format: the data area has to be a sane size and large
+		 * enough to hold the whole file before we trust its contents.
+		 */
+		if (!dsize_ok || dsize < isize)
+			return S_IFREG;
+
+		/*
+		 * A null byte somewhere in the data suggests a shortform
+		 * directory; no nulls at all suggests a symbolic link.
+		 */
+		if (memchr(XFS_DFORK_DPTR(dip), 0, isize))
+			return S_IFDIR;
+		return S_IFLNK;
+	}
+
+	if (dip->di_format == XFS_DINODE_FMT_DEV) {
+		/*
+		 * Dev format: the data area has to be a sane size, able to
+		 * store a dev_t, and the file size must be zero.  This could
+		 * be a blockdev, a chardev, a fifo, or a socket, and there is
+		 * no solid way to tell them apart.
+		 */
+		if (!dsize_ok || dsize < sizeof(__be32) || isize != 0)
+			return S_IFREG;
+
+		/* Nonzero device number: blockdev.  Zero: chardev (whiteout). */
+		if (xfs_dinode_get_rdev(dip) != 0)
+			return S_IFBLK;
+		return S_IFCHR;
+	}
+
+	/* Any format other than extents or btree is taken as a regular file. */
+	if (dip->di_format != XFS_DINODE_FMT_EXTENTS &&
+	    dip->di_format != XFS_DINODE_FMT_BTREE)
+		return S_IFREG;
+
+	/*
+	 * Three cheap reasons to settle on a regular file before doing any IO:
+	 * directories are never zero length; no written mapping for file block
+	 * zero means this is probably a sparse regular file; and directories
+	 * can't have rt extents.
+	 */
+	if (isize == 0 || dis->block0 == NULLFSBLOCK || dis->rt_extents > 0)
+		return S_IFREG;
+
+	/*
+	 * Read the first block of the file.  We have no idea what file
+	 * geometry (e.g. dirblock size) we might be reading into, so use an
+	 * uncached buffer to avoid polluting the buffer cache.  Uncached
+	 * mapped buffers aren't possible, so the directory header is all we
+	 * have to go on.  A failed read falls back to regular file.
+	 */
+	daddr = XFS_FSB_TO_DADDR(mp, dis->block0);
+	if (xfs_buf_read_uncached(mp->m_ddev_targp, daddr,
+			XFS_FSS_TO_BB(mp, 1), 0, &bp, NULL))
+		return S_IFREG;
+
+	guess = xrep_dinode_is_dir(dis->sc->sm->sm_ino, daddr, bp) ?
+			S_IFDIR : S_IFREG;
+
+	xfs_buf_relse(bp);
+	return guess;
+}
+
/* Turn di_mode into /something/ recognizable. */
STATIC void
xrep_dinode_mode(
struct xfs_scrub *sc,
- struct xfs_dinode *dip)
+ struct xfs_dinode *dip,
+ struct xrep_dinode_stats *dis)
{
uint16_t mode;
@@ -164,7 +313,7 @@ xrep_dinode_mode(
return;
/* bad mode, so we set it to a file that only root can read */
- mode = S_IFREG;
+ mode = xrep_dinode_guess_mode(dip, dis);
dip->di_mode = cpu_to_be16(mode);
dip->di_uid = 0;
dip->di_gid = 0;
@@ -367,9 +516,17 @@ xrep_dinode_walk_rmap(
}
dis->data_blocks += rec->rm_blockcount;
- if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK))
+ if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK)) {
dis->data_extents++;
+ if (rec->rm_offset == 0 &&
+ !(rec->rm_flags & XFS_RMAP_UNWRITTEN)) {
+ if (dis->block0 != NULLFSBLOCK)
+ return -EFSCORRUPTED;
+ dis->block0 = rec->rm_startblock;
+ }
+ }
+
return 0;
}
@@ -421,7 +578,8 @@ xrep_dinode_count_rmaps(
trace_xrep_dinode_count_rmaps(dis->sc,
dis->data_blocks, dis->rt_blocks, dis->attr_blocks,
- dis->data_extents, dis->rt_extents, dis->attr_extents);
+ dis->data_extents, dis->rt_extents, dis->attr_extents,
+ dis->block0);
return 0;
}
@@ -901,6 +1059,7 @@ xrep_dinode_core(
{
struct xrep_dinode_stats dis = {
.sc = sc,
+ .block0 = NULLFSBLOCK,
};
struct xfs_imap imap;
struct xfs_buf *bp;
@@ -938,7 +1097,7 @@ xrep_dinode_core(
/* Fix everything the verifier will complain about. */
dip = xfs_buf_offset(bp, imap.im_boffset);
xrep_dinode_header(sc, dip);
- xrep_dinode_mode(sc, dip);
+ xrep_dinode_mode(sc, dip, &dis);
xrep_dinode_flags(sc, dip, dis.rt_extents > 0);
xrep_dinode_size(sc, dip);
xrep_dinode_extsize_hints(sc, dip);
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 86051920ac49..244f0d154abe 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -1247,9 +1247,9 @@ TRACE_EVENT(xrep_dinode_count_rmaps,
TP_PROTO(struct xfs_scrub *sc, xfs_rfsblock_t data_blocks,
xfs_rfsblock_t rt_blocks, xfs_rfsblock_t attr_blocks,
xfs_extnum_t data_extents, xfs_extnum_t rt_extents,
- xfs_aextnum_t attr_extents),
+ xfs_aextnum_t attr_extents, xfs_fsblock_t block0),
TP_ARGS(sc, data_blocks, rt_blocks, attr_blocks, data_extents,
- rt_extents, attr_extents),
+ rt_extents, attr_extents, block0),
TP_STRUCT__entry(
__field(dev_t, dev)
__field(xfs_ino_t, ino)
@@ -1259,6 +1259,7 @@ TRACE_EVENT(xrep_dinode_count_rmaps,
__field(xfs_extnum_t, data_extents)
__field(xfs_extnum_t, rt_extents)
__field(xfs_aextnum_t, attr_extents)
+ __field(xfs_fsblock_t, block0)
),
TP_fast_assign(
__entry->dev = sc->mp->m_super->s_dev;
@@ -1269,8 +1270,9 @@ TRACE_EVENT(xrep_dinode_count_rmaps,
__entry->data_extents = data_extents;
__entry->rt_extents = rt_extents;
__entry->attr_extents = attr_extents;
+ __entry->block0 = block0;
),
- TP_printk("dev %d:%d ino 0x%llx dblocks 0x%llx rtblocks 0x%llx ablocks 0x%llx dextents %u rtextents %u aextents %u",
+ TP_printk("dev %d:%d ino 0x%llx dblocks 0x%llx rtblocks 0x%llx ablocks 0x%llx dextents %u rtextents %u aextents %u startblock0 0x%llx",
MAJOR(__entry->dev), MINOR(__entry->dev),
__entry->ino,
__entry->data_blocks,
@@ -1278,7 +1280,8 @@ TRACE_EVENT(xrep_dinode_count_rmaps,
__entry->attr_blocks,
__entry->data_extents,
__entry->rt_extents,
- __entry->attr_extents)
+ __entry->attr_extents,
+ __entry->block0)
);
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */