summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <darrick.wong@oracle.com>2020-02-19 17:01:48 -0800
committerDarrick J. Wong <darrick.wong@oracle.com>2020-06-01 21:16:31 -0700
commitee4d955c32eb0ea93a6e4c6df066b66771f4e1fd (patch)
tree277d8069d011f1cf0c3ff3ef3b6bc52021bcbe9f
parenta4154094a525c1b6cdadc105950c55c0fa70c8fc (diff)
xfs: repair extended attributes
If the extended attributes look bad, try to sift through the rubble to find whatever keys/values we can, zap the attr tree, and re-add the values. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/scrub/attr.c10
-rw-r--r--fs/xfs/scrub/attr.h10
-rw-r--r--fs/xfs/scrub/attr_repair.c832
-rw-r--r--fs/xfs/scrub/repair.c31
-rw-r--r--fs/xfs/scrub/repair.h4
-rw-r--r--fs/xfs/scrub/scrub.c2
-rw-r--r--fs/xfs/scrub/scrub.h3
-rw-r--r--fs/xfs/scrub/trace.c1
-rw-r--r--fs/xfs/scrub/trace.h85
-rw-r--r--fs/xfs/xfs_buf.c9
-rw-r--r--fs/xfs/xfs_buf.h8
12 files changed, 992 insertions, 4 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index d618718771ec..3fd56fb0ab48 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -170,6 +170,7 @@ xfs-y += $(addprefix scrub/, \
agheader_repair.o \
alloc_repair.o \
array.o \
+ attr_repair.o \
bitmap.o \
blob.o \
bmap_repair.o \
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index 9faddb334a2c..beda402c35e8 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -38,9 +38,15 @@ xchk_setup_xattr_buf(
* We need enough space to read an xattr value from the file or enough
* space to hold three copies of the xattr free space bitmap. We don't
* need the buffer space for both purposes at the same time.
+ *
+ * If we're doing a repair, we need enough space to hold the largest
+ * xattr value and the largest xattr name.
*/
sz = 3 * sizeof(long) * BITS_TO_LONGS(sc->mp->m_attr_geo->blksize);
- sz = max_t(size_t, sz, value_size);
+ if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR)
+ sz = max_t(size_t, sz, value_size + XATTR_NAME_MAX + 1);
+ else
+ sz = max_t(size_t, sz, value_size);
/*
* If there's already a buffer, figure out if we need to reallocate it
@@ -182,7 +188,7 @@ fail_xref:
* Within a char, the lowest bit of the char represents the byte with
* the smallest address
*/
-STATIC bool
+bool
xchk_xattr_set_map(
struct xfs_scrub *sc,
unsigned long *map,
diff --git a/fs/xfs/scrub/attr.h b/fs/xfs/scrub/attr.h
index 13a1d2e8424d..b2d758953300 100644
--- a/fs/xfs/scrub/attr.h
+++ b/fs/xfs/scrub/attr.h
@@ -37,6 +37,16 @@ xchk_xattr_valuebuf(
return ab->buf;
}
+/* A place to store attribute names. */
+static inline unsigned char *
+xchk_xattr_namebuf(
+ struct xfs_scrub *sc)
+{
+ struct xchk_xattr_buf *ab = sc->buf;
+
+ return (unsigned char *)ab->buf + ab->sz - XATTR_NAME_MAX - 1;
+}
+
/* A bitmap of space usage computed by walking an attr leaf block. */
static inline unsigned long *
xchk_xattr_usedmap(
diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c
new file mode 100644
index 000000000000..fb95f0191fed
--- /dev/null
+++ b/fs/xfs/scrub/attr_repair.c
@@ -0,0 +1,832 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_attr_remote.h"
+#include "xfs_bmap.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/array.h"
+#include "scrub/blob.h"
+#include "scrub/attr.h"
+
+/*
+ * Extended Attribute Repair
+ * =========================
+ *
+ * We repair extended attributes by reading the attribute fork blocks looking
+ * for keys and values, then truncate the entire attr fork and reinsert all
+ * the attributes. Unfortunately, there's no secondary copy of most extended
+ * attribute data, which means that if we blow up midway through there's
+ * little we can do.
+ */
+
+struct xrep_xattr_key {
+ xblob_cookie value_cookie;
+ xblob_cookie name_cookie;
+ uint hash;
+ int flags;
+ uint32_t valuelen;
+ uint16_t namelen;
+} __packed;
+
+struct xrep_xattr {
+ struct xfs_scrub *sc;
+ struct xfbma *xattr_records;
+ struct xblob *xattr_blobs;
+
+ /* Size of the largest attribute value we're trying to salvage. */
+ size_t max_valuelen;
+};
+
+/*
+ * Decide if we want to salvage this attribute. We don't bother with
+ * incomplete or oversized keys or values.
+ */
+STATIC int
+xrep_xattr_want_salvage(
+ int flags,
+ const void *name,
+ int namelen,
+ int valuelen)
+{
+ if (flags & XFS_ATTR_INCOMPLETE)
+ return false;
+ if (namelen > XATTR_NAME_MAX || namelen <= 0)
+ return false;
+ if (valuelen > XATTR_SIZE_MAX || valuelen < 0)
+ return false;
+ if (!xfs_attr_namecheck(name, namelen))
+ return false;
+ return true;
+}
+
+/* Allocate an in-core record to hold xattrs while we rebuild the xattr data. */
+STATIC int
+xrep_xattr_salvage_key(
+ struct xrep_xattr *rx,
+ int flags,
+ unsigned char *name,
+ int namelen,
+ unsigned char *value,
+ int valuelen)
+{
+ struct xrep_xattr_key key = {
+ .valuelen = valuelen,
+ .flags = flags & (XFS_ATTR_ROOT | XFS_ATTR_SECURE),
+ .namelen = namelen,
+ };
+ int error = 0;
+
+ if (xchk_should_terminate(rx->sc, &error))
+ return error;
+
+ trace_xrep_xattr_salvage_key(rx->sc->ip, key.flags, name, namelen,
+ valuelen);
+
+ error = xblob_put(rx->xattr_blobs, &key.name_cookie, name, namelen);
+ if (error)
+ return error;
+ error = xblob_put(rx->xattr_blobs, &key.value_cookie, value, valuelen);
+ if (error)
+ return error;
+
+ key.hash = xfs_da_hashname(name, namelen);
+
+ error = xfbma_append(rx->xattr_records, &key);
+ if (error)
+ return error;
+
+ rx->max_valuelen = max_t(size_t, rx->max_valuelen, valuelen);
+ return 0;
+}
+
+/*
+ * Record a shortform extended attribute key & value for later reinsertion
+ * into the inode.
+ */
+STATIC int
+xrep_xattr_salvage_sf_attr(
+ struct xrep_xattr *rx,
+ struct xfs_attr_sf_entry *sfe)
+{
+ unsigned char *value = &sfe->nameval[sfe->namelen];
+
+ if (!xrep_xattr_want_salvage(sfe->flags, sfe->nameval, sfe->namelen,
+ sfe->valuelen))
+ return 0;
+
+ return xrep_xattr_salvage_key(rx, sfe->flags, sfe->nameval,
+ sfe->namelen, value, sfe->valuelen);
+}
+
+/*
+ * Record a local format extended attribute key & value for later reinsertion
+ * into the inode.
+ */
+STATIC int
+xrep_xattr_salvage_local_attr(
+ struct xrep_xattr *rx,
+ struct xfs_attr_leaf_entry *ent,
+ unsigned int nameidx,
+ const char *buf_end,
+ struct xfs_attr_leaf_name_local *lentry)
+{
+ unsigned char *value;
+ unsigned long *usedmap = xchk_xattr_usedmap(rx->sc);
+ unsigned int valuelen;
+ unsigned int namesize;
+
+ /*
+ * Decode the leaf local entry format. If something seems wrong, we
+ * junk the attribute.
+ */
+ valuelen = be16_to_cpu(lentry->valuelen);
+ namesize = xfs_attr_leaf_entsize_local(lentry->namelen, valuelen);
+ if ((char *)lentry + namesize > buf_end)
+ return 0;
+ if (!xrep_xattr_want_salvage(ent->flags, lentry->nameval,
+ lentry->namelen, valuelen))
+ return 0;
+ if (!xchk_xattr_set_map(rx->sc, usedmap, nameidx, namesize))
+ return 0;
+
+ /* Try to save this attribute. */
+ value = &lentry->nameval[lentry->namelen];
+ return xrep_xattr_salvage_key(rx, ent->flags, lentry->nameval,
+ lentry->namelen, value, valuelen);
+}
+
+/*
+ * Record a remote format extended attribute key & value for later reinsertion
+ * into the inode.
+ */
+STATIC int
+xrep_xattr_salvage_remote_attr(
+ struct xrep_xattr *rx,
+ struct xfs_attr_leaf_entry *ent,
+ unsigned int nameidx,
+ const char *buf_end,
+ struct xfs_attr_leaf_name_remote *rentry,
+ unsigned int ent_idx,
+ struct xfs_buf *leaf_bp)
+{
+ struct xfs_da_args args = {
+ .trans = rx->sc->tp,
+ .dp = rx->sc->ip,
+ .index = ent_idx,
+ .geo = rx->sc->mp->m_attr_geo,
+ };
+ unsigned long *usedmap = xchk_xattr_usedmap(rx->sc);
+ unsigned char *value;
+ unsigned int valuelen;
+ unsigned int namesize;
+ int error;
+
+ /*
+ * Decode the leaf remote entry format. If something seems wrong, we
+ * junk the attribute. Note that we should never find a zero-length
+ * remote attribute value.
+ */
+ valuelen = be32_to_cpu(rentry->valuelen);
+ namesize = xfs_attr_leaf_entsize_remote(rentry->namelen);
+ if ((char *)rentry + namesize > buf_end)
+ return 0;
+ if (valuelen == 0 ||
+ !xrep_xattr_want_salvage(ent->flags, rentry->name, rentry->namelen,
+ valuelen))
+ return 0;
+ if (!xchk_xattr_set_map(rx->sc, usedmap, nameidx, namesize))
+ return 0;
+
+ /*
+ * Find somewhere to save this value. We can't use the xchk_xattr_buf
+ * here because we're still using the memory for the attr block bitmap.
+ */
+ value = kmem_alloc_large(valuelen, KM_MAYFAIL);
+ if (!value)
+ return -ENOMEM;
+
+ /* Look up the remote value and stash it for reconstruction. */
+ args.valuelen = valuelen;
+ args.namelen = rentry->namelen;
+ args.name = rentry->name;
+ args.value = value;
+ error = xfs_attr3_leaf_getvalue(leaf_bp, &args);
+ if (error || args.rmtblkno == 0)
+ goto err_free;
+
+ error = xfs_attr_rmtval_get(&args);
+ if (error)
+ goto err_free;
+
+ /* Try to save this attribute. */
+ error = xrep_xattr_salvage_key(rx, ent->flags, rentry->name,
+ rentry->namelen, value, valuelen);
+err_free:
+ /* remote value was garbage, junk it */
+ if (error == -EFSBADCRC || error == -EFSCORRUPTED)
+ error = 0;
+ kmem_free(value);
+ return error;
+}
+
+/* Extract every xattr key that we can from this attr fork block. */
+STATIC int
+xrep_xattr_recover_leaf(
+ struct xrep_xattr *rx,
+ struct xfs_buf *bp)
+{
+ struct xfs_attr3_icleaf_hdr leafhdr;
+ struct xfs_scrub *sc = rx->sc;
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_attr_leafblock *leaf;
+ unsigned long *usedmap = xchk_xattr_usedmap(sc);
+ struct xfs_attr_leaf_name_local *lentry;
+ struct xfs_attr_leaf_name_remote *rentry;
+ struct xfs_attr_leaf_entry *ent;
+ struct xfs_attr_leaf_entry *entries;
+ char *buf_end;
+ size_t off;
+ unsigned int nameidx;
+ unsigned int hdrsize;
+ int i;
+ int error = 0;
+
+ bitmap_zero(usedmap, mp->m_attr_geo->blksize);
+
+ /* Check the leaf header */
+ leaf = bp->b_addr;
+ xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
+ hdrsize = xfs_attr3_leaf_hdr_size(leaf);
+ xchk_xattr_set_map(sc, usedmap, 0, hdrsize);
+ entries = xfs_attr3_leaf_entryp(leaf);
+
+ buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize;
+ for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) {
+ if (xchk_should_terminate(sc, &error))
+ break;
+
+ /* Skip key if it conflicts with something else? */
+ off = (char *)ent - (char *)leaf;
+ if (!xchk_xattr_set_map(sc, usedmap, off,
+ sizeof(xfs_attr_leaf_entry_t)))
+ continue;
+
+ /* Check the name information. */
+ nameidx = be16_to_cpu(ent->nameidx);
+ if (nameidx < leafhdr.firstused ||
+ nameidx >= mp->m_attr_geo->blksize)
+ continue;
+
+ if (ent->flags & XFS_ATTR_LOCAL) {
+ lentry = xfs_attr3_leaf_name_local(leaf, i);
+ error = xrep_xattr_salvage_local_attr(rx, ent, nameidx,
+ buf_end, lentry);
+ } else {
+ rentry = xfs_attr3_leaf_name_remote(leaf, i);
+ error = xrep_xattr_salvage_remote_attr(rx, ent, nameidx,
+ buf_end, rentry, i, bp);
+ }
+ if (error)
+ break;
+ }
+
+ return error;
+}
+
+/* Try to recover shortform attrs. */
+STATIC int
+xrep_xattr_recover_sf(
+ struct xrep_xattr *rx)
+{
+ struct xfs_attr_shortform *sf;
+ struct xfs_attr_sf_entry *sfe;
+ struct xfs_attr_sf_entry *next;
+ struct xfs_ifork *ifp;
+ unsigned char *end;
+ int i;
+ int error;
+
+ ifp = XFS_IFORK_PTR(rx->sc->ip, XFS_ATTR_FORK);
+ sf = (struct xfs_attr_shortform *)rx->sc->ip->i_afp->if_u1.if_data;
+ end = (unsigned char *)ifp->if_u1.if_data + ifp->if_bytes;
+
+ for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
+ if (xchk_should_terminate(rx->sc, &error))
+ break;
+
+ next = XFS_ATTR_SF_NEXTENTRY(sfe);
+ if ((unsigned char *)next > end)
+ break;
+
+ /* Ok, let's save this key/value. */
+ error = xrep_xattr_salvage_sf_attr(rx, sfe);
+ if (error)
+ return error;
+
+ sfe = next;
+ }
+
+ return 0;
+}
+
+/*
+ * Blindly walk every block in an attr fork without stumbling over incore
+ * buffers for remote attr value blocks.
+ *
+ * Attribute leaf and node blocks are simple -- they're a single block, so we
+ * can walk them one at a time and we never have to worry about discontiguous
+ * multiblock buffers like we do for directories.
+ *
+ * Unfortunately, remote attr blocks add a lot of complexity here. Each disk
+ * block is totally self contained, in the sense that the v5 header provides no
+ * indication that there could be more data in the next block. The incore
+ * buffers can span multiple blocks, though they never cross extent records.
+ * However, they don't necessarily start or end on an extent record boundary.
+ *
+ * Because the buffer cache get function complains if it finds a buffer
+ * matching the block number but not matching the length, we must be careful to
+ * look for incore buffers (up to the maximum length of a remote value) that
+ * could be hiding anywhere in the extent record. If we find an incore buffer,
+ * we can pass that to the callback function. Otherwise, read a single block
+ * and pass that to the callback. Note the subtlety that remote attr value
+ * blocks for which there is no incore buffer will be passed to the callback
+ * one block at a time.
+ *
+ * The caller must hold the ILOCK. We use XBF_TRYLOCK here to skip any locked
+ * buffer on the assumption that we don't own the block and don't want to hang
+ * the system on a potentially garbage buffer.
+ *
+ * XREP_ATTR_WALK_INCORE: don't read buffers from disk.
+ */
+#define XREP_ATTR_WALK_INCORE (1U << 0)
+STATIC int
+xrep_attr_walk_blind(
+ struct xfs_inode *ip,
+ unsigned int flags,
+ int (*fn)(struct xfs_inode *ip, xfs_dablk_t dabno,
+ struct xfs_buf *bp, void *priv),
+ void *priv)
+{
+ struct xfs_bmbt_irec map;
+ struct xfs_mount *mp = ip->i_mount;
+ xfs_fileoff_t offset = 0;
+ xfs_fileoff_t end = XFS_MAX_FILEOFF;
+ xfs_filblks_t len;
+ xfs_fsblock_t fsbno;
+ xfs_dablk_t dabno;
+ int max_rmt_blocks;
+ int nmap;
+ int error = 0;
+
+ ASSERT(ip->i_mount->m_attr_geo->fsbcount == 1);
+
+ max_rmt_blocks = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX);
+
+ for (offset = 0;
+ offset < end;
+ offset = map.br_startoff + map.br_blockcount) {
+ /* Walk the attr fork piece by piece... */
+ nmap = 1;
+ error = xfs_bmapi_read(ip, offset, end - offset,
+ &map, &nmap, XFS_BMAPI_ATTRFORK);
+ if (error)
+ return error;
+ if (nmap != 1)
+ return -EFSCORRUPTED;
+ if (!xfs_bmap_is_real_extent(&map))
+ continue;
+
+ for (dabno = map.br_startoff, fsbno = map.br_startblock;
+ dabno < map.br_startoff + map.br_blockcount;
+ dabno += len, fsbno += len) {
+ struct xfs_buf *bp;
+ xfs_daddr_t daddr = XFS_FSB_TO_DADDR(mp, fsbno);
+
+ len = min_t(xfs_filblks_t, map.br_blockcount,
+ max_rmt_blocks);
+
+ /*
+ * Look for an incore buffer for every possible rmt
+ * or leaf block that could start at this physical
+ * position.
+ */
+ while (len > 0) {
+ bp = xfs_buf_incore(mp->m_ddev_targp, daddr,
+ XFS_FSB_TO_BB(mp, len),
+ XBF_TRYLOCK | XBF_SCAN_STALE);
+ if (bp)
+ goto dispatch_fn;
+
+ len--;
+ }
+
+ if (flags & XREP_ATTR_WALK_INCORE)
+ continue;
+
+ /*
+ * If we didn't find a buffer, read 1 block from disk.
+ * We don't attach any buffer ops.
+ */
+ len = 1;
+ error = xfs_buf_read(mp->m_ddev_targp, daddr,
+ XFS_FSB_TO_BB(mp, len),
+ XBF_TRYLOCK, &bp, NULL);
+ if (error)
+ return error;
+
+dispatch_fn:
+ /* Call the callback function. */
+ error = fn(ip, dabno, bp, priv);
+ xfs_buf_relse(bp);
+ if (error)
+ return error;
+ }
+ }
+
+ return 0;
+}
+
+/* Deal with a buffer that we found during our walk of the attr fork. */
+STATIC int
+xrep_xattr_recover_block(
+ struct xfs_inode *ip,
+ xfs_dablk_t dabno,
+ struct xfs_buf *bp,
+ void *priv)
+{
+ struct xrep_xattr *rx = priv;
+ struct xfs_da_blkinfo *info = bp->b_addr;
+ int error = 0;
+
+ trace_xrep_xattr_recover_leafblock(rx->sc->ip, dabno,
+ be16_to_cpu(info->magic));
+
+ /*
+ * If the buffer has the right attr leaf magic and passes a structure
+ * check (we don't care about checksums), salvage what we can from it.
+ */
+ if (info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC) &&
+ xrep_buf_verify_struct(bp, &xfs_attr3_leaf_buf_ops))
+ error = xrep_xattr_recover_leaf(rx, bp);
+
+ /*
+ * If the buffer didn't already have buffer ops set, mark it stale so
+ * that it doesn't hang around in memory to cause problems.
+ */
+ if (bp->b_ops == NULL)
+ xfs_buf_stale(bp);
+ return error;
+}
+
+/* Extract as many attribute keys and values as we can. */
+STATIC int
+xrep_xattr_recover(
+ struct xrep_xattr *rx)
+{
+ struct xfs_scrub *sc = rx->sc;
+ int error = 0;
+
+ if (sc->ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
+ return xrep_xattr_recover_sf(rx);
+
+ /*
+ * Set the xchk_attr_buf to be as large as we're going to need it to be
+ * to compute space usage bitmaps for each attr block we try to
+ * salvage. We don't salvage attrs whose name and value areas are
+ * crosslinked with anything else.
+ */
+ error = xchk_setup_xattr_buf(sc, 0, KM_MAYFAIL);
+ if (error == -ENOMEM)
+ return -EDEADLOCK;
+ if (error)
+ return error;
+
+ return xrep_attr_walk_blind(sc->ip, 0, xrep_xattr_recover_block, rx);
+}
+
+/*
+ * Reset the extended attribute fork to a state where we can start re-adding
+ * the salvaged attributes.
+ */
+STATIC void
+xrep_xattr_fork_remove(
+ struct xfs_scrub *sc,
+ struct xfs_inode *ip)
+{
+ struct xfs_attr_sf_hdr *hdr;
+ struct xfs_ifork *ifp;
+
+ /*
+ * If the data fork is in btree format, we can't change di_forkoff
+ * because we could run afoul of the rule that the data fork isn't
+ * supposed to be in btree format if there's enough space in the fork
+ * that it could have used extents format. Instead, reinitialize the
+ * attr fork to have a shortform structure with zero attributes.
+ */
+ if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
+ ip->i_d.di_aformat = XFS_DINODE_FMT_LOCAL;
+ ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
+ ifp->if_flags &= ~XFS_IFEXTENTS;
+ ifp->if_flags |= XFS_IFINLINE;
+ xfs_idata_realloc(ip, (int)sizeof(*hdr) - ifp->if_bytes,
+ XFS_ATTR_FORK);
+ hdr = (struct xfs_attr_sf_hdr *)ifp->if_u1.if_data;
+ hdr->count = 0;
+ hdr->totsize = cpu_to_be16(sizeof(*hdr));
+ xfs_trans_log_inode(sc->tp, ip,
+ XFS_ILOG_CORE | XFS_ILOG_ADATA);
+ return;
+ }
+
+ xfs_attr_fork_remove(ip, sc->tp);
+}
+
+/* Rip the buffer ops off a block so that it can be marked stale. */
+STATIC int
+xrep_xattr_stale_block(
+ struct xfs_inode *ip,
+ xfs_dablk_t dabno,
+ struct xfs_buf *bp,
+ void *priv)
+{
+ xfs_buf_stale(bp);
+ return 0;
+}
+
+/*
+ * Free all the attribute fork blocks and delete the fork. The caller must
+ * join the inode to the transaction. This function returns with the inode
+ * joined to a clean scrub transaction.
+ */
+STATIC int
+xrep_xattr_reset_fork(
+ struct xfs_scrub *sc)
+{
+ int error;
+
+ if (sc->ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL)
+ goto zap;
+
+ /* Invalidate each attr block in the attr fork. */
+ error = xrep_attr_walk_blind(sc->ip, XREP_ATTR_WALK_INCORE,
+ xrep_xattr_stale_block, NULL);
+ if (error)
+ return error;
+
+ /* Now free all the blocks. */
+ error = xfs_bunmapi_range(&sc->tp, sc->ip, XFS_ATTR_FORK, 0,
+ XFS_MAX_FILEOFF, XFS_BMAPI_NODISCARD);
+ if (error)
+ return error;
+
+zap:
+ xrep_xattr_fork_remove(sc, sc->ip);
+ return xrep_roll_trans(sc);
+}
+
+/*
+ * Compare two xattr keys by namespace flags (user attr keys sort before
+ * ATTR_ROOT keys, which sort before ATTR_SECURE keys), then by hash.
+ */
+static int
+xrep_xattr_key_cmp(
+ const void *a,
+ const void *b)
+{
+ const struct xrep_xattr_key *ap = a;
+ const struct xrep_xattr_key *bp = b;
+
+ if (ap->flags > bp->flags)
+ return 1;
+ else if (ap->flags < bp->flags)
+ return -1;
+
+ if (ap->hash > bp->hash)
+ return 1;
+ else if (ap->hash < bp->hash)
+ return -1;
+ return 0;
+}
+
+/*
+ * Find all the extended attributes for this inode by scraping them out of the
+ * attribute key blocks by hand. The caller must clean up the lists if
+ * anything goes wrong.
+ */
+STATIC int
+xrep_xattr_find_attributes(
+ struct xrep_xattr *rx)
+{
+ struct xfs_inode *ip = rx->sc->ip;
+ struct xfs_ifork *ifp;
+ int error;
+
+ error = xrep_ino_dqattach(rx->sc);
+ if (error)
+ return error;
+
+ /* Extent map should be loaded. */
+ ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
+ if (XFS_IFORK_FORMAT(ip, XFS_ATTR_FORK) != XFS_DINODE_FMT_LOCAL &&
+ !(ifp->if_flags & XFS_IFEXTENTS)) {
+ error = xfs_iread_extents(rx->sc->tp, ip, XFS_ATTR_FORK);
+ if (error)
+ return error;
+ }
+
+ /* Read every attr key and value and record them in memory. */
+ error = xrep_xattr_recover(rx);
+ if (error)
+ return error;
+
+ /*
+ * Reset the xchk_attr_buf to be as large as we're going to need it to
+ * be to store each attribute name and value as we re-add them to the
+ * file. We must preallocate the memory here because once we start
+ * to modify the filesystem we cannot afford an ENOMEM.
+ */
+ error = xchk_setup_xattr_buf(rx->sc, rx->max_valuelen, KM_MAYFAIL);
+ if (error == -ENOMEM)
+ return -EDEADLOCK;
+ if (error)
+ return error;
+
+ return 0;
+}
+
+/* Insert one xattr key/value. */
+STATIC int
+xrep_xattr_insert_rec(
+ const void *item,
+ void *priv)
+{
+ struct xfs_da_args args = { NULL };
+ const struct xrep_xattr_key *key = item;
+ struct xrep_xattr *rx = priv;
+ unsigned char *name = xchk_xattr_namebuf(rx->sc);
+ unsigned char *value = xchk_xattr_valuebuf(rx->sc);
+ int error;
+
+ /*
+ * The attribute name is stored near the end of the in-core buffer,
+ * though we reserve one more byte to ensure null termination.
+ */
+ name[XATTR_NAME_MAX] = 0;
+
+ error = xblob_get(rx->xattr_blobs, key->name_cookie, name,
+ key->namelen);
+ if (error)
+ return error;
+
+ error = xblob_free(rx->xattr_blobs, key->name_cookie);
+ if (error)
+ return error;
+
+ error = xblob_get(rx->xattr_blobs, key->value_cookie, value,
+ key->valuelen);
+ if (error)
+ return error;
+
+ error = xblob_free(rx->xattr_blobs, key->value_cookie);
+ if (error)
+ return error;
+
+ name[key->namelen] = 0;
+
+ trace_xrep_xattr_insert_rec(rx->sc->ip, key->flags, name, key->namelen,
+ key->valuelen);
+
+ args.dp = rx->sc->ip;
+ args.attr_filter = key->flags;
+ args.name = name;
+ args.namelen = key->namelen;
+ args.value = value;
+ args.valuelen = key->valuelen;
+ return xfs_attr_set(&args);
+}
+
+/*
+ * Insert all the attributes that we collected.
+ *
+ * Commit the repair transaction and drop the ilock because the attribute
+ * setting code needs to be able to allocate special transactions and take the
+ * ilock on its own. Some day we'll have deferred attribute setting, at which
+ * point we'll be able to use that to replace the attributes atomically and
+ * safely.
+ */
+STATIC int
+xrep_xattr_rebuild_tree(
+ struct xrep_xattr *rx)
+{
+ int error;
+
+ /*
+ * Commit the repair transaction and drop the ILOCK so that we can
+ * use individual transactions to re-add each extended attribute.
+ */
+ error = xfs_trans_commit(rx->sc->tp);
+ rx->sc->tp = NULL;
+ if (error)
+ return error;
+
+ /*
+ * Drop the ILOCK so that we don't pin the tail of the log. We still
+ * hold the IOLOCK (aka i_rwsem) which will prevent attr modifications,
+ * but there's nothing to prevent userspace from reading/listing the
+ * attrs while we build a new attr fork. Oh well, at least the fs
+ * can't shut down those threads if they stumble into corrupt blocks.
+ */
+ xfs_iunlock(rx->sc->ip, XFS_ILOCK_EXCL);
+ rx->sc->ilock_flags &= ~XFS_ILOCK_EXCL;
+
+ /*
+ * Sort the attribute keys by hash to minimize dabtree splits when we
+ * rebuild the extended attribute information.
+ */
+ error = xfbma_sort(rx->xattr_records, xrep_xattr_key_cmp);
+ if (error)
+ return error;
+
+ /* Re-add every attr to the file. */
+ return xfbma_iter_del(rx->xattr_records, xrep_xattr_insert_rec, rx);
+}
+
+/*
+ * Repair the extended attribute metadata.
+ *
+ * XXX: Remote attribute value buffers encompass the entire (up to 64k) buffer.
+ * The buffer cache in XFS can't handle aliased multiblock buffers, so this
+ * might misbehave if the attr fork is crosslinked with other filesystem
+ * metadata.
+ */
+int
+xrep_xattr(
+ struct xfs_scrub *sc)
+{
+ struct xrep_xattr rx = {
+ .sc = sc,
+ };
+ int error;
+
+ if (!xfs_inode_hasattr(sc->ip))
+ return -ENOENT;
+
+ /* Set up some storage */
+ rx.xattr_records = xfbma_init(sizeof(struct xrep_xattr_key));
+ if (IS_ERR(rx.xattr_records))
+ return PTR_ERR(rx.xattr_records);
+ rx.xattr_blobs = xblob_init();
+ if (IS_ERR(rx.xattr_blobs)) {
+ error = PTR_ERR(rx.xattr_blobs);
+ goto out_arr;
+ }
+
+ /* Collect extended attributes by parsing raw blocks. */
+ error = xrep_xattr_find_attributes(&rx);
+ if (error)
+ goto out;
+
+ /*
+ * Invalidate and truncate all attribute fork extents. This is the
+ * point at which we are no longer able to bail out gracefully.
+ * We commit the transaction here because xfs_attr_set allocates its
+ * own transactions.
+ */
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+ error = xrep_xattr_reset_fork(sc);
+ if (error)
+ goto out;
+
+ /* Now rebuild the attribute information. */
+ error = xrep_xattr_rebuild_tree(&rx);
+out:
+ xblob_destroy(rx.xattr_blobs);
+out_arr:
+ xfbma_destroy(rx.xattr_records);
+ return error;
+}
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index e59338b83838..8f3e078938af 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -1360,3 +1360,34 @@ xrep_reset_perag_resv(
out:
return error;
}
+
+/*
+ * See if this buffer can pass the given ->verify_struct() function.
+ *
+ * If the buffer already has ops attached and they're not the ones that were
+ * passed in, we reject the buffer. Otherwise, we perform the structure test
+ * (note that we do not check CRCs) and return the outcome of the test. The
+ * buffer ops and error state are left unchanged.
+ */
+bool
+xrep_buf_verify_struct(
+ struct xfs_buf *bp,
+ const struct xfs_buf_ops *ops)
+{
+ const struct xfs_buf_ops *old_ops = bp->b_ops;
+ xfs_failaddr_t fa;
+ int old_error;
+
+ if (old_ops) {
+ if (old_ops != ops)
+ return false;
+ }
+
+ old_error = bp->b_error;
+ bp->b_ops = ops;
+ fa = bp->b_ops->verify_struct(bp);
+ bp->b_ops = old_ops;
+ bp->b_error = old_error;
+
+ return fa == NULL;
+}
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index ed5ac3ee1edb..fe1293334899 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -77,6 +77,7 @@ int xrep_inode(struct xfs_scrub *sc);
int xrep_bmap_data(struct xfs_scrub *sc);
int xrep_bmap_attr(struct xfs_scrub *sc);
int xrep_symlink(struct xfs_scrub *sc);
+int xrep_xattr(struct xfs_scrub *sc);
struct xrep_newbt_resv {
/* Link to list of extents that we've reserved. */
@@ -134,6 +135,8 @@ int xrep_newbt_claim_block(struct xfs_btree_cur *cur, struct xrep_newbt *xnr,
void xrep_bload_estimate_slack(struct xfs_scrub *sc,
struct xfs_btree_bload *bload);
+bool xrep_buf_verify_struct(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
+
#else
static inline int xrep_attempt(
@@ -178,6 +181,7 @@ xrep_reset_perag_resv(
#define xrep_bmap_data xrep_notsupported
#define xrep_bmap_attr xrep_notsupported
#define xrep_symlink xrep_notsupported
+#define xrep_xattr xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 21357228a7f8..df55dbd700a1 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -298,7 +298,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.type = ST_INODE,
.setup = xchk_setup_xattr,
.scrub = xchk_xattr,
- .repair = xrep_notsupported,
+ .repair = xrep_xattr,
},
[XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */
.type = ST_INODE,
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 218758b76e74..00c26cae38ef 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -171,4 +171,7 @@ struct xchk_fscounters {
unsigned long long icount_max;
};
+bool xchk_xattr_set_map(struct xfs_scrub *sc, unsigned long *map,
+ unsigned int start, unsigned int len);
+
#endif /* __XFS_SCRUB_SCRUB_H__ */
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 2c6c248be823..bb660f75cc56 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -12,6 +12,7 @@
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
+#include "xfs_da_format.h"
#include "scrub/scrub.h"
/* Figure out which block the btree cursor was pointing to. */
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 661364b0180c..a2573dcd63f7 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -1050,6 +1050,91 @@ TRACE_EVENT(xfbma_sort_stats,
__entry->error)
);
+TRACE_EVENT(xrep_xattr_recover_leafblock,
+ TP_PROTO(struct xfs_inode *ip, xfs_dablk_t dabno, uint16_t magic),
+ TP_ARGS(ip, dabno, magic),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(xfs_dablk_t, dabno)
+ __field(uint16_t, magic)
+ ),
+ TP_fast_assign(
+ __entry->dev = ip->i_mount->m_super->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->dabno = dabno;
+ __entry->magic = magic;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx dablk %u magic 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->dabno,
+ __entry->magic)
+);
+
+#define XFS_ATTR_NSP_STR \
+ { XFS_ATTR_LOCAL, "local" }, \
+ { XFS_ATTR_ROOT, "root" }, \
+ { XFS_ATTR_SECURE, "secure" }
+
+TRACE_EVENT(xrep_xattr_salvage_key,
+ TP_PROTO(struct xfs_inode *ip, unsigned int flags, char *name,
+ unsigned int namelen, unsigned int valuelen),
+ TP_ARGS(ip, flags, name, namelen, valuelen),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(unsigned int, flags)
+ __field(unsigned int, namelen)
+ __dynamic_array(char, name, namelen)
+ __field(unsigned int, valuelen)
+ ),
+ TP_fast_assign(
+ __entry->dev = ip->i_mount->m_super->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->flags = flags;
+ __entry->namelen = namelen;
+ memcpy(__get_str(name), name, namelen);
+ __entry->valuelen = valuelen;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx flags %s name '%.*s' valuelen %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __print_flags(__entry->flags, "|", XFS_ATTR_NSP_STR),
+ __entry->namelen,
+ __get_str(name),
+ __entry->valuelen)
+);
+
+TRACE_EVENT(xrep_xattr_insert_rec,
+ TP_PROTO(struct xfs_inode *ip, unsigned int flags, char *name,
+ unsigned int namelen, unsigned int valuelen),
+ TP_ARGS(ip, flags, name, namelen, valuelen),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(unsigned int, flags)
+ __field(unsigned int, namelen)
+ __dynamic_array(char, name, namelen)
+ __field(unsigned int, valuelen)
+ ),
+ TP_fast_assign(
+ __entry->dev = ip->i_mount->m_super->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->flags = flags;
+ __entry->namelen = namelen;
+ memcpy(__get_str(name), name, namelen);
+ __entry->valuelen = valuelen;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx flags %s name '%.*s' valuelen %u",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __print_flags(__entry->flags, "|", XFS_ATTR_NSP_STR),
+ __entry->namelen,
+ __get_str(name),
+ __entry->valuelen)
+);
+
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 65f8a1990acc..25292ce9eddd 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -520,8 +520,12 @@ _xfs_buf_obj_cmp(
* it stale has not yet committed. i.e. we are
* reallocating a busy extent. Skip this buffer and
* continue searching for an exact match.
+ *
+ * Note: If we're scanning for incore buffers to stale, don't
+ * complain if we find non-stale buffers.
*/
- ASSERT(bp->b_flags & XBF_STALE);
+ if (!(map->bm_flags & XBM_SCAN_STALE))
+ ASSERT(bp->b_flags & XBF_STALE);
return 1;
}
return 0;
@@ -587,6 +591,9 @@ xfs_buf_find(
*found_bp = NULL;
+ if (flags & XBF_SCAN_STALE)
+ cmap.bm_flags |= XBM_SCAN_STALE;
+
for (i = 0; i < nmaps; i++)
cmap.bm_len += map[i].bm_len;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index deaa9c2607af..c8643b629caa 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -33,6 +33,12 @@
/* flags used only as arguments to access routines */
#define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */
#define XBF_UNMAPPED (1 << 17)/* do not map the buffer */
+/*
+ * The caller is scanning for incore buffers to mark stale after a repair.
+ * Don't complain if we find a non-stale buffer of the wrong length; that's
+ * exactly the point.
+ */
+#define XBF_SCAN_STALE (1 << 18)
/* flags used only internally */
#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */
@@ -102,6 +108,8 @@ typedef void (*xfs_buf_iodone_t)(struct xfs_buf *);
struct xfs_buf_map {
xfs_daddr_t bm_bn; /* block number for I/O */
int bm_len; /* size of I/O */
+ unsigned int bm_flags;
+#define XBM_SCAN_STALE (1 << 0) /* see XBF_SCAN_STALE */
};
#define DEFINE_SINGLE_BUF_MAP(map, blkno, numblk) \