summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2021-09-01 10:59:05 -0700
committerDarrick J. Wong <djwong@kernel.org>2021-09-17 18:55:01 -0700
commitfe73785474465088443b88ffa203de7eb74bc62a (patch)
treea7a23e5deab00a1ae397dddc942427e6641d48a3
parent571cbbcaa818fbc554d347f547e2bc6b0cb3cd1a (diff)
xfs: repair extended attributes
If the extended attributes look bad, try to sift through the rubble to find whatever keys/values we can, stage a new attribute structure in a temporary file and use the atomic extent swapping mechanism to commit the results in bulk. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/libxfs/xfs_attr.c2
-rw-r--r--fs/xfs/libxfs/xfs_attr.h1
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h5
-rw-r--r--fs/xfs/scrub/array.c24
-rw-r--r--fs/xfs/scrub/array.h2
-rw-r--r--fs/xfs/scrub/attr.c7
-rw-r--r--fs/xfs/scrub/attr.h10
-rw-r--r--fs/xfs/scrub/attr_repair.c1410
-rw-r--r--fs/xfs/scrub/bitmap.c22
-rw-r--r--fs/xfs/scrub/bitmap.h1
-rw-r--r--fs/xfs/scrub/blob.c24
-rw-r--r--fs/xfs/scrub/blob.h2
-rw-r--r--fs/xfs/scrub/repair.c137
-rw-r--r--fs/xfs/scrub/repair.h9
-rw-r--r--fs/xfs/scrub/scrub.c2
-rw-r--r--fs/xfs/scrub/scrub.h3
-rw-r--r--fs/xfs/scrub/trace.c1
-rw-r--r--fs/xfs/scrub/trace.h80
-rw-r--r--fs/xfs/xfs_buf.c9
-rw-r--r--fs/xfs/xfs_buf.h9
-rw-r--r--fs/xfs/xfs_trace.h2
22 files changed, 1758 insertions, 5 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index d6af3525a5d9..0c33b5c70567 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -175,6 +175,7 @@ ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y)
xfs-y += $(addprefix scrub/, \
agheader_repair.o \
alloc_repair.o \
+ attr_repair.o \
bitmap.o \
blob.o \
bmap_repair.o \
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index fbc9d816882c..97e617d480e4 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -833,7 +833,7 @@ out_trans_cancel:
* External routines when attribute list is inside the inode
*========================================================================*/
-static inline int xfs_attr_sf_totsize(struct xfs_inode *dp)
+int xfs_attr_sf_totsize(struct xfs_inode *dp)
{
struct xfs_attr_shortform *sf;
diff --git a/fs/xfs/libxfs/xfs_attr.h b/fs/xfs/libxfs/xfs_attr.h
index 5e71f719bdd5..36003a42029e 100644
--- a/fs/xfs/libxfs/xfs_attr.h
+++ b/fs/xfs/libxfs/xfs_attr.h
@@ -495,5 +495,6 @@ int xfs_attr_remove_iter(struct xfs_delattr_context *dac);
bool xfs_attr_namecheck(const void *name, size_t length);
void xfs_delattr_context_init(struct xfs_delattr_context *dac,
struct xfs_da_args *args);
+int xfs_attr_sf_totsize(struct xfs_inode *dp);
#endif /* __XFS_ATTR_H__ */
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index 5a49caa5c9df..6cca4e5880f7 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -694,6 +694,11 @@ struct xfs_attr3_leafblock {
#define XFS_ATTR_INCOMPLETE (1 << XFS_ATTR_INCOMPLETE_BIT)
#define XFS_ATTR_NSP_ONDISK_MASK (XFS_ATTR_ROOT | XFS_ATTR_SECURE)
+#define XFS_ATTR_NAMESPACE_STR \
+ { XFS_ATTR_LOCAL, "local" }, \
+ { XFS_ATTR_ROOT, "root" }, \
+ { XFS_ATTR_SECURE, "secure" }
+
/*
* Alignment for namelist and valuelist entries (since they are mixed
* there can be only one alignment value)
diff --git a/fs/xfs/scrub/array.c b/fs/xfs/scrub/array.c
index 35a60b790e73..d5c8217e5360 100644
--- a/fs/xfs/scrub/array.c
+++ b/fs/xfs/scrub/array.c
@@ -663,3 +663,27 @@ xfbma_iter_get(
*idx = cur;
return 0;
}
+
+/* How many bytes is this array consuming? */
+loff_t
+xfbma_bytes(
+ struct xfbma *array)
+{
+ struct kstat statbuf;
+ int ret;
+
+ ret = xfile_statx(array->xfile, &statbuf);
+ if (ret)
+ return ret;
+
+ return statbuf.blocks * 512;
+}
+
+/* Empty the entire array. */
+void
+xfbma_truncate(
+ struct xfbma *array)
+{
+ xfile_discard(array->xfile, 0, MAX_LFS_FILESIZE);
+ array->nr = 0;
+}
diff --git a/fs/xfs/scrub/array.h b/fs/xfs/scrub/array.h
index ba7770ba5d74..05f0d333f077 100644
--- a/fs/xfs/scrub/array.h
+++ b/fs/xfs/scrub/array.h
@@ -27,6 +27,8 @@ int xfbma_set(struct xfbma *array, uint64_t idx, void *ptr);
int xfbma_insert_anywhere(struct xfbma *array, void *ptr);
bool xfbma_is_null(struct xfbma *array, void *ptr);
int xfbma_nullify(struct xfbma *array, uint64_t idx);
+void xfbma_truncate(struct xfbma *array);
+loff_t xfbma_bytes(struct xfbma *array);
/* Append an element to the array. */
static inline int xfbma_append(struct xfbma *array, void *ptr)
diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c
index b6f0c9f3f124..6813438b20ce 100644
--- a/fs/xfs/scrub/attr.c
+++ b/fs/xfs/scrub/attr.c
@@ -19,6 +19,7 @@
#include "scrub/common.h"
#include "scrub/dabtree.h"
#include "scrub/attr.h"
+#include "scrub/repair.h"
/*
* Allocate enough memory to hold an attr value and attr block bitmaps,
@@ -73,6 +74,10 @@ xchk_setup_xattr(
{
int error;
+ error = xrep_setup_tempfile(sc, S_IFREG);
+ if (error)
+ return error;
+
/*
* We failed to get memory while checking attrs, so this time try to
* get all the memory we're ever going to need. Allocate the buffer
@@ -182,7 +187,7 @@ fail_xref:
* Within a char, the lowest bit of the char represents the byte with
* the smallest address
*/
-STATIC bool
+bool
xchk_xattr_set_map(
struct xfs_scrub *sc,
unsigned long *map,
diff --git a/fs/xfs/scrub/attr.h b/fs/xfs/scrub/attr.h
index 1719e1c4da59..ee245a1264a7 100644
--- a/fs/xfs/scrub/attr.h
+++ b/fs/xfs/scrub/attr.h
@@ -37,6 +37,16 @@ xchk_xattr_valuebuf(
return ab->buf;
}
+/* A place to store attribute names. */
+static inline unsigned char *
+xchk_xattr_namebuf(
+ struct xfs_scrub *sc)
+{
+ struct xchk_xattr_buf *ab = sc->buf;
+
+ return (unsigned char *)ab->buf + ab->sz - XATTR_NAME_MAX - 1;
+}
+
/* A bitmap of space usage computed by walking an attr leaf block. */
static inline unsigned long *
xchk_xattr_usedmap(
diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c
new file mode 100644
index 000000000000..d7f7afb71a70
--- /dev/null
+++ b/fs/xfs/scrub/attr_repair.c
@@ -0,0 +1,1410 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_btree.h"
+#include "xfs_bit.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_sb.h"
+#include "xfs_inode.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_dir2.h"
+#include "xfs_attr.h"
+#include "xfs_attr_leaf.h"
+#include "xfs_attr_sf.h"
+#include "xfs_attr_remote.h"
+#include "xfs_bmap.h"
+#include "xfs_bmap_util.h"
+#include "xfs_swapext.h"
+#include "xfs_xchgrange.h"
+#include "scrub/xfs_scrub.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/repair.h"
+#include "scrub/array.h"
+#include "scrub/blob.h"
+#include "scrub/attr.h"
+
+/*
+ * Extended Attribute Repair
+ * =========================
+ *
+ * We repair extended attributes by reading the attribute fork blocks looking
+ * for keys and values, then truncate the entire attr fork and reinsert all
+ * the attributes. Unfortunately, there's no secondary copy of most extended
+ * attribute data, which means that if we blow up midway through there's
+ * little we can do.
+ */
+
+struct xrep_xattr_key {
+ xblob_cookie value_cookie;
+ xblob_cookie name_cookie;
+ uint hash;
+ int flags;
+ uint32_t valuelen;
+ uint16_t namelen;
+};
+
+struct xrep_xattr {
+ struct xfs_scrub *sc;
+ struct xfbma *xattr_records;
+ struct xblob *xattr_blobs;
+
+ /* Number of attributes that we are salvaging. */
+ unsigned long long attrs_found;
+};
+
+/* Absorb up to 8 pages of attrs before we flush them to the temp file. */
+#define XREP_XATTR_SALVAGE_BYTES (PAGE_SIZE * 8)
+
+/*
+ * Allocate enough memory to hold whatever we need to salvage an attr block.
+ * Buffer contents can be preserved, unlike in the scrub counterpart to this
+ * function.
+ */
+STATIC int
+xrep_setup_xattr_buf(
+ struct xfs_scrub *sc,
+ size_t value_size,
+ bool preserve)
+{
+ size_t sz;
+ struct xchk_xattr_buf *new_ab;
+ struct xchk_xattr_buf *ab = sc->buf;
+
+ ASSERT(!preserve || ab != NULL);
+
+ /*
+ * We need enough space to hold a bitmap for the used space within an
+ * attr block; the name of a salvaged attr; and the value of a salvaged
+ * attr.
+ */
+ sz = sizeof(long) * BITS_TO_LONGS(sc->mp->m_attr_geo->blksize) +
+ value_size + XATTR_NAME_MAX + 1;
+
+ /*
+ * If there's already a buffer, figure out if we need to reallocate it
+ * to accommodate a larger size.
+ */
+ if (ab && ab->sz >= sz)
+ return 0;
+
+ /* Give back the old memory as soon as we can, to reduce pressure. */
+ if (!preserve && ab) {
+ kmem_free(ab);
+ ab = NULL;
+ }
+
+ new_ab = kvmalloc(sizeof(struct xchk_xattr_buf) + sz,
+ GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL);
+ if (!new_ab)
+ return -ENOMEM;
+
+ if (ab) {
+ memcpy(new_ab, ab, ab->sz);
+ kmem_free(ab);
+ }
+ new_ab->sz = sz;
+ sc->buf = new_ab;
+ return 0;
+}
+
+/*
+ * While we're salvaging the contents of an xattr block, the first part of the
+ * buffer contains a bitmap of the parts of the block that we've already seen.
+ * Therefore, salvaged values /must/ be stored after the bitmap.
+ */
+static inline unsigned char *
+xrep_xattr_salvage_valuebuf(
+ struct xfs_scrub *sc)
+{
+ return (unsigned char *)(xchk_xattr_usedmap(sc) +
+ BITS_TO_LONGS(sc->mp->m_attr_geo->blksize));
+}
+
+/*
+ * Decide if we want to salvage this attribute. We don't bother with
+ * incomplete or oversized keys or values.
+ */
+STATIC int
+xrep_xattr_want_salvage(
+ int flags,
+ const void *name,
+ int namelen,
+ int valuelen)
+{
+ if (flags & XFS_ATTR_INCOMPLETE)
+ return false;
+ if (namelen > XATTR_NAME_MAX || namelen <= 0)
+ return false;
+ if (valuelen > XATTR_SIZE_MAX || valuelen < 0)
+ return false;
+ if (!xfs_attr_namecheck(name, namelen))
+ return false;
+ return true;
+}
+
+/* Allocate an in-core record to hold xattrs while we rebuild the xattr data. */
+STATIC int
+xrep_xattr_salvage_key(
+ struct xrep_xattr *rx,
+ int flags,
+ unsigned char *name,
+ int namelen,
+ unsigned char *value,
+ int valuelen)
+{
+ struct xrep_xattr_key key = {
+ .valuelen = valuelen,
+ .flags = flags & (XFS_ATTR_ROOT | XFS_ATTR_SECURE),
+ .namelen = namelen,
+ };
+ int error = 0;
+
+ if (xchk_should_terminate(rx->sc, &error))
+ return error;
+
+ trace_xrep_xattr_salvage_key(rx->sc->ip, key.flags, name, namelen,
+ valuelen);
+
+ error = xblob_put(rx->xattr_blobs, &key.name_cookie, name, namelen);
+ if (error)
+ return error;
+ error = xblob_put(rx->xattr_blobs, &key.value_cookie, value, valuelen);
+ if (error)
+ return error;
+
+ key.hash = xfs_da_hashname(name, namelen);
+
+ error = xfbma_append(rx->xattr_records, &key);
+ if (error)
+ return error;
+
+ rx->attrs_found++;
+ return 0;
+}
+
+/*
+ * Record a shortform extended attribute key & value for later reinsertion
+ * into the inode.
+ */
+STATIC int
+xrep_xattr_salvage_sf_attr(
+ struct xrep_xattr *rx,
+ struct xfs_attr_sf_entry *sfe)
+{
+ unsigned char *value = &sfe->nameval[sfe->namelen];
+
+ if (!xrep_xattr_want_salvage(sfe->flags, sfe->nameval, sfe->namelen,
+ sfe->valuelen))
+ return 0;
+
+ return xrep_xattr_salvage_key(rx, sfe->flags, sfe->nameval,
+ sfe->namelen, value, sfe->valuelen);
+}
+
+/*
+ * Record a local format extended attribute key & value for later reinsertion
+ * into the inode.
+ */
+STATIC int
+xrep_xattr_salvage_local_attr(
+ struct xrep_xattr *rx,
+ struct xfs_attr_leaf_entry *ent,
+ unsigned int nameidx,
+ const char *buf_end,
+ struct xfs_attr_leaf_name_local *lentry)
+{
+ unsigned char *value;
+ unsigned long *usedmap = xchk_xattr_usedmap(rx->sc);
+ unsigned int valuelen;
+ unsigned int namesize;
+
+ /*
+ * Decode the leaf local entry format. If something seems wrong, we
+ * junk the attribute.
+ */
+ valuelen = be16_to_cpu(lentry->valuelen);
+ namesize = xfs_attr_leaf_entsize_local(lentry->namelen, valuelen);
+ if ((char *)lentry + namesize > buf_end)
+ return 0;
+ if (!xrep_xattr_want_salvage(ent->flags, lentry->nameval,
+ lentry->namelen, valuelen))
+ return 0;
+ if (!xchk_xattr_set_map(rx->sc, usedmap, nameidx, namesize))
+ return 0;
+
+ /* Try to save this attribute. */
+ value = &lentry->nameval[lentry->namelen];
+ return xrep_xattr_salvage_key(rx, ent->flags, lentry->nameval,
+ lentry->namelen, value, valuelen);
+}
+
+/*
+ * Record a remote format extended attribute key & value for later reinsertion
+ * into the inode.
+ */
+STATIC int
+xrep_xattr_salvage_remote_attr(
+ struct xrep_xattr *rx,
+ struct xfs_attr_leaf_entry *ent,
+ unsigned int nameidx,
+ const char *buf_end,
+ struct xfs_attr_leaf_name_remote *rentry,
+ unsigned int ent_idx,
+ struct xfs_buf *leaf_bp)
+{
+ struct xfs_da_args args = {
+ .trans = rx->sc->tp,
+ .dp = rx->sc->ip,
+ .index = ent_idx,
+ .geo = rx->sc->mp->m_attr_geo,
+ };
+ unsigned long *usedmap = xchk_xattr_usedmap(rx->sc);
+ unsigned char *value;
+ unsigned int valuelen;
+ unsigned int namesize;
+ int error;
+
+ /*
+ * Decode the leaf remote entry format. If something seems wrong, we
+ * junk the attribute. Note that we should never find a zero-length
+ * remote attribute value.
+ */
+ valuelen = be32_to_cpu(rentry->valuelen);
+ namesize = xfs_attr_leaf_entsize_remote(rentry->namelen);
+ if ((char *)rentry + namesize > buf_end)
+ return 0;
+ if (valuelen == 0 ||
+ !xrep_xattr_want_salvage(ent->flags, rentry->name, rentry->namelen,
+ valuelen))
+ return 0;
+ if (!xchk_xattr_set_map(rx->sc, usedmap, nameidx, namesize))
+ return 0;
+
+ /*
+ * Enlarge the buffer (if needed) to hold the value that we're trying
+ * to salvage from the old extended attribute data. The usedmap
+ * pointer itself may be invalid after this point, but we must keep the
+ * bitmap.
+ */
+ error = xrep_setup_xattr_buf(rx->sc, valuelen, true);
+ if (error == -ENOMEM)
+ error = -EDEADLOCK;
+ if (error)
+ return error;
+ value = xrep_xattr_salvage_valuebuf(rx->sc);
+
+ /* Look up the remote value and stash it for reconstruction. */
+ args.valuelen = valuelen;
+ args.namelen = rentry->namelen;
+ args.name = rentry->name;
+ args.value = value;
+ error = xfs_attr3_leaf_getvalue(leaf_bp, &args);
+ if (error || args.rmtblkno == 0)
+ goto err_free;
+
+ error = xfs_attr_rmtval_get(&args);
+ if (error)
+ goto err_free;
+
+ /* Try to save this attribute. */
+ error = xrep_xattr_salvage_key(rx, ent->flags, rentry->name,
+ rentry->namelen, value, valuelen);
+err_free:
+ /* remote value was garbage, junk it */
+ if (error == -EFSBADCRC || error == -EFSCORRUPTED)
+ error = 0;
+ return error;
+}
+
+/* Extract every xattr key that we can from this attr fork block. */
+STATIC int
+xrep_xattr_recover_leaf(
+ struct xrep_xattr *rx,
+ struct xfs_buf *bp)
+{
+ struct xfs_attr3_icleaf_hdr leafhdr;
+ struct xfs_scrub *sc = rx->sc;
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_attr_leafblock *leaf;
+ unsigned long *usedmap = xchk_xattr_usedmap(sc);
+ struct xfs_attr_leaf_name_local *lentry;
+ struct xfs_attr_leaf_name_remote *rentry;
+ struct xfs_attr_leaf_entry *ent;
+ struct xfs_attr_leaf_entry *entries;
+ char *buf_end;
+ size_t off;
+ unsigned int nameidx;
+ unsigned int hdrsize;
+ int i;
+ int error = 0;
+
+ bitmap_zero(usedmap, mp->m_attr_geo->blksize);
+
+ /* Check the leaf header */
+ leaf = bp->b_addr;
+ xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf);
+ hdrsize = xfs_attr3_leaf_hdr_size(leaf);
+ xchk_xattr_set_map(sc, usedmap, 0, hdrsize);
+ entries = xfs_attr3_leaf_entryp(leaf);
+
+ buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize;
+ for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) {
+ if (xchk_should_terminate(sc, &error))
+ break;
+
+ /* Skip key if it conflicts with something else? */
+ off = (char *)ent - (char *)leaf;
+ if (!xchk_xattr_set_map(sc, usedmap, off,
+ sizeof(xfs_attr_leaf_entry_t)))
+ continue;
+
+ /* Check the name information. */
+ nameidx = be16_to_cpu(ent->nameidx);
+ if (nameidx < leafhdr.firstused ||
+ nameidx >= mp->m_attr_geo->blksize)
+ continue;
+
+ if (ent->flags & XFS_ATTR_LOCAL) {
+ lentry = xfs_attr3_leaf_name_local(leaf, i);
+ error = xrep_xattr_salvage_local_attr(rx, ent, nameidx,
+ buf_end, lentry);
+ } else {
+ rentry = xfs_attr3_leaf_name_remote(leaf, i);
+ error = xrep_xattr_salvage_remote_attr(rx, ent, nameidx,
+ buf_end, rentry, i, bp);
+ }
+ if (error)
+ break;
+ }
+
+ return error;
+}
+
+/* Try to recover shortform attrs. */
+STATIC int
+xrep_xattr_recover_sf(
+ struct xrep_xattr *rx)
+{
+ struct xfs_attr_shortform *sf;
+ struct xfs_attr_sf_entry *sfe;
+ struct xfs_attr_sf_entry *next;
+ struct xfs_ifork *ifp;
+ unsigned char *end;
+ int i;
+ int error;
+
+ ifp = XFS_IFORK_PTR(rx->sc->ip, XFS_ATTR_FORK);
+ sf = (struct xfs_attr_shortform *)rx->sc->ip->i_afp->if_u1.if_data;
+ end = (unsigned char *)ifp->if_u1.if_data + ifp->if_bytes;
+
+ for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) {
+ if (xchk_should_terminate(rx->sc, &error))
+ break;
+
+ next = xfs_attr_sf_nextentry(sfe);
+ if ((unsigned char *)next > end)
+ break;
+
+ /* Ok, let's save this key/value. */
+ error = xrep_xattr_salvage_sf_attr(rx, sfe);
+ if (error)
+ return error;
+
+ sfe = next;
+ }
+
+ return 0;
+}
+
+/*
+ * Try to return a buffer of xattr data for a given physical extent.
+ *
+ * Because the buffer cache get function complains if it finds a buffer
+ * matching the block number but not matching the length, we must be careful to
+ * look for incore buffers (up to the maximum length of a remote value) that
+ * could be hiding anywhere in the physical range. If we find an incore
+ * buffer, we can pass that to the caller. Optionally, read a single block and
+ * pass that back.
+ *
+ * Note the subtlety that remote attr value blocks for which there is no incore
+ * buffer will be passed to the callback one block at a time. These buffers
+ * will not have any ops attached and must be staled to prevent aliasing with
+ * multiblock buffers once we drop the ILOCK.
+ */
+STATIC int
+xrep_xattr_find_buf(
+ struct xfs_mount *mp,
+ xfs_fsblock_t fsbno,
+ xfs_filblks_t max_len,
+ bool can_read,
+ struct xfs_buf **bpp)
+{
+ xfs_daddr_t daddr = XFS_FSB_TO_DADDR(mp, fsbno);
+
+ max_len = min_t(xfs_filblks_t, max_len,
+ xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX));
+
+ /*
+ * Look for an incore buffer for every possible rmt or leaf block that
+ * could start at this physical position.
+ */
+ while (max_len > 0) {
+ struct xfs_buf *bp = xfs_buf_incore(mp->m_ddev_targp, daddr,
+ XFS_FSB_TO_BB(mp, max_len),
+ XBF_TRYLOCK | XBF_SCAN_STALE);
+ if (bp) {
+ *bpp = bp;
+ return 0;
+ }
+
+ max_len--;
+ }
+
+ if (!can_read) {
+ *bpp = NULL;
+ return 0;
+ }
+
+ return xfs_buf_read(mp->m_ddev_targp, daddr, XFS_FSB_TO_BB(mp, 1),
+ XBF_TRYLOCK, bpp, NULL);
+}
+
+/*
+ * Deal with a buffer that we found during our walk of the attr fork.
+ *
+ * Attribute leaf and node blocks are simple -- they're a single block, so we
+ * can walk them one at a time and we never have to worry about discontiguous
+ * multiblock buffers like we do for directories.
+ *
+ * Unfortunately, remote attr blocks add a lot of complexity here. Each disk
+ * block is totally self contained, in the sense that the v5 header provides no
+ * indication that there could be more data in the next block. The incore
+ * buffers can span multiple blocks, though they never cross extent records.
+ * However, they don't necessarily start or end on an extent record boundary.
+ * Therefore, we need a special buffer find function to walk the buffer cache
+ * for us.
+ *
+ * The caller must hold the ILOCK on the file being repaired. We use
+ * XBF_TRYLOCK here to skip any locked buffer on the assumption that we don't
+ * own the block and don't want to hang the system on a potentially garbage
+ * buffer.
+ */
+STATIC int
+xrep_xattr_recover_block(
+ struct xrep_xattr *rx,
+ xfs_dablk_t dabno,
+ xfs_fsblock_t fsbno,
+ xfs_filblks_t max_len,
+ xfs_filblks_t *actual_len)
+{
+ struct xfs_da_blkinfo *info;
+ struct xfs_buf *bp;
+ int error;
+
+ error = xrep_xattr_find_buf(rx->sc->mp, fsbno, max_len, true, &bp);
+ if (error)
+ return error;
+ info = bp->b_addr;
+ *actual_len = XFS_BB_TO_FSB(rx->sc->mp, bp->b_length);
+
+ trace_xrep_xattr_recover_leafblock(rx->sc->ip, dabno,
+ be16_to_cpu(info->magic));
+
+ /*
+ * If the buffer has the right magic number for an attr leaf block and
+ * passes a structure check (we don't care about checksums), salvage
+ * as much as we can from the block. */
+ if (info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC) &&
+ xrep_buf_verify_struct(bp, &xfs_attr3_leaf_buf_ops))
+ error = xrep_xattr_recover_leaf(rx, bp);
+
+ /*
+ * If the buffer didn't already have buffer ops set, it was read in by
+ * the _find_buf function and could very well be /part/ of a multiblock
+ * remote block. Mark it stale so that it doesn't hang around in
+ * memory to cause problems.
+ */
+ if (bp->b_ops == NULL)
+ xfs_buf_stale(bp);
+
+ xfs_buf_relse(bp);
+ return error;
+}
+
+/* Insert one xattr key/value. */
+STATIC int
+xrep_xattr_insert_rec(
+ struct xrep_xattr *rx,
+ const struct xrep_xattr_key *key)
+{
+ struct xfs_da_args args = { NULL };
+ unsigned char *name;
+ unsigned char *value;
+ int error;
+
+ /*
+ * We want to use a separate transaction for each attribute that we're
+ * adding to the temporary file. However, xattr salvaging uses the
+ * scrub transaction to avoid livelocking on attr tree loops, so we
+ * have to commit the existing scrub transaction to get it out of the
+ * way.
+ */
+ error = xfs_trans_commit(rx->sc->tp);
+ if (error)
+ return error;
+ rx->sc->tp = NULL;
+
+ /*
+ * Grab pointers to the scrub buffer so that we can use them to insert
+ * attrs into the temp file. Because the salvage step should have made
+ * the buffer large enough for (a block bitmap + the largest value
+ * found + the largest possible attr name), it should be safe to use
+ * xfs_xattr_usedmap to copy values.
+ */
+ name = xchk_xattr_namebuf(rx->sc);
+ value = (unsigned char *)xchk_xattr_usedmap(rx->sc);
+
+ /*
+ * The attribute name is stored near the end of the in-core buffer,
+ * though we reserve one more byte to ensure null termination.
+ */
+ name[XATTR_NAME_MAX] = 0;
+
+ error = xblob_get(rx->xattr_blobs, key->name_cookie, name,
+ key->namelen);
+ if (error)
+ return error;
+
+ error = xblob_free(rx->xattr_blobs, key->name_cookie);
+ if (error)
+ return error;
+
+ error = xblob_get(rx->xattr_blobs, key->value_cookie, value,
+ key->valuelen);
+ if (error)
+ return error;
+
+ error = xblob_free(rx->xattr_blobs, key->value_cookie);
+ if (error)
+ return error;
+
+ name[key->namelen] = 0;
+
+ trace_xrep_xattr_insert_rec(rx->sc->tempip, key->flags, name,
+ key->namelen, key->valuelen);
+
+ /*
+ * Drop everything so that we can add the attribute to the tempfile.
+ * The attr set code is very intricate and can roll the transaction
+ * multiple times. We have no way to make it relog both the tempfile
+ * and the file we're repairing, so we're willing to do this to avoid
+ * having to know too much about the details. We still hold the
+ * IOLOCK on the file being repaired, so we can prevent userspace from
+ * adding more attrs to the file we're repairing.
+ */
+ xfs_iunlock(rx->sc->ip, XFS_ILOCK_EXCL);
+ xfs_iunlock(rx->sc->tempip, XFS_ILOCK_EXCL);
+ rx->sc->ilock_flags &= ~XFS_ILOCK_EXCL;
+ rx->sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL;
+
+ args.dp = rx->sc->tempip;
+ args.attr_filter = key->flags;
+ args.name = name;
+ args.namelen = key->namelen;
+ args.value = value;
+ args.valuelen = key->valuelen;
+ error = xfs_attr_set(&args);
+ if (error)
+ return error;
+
+ /* Now recreate the transaction and relock the inodes. */
+ error = xchk_trans_alloc(rx->sc, 0);
+ if (error)
+ return error;
+
+ xfs_lock_two_inodes(rx->sc->ip, XFS_ILOCK_EXCL, rx->sc->tempip,
+ XFS_ILOCK_EXCL);
+ rx->sc->ilock_flags |= XFS_ILOCK_EXCL;
+ rx->sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
+ return 0;
+}
+
+/*
+ * Periodically flush salvaged attributes to the temporary file. This
+ * is done to reduce the memory requirements of the xattr rebuild, since
+ * directories can contain millions of attributes.
+ */
+STATIC int
+xrep_xattr_flush_salvaged(
+ struct xrep_xattr *rx)
+{
+ struct xrep_xattr_key key;
+ uint64_t nr;
+ int error;
+
+ /* Add all the salvaged attrs to the temporary file. */
+ for (nr = 0; nr < xfbma_length(rx->xattr_records);) {
+ error = xfbma_iter_get(rx->xattr_records, &nr, &key);
+ if (error)
+ return error;
+ error = xrep_xattr_insert_rec(rx, &key);
+ if (error)
+ return error;
+ }
+
+ /* Empty out both arrays now that we've added the entries. */
+ xfbma_truncate(rx->xattr_records);
+ xblob_truncate(rx->xattr_blobs);
+ return 0;
+}
+
+/* Extract as many attribute keys and values as we can. */
+STATIC int
+xrep_xattr_recover(
+ struct xrep_xattr *rx)
+{
+ struct xfs_bmbt_irec got;
+ struct xfs_scrub *sc = rx->sc;
+ struct xfs_da_geometry *geo = sc->mp->m_attr_geo;
+ xfs_fileoff_t offset;
+ xfs_filblks_t len;
+ xfs_dablk_t dabno;
+ int nmap;
+ int error;
+
+ /*
+ * Iterate each xattr leaf block in the attr fork to scan them for any
+ * attributes that we might salvage.
+ */
+ for (offset = 0;
+ offset < XFS_MAX_FILEOFF;
+ offset = got.br_startoff + got.br_blockcount) {
+ nmap = 1;
+ error = xfs_bmapi_read(sc->ip, offset, XFS_MAX_FILEOFF - offset,
+ &got, &nmap, XFS_BMAPI_ATTRFORK);
+ if (error)
+ return error;
+ if (nmap != 1)
+ return -EFSCORRUPTED;
+ if (!xfs_bmap_is_written_extent(&got))
+ continue;
+
+ for (dabno = round_up(got.br_startoff, geo->fsbcount);
+ dabno < got.br_startoff + got.br_blockcount;
+ dabno += len) {
+ xfs_fileoff_t curr_offset = dabno - got.br_startoff;
+
+ if (xchk_should_terminate(rx->sc, &error))
+ return error;
+
+ error = xrep_xattr_recover_block(rx, dabno,
+ curr_offset + got.br_startblock,
+ got.br_blockcount - curr_offset,
+ &len);
+ if (error)
+ return error;
+
+ /* Flush attrs to constrain memory usage. */
+ if (xfbma_bytes(rx->xattr_records) +
+ xblob_bytes(rx->xattr_blobs) <
+ XREP_XATTR_SALVAGE_BYTES)
+ continue;
+
+ error = xrep_xattr_flush_salvaged(rx);
+ if (error)
+ return error;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Reset the extended attribute fork to a state where we can start re-adding
+ * the salvaged attributes.
+ */
+STATIC int
+xrep_xattr_fork_remove(
+ struct xfs_scrub *sc,
+ struct xfs_inode *ip)
+{
+ struct xfs_attr_sf_hdr *hdr;
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK);
+
+ /*
+ * If the data fork is in btree format, we can't change di_forkoff
+ * because we could run afoul of the rule that the data fork isn't
+ * supposed to be in btree format if there's enough space in the fork
+ * that it could have used extents format. Instead, reinitialize the
+ * attr fork to have a shortform structure with zero attributes.
+ */
+ if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE) {
+ ifp->if_format = XFS_DINODE_FMT_LOCAL;
+ xfs_idata_realloc(ip, (int)sizeof(*hdr) - ifp->if_bytes,
+ XFS_ATTR_FORK);
+ hdr = (struct xfs_attr_sf_hdr *)ifp->if_u1.if_data;
+ hdr->count = 0;
+ hdr->totsize = cpu_to_be16(sizeof(*hdr));
+ xfs_trans_log_inode(sc->tp, ip,
+ XFS_ILOG_CORE | XFS_ILOG_ADATA);
+ return 0;
+ }
+
+ /* If we still have attr fork extents, something's wrong. */
+ if (ifp->if_nextents != 0) {
+ struct xfs_iext_cursor icur;
+ struct xfs_bmbt_irec irec;
+ unsigned int i = 0;
+
+ xfs_emerg(sc->mp,
+ "inode 0x%llx attr fork still has %u attr extents, format %d?!",
+ ip->i_ino, ifp->if_nextents, ifp->if_format);
+ for_each_xfs_iext(ifp, &icur, &irec) {
+ xfs_err(sc->mp, "[%u]: startoff %llu startblock %llu blockcount %llu state %u", i++, irec.br_startoff, irec.br_startblock, irec.br_blockcount, irec.br_state);
+ }
+ ASSERT(0);
+ return -EFSCORRUPTED;
+ }
+
+ xfs_attr_fork_remove(ip, sc->tp);
+ return 0;
+}
+
+/*
+ * Free all the attribute fork blocks and delete the fork. The caller must
+ * join the inode to the transaction. This function returns with the inode
+ * joined to a clean scrub transaction.
+ */
+int
+xrep_xattr_reset_fork(
+ struct xfs_scrub *sc,
+ struct xfs_inode *ip)
+{
+ struct xfs_bmbt_irec got;
+ struct xfs_mount *mp = ip->i_mount;
+ struct xfs_da_geometry *geo = sc->mp->m_attr_geo;
+ struct xfs_buf *bp;
+ xfs_fileoff_t offset = 0;
+ xfs_filblks_t len;
+ xfs_dablk_t dabno;
+ int nmap;
+ int error;
+
+ ASSERT(ip == sc->ip || ip == sc->tempip);
+
+ if (ip->i_afp->if_format == XFS_DINODE_FMT_LOCAL)
+ goto zap;
+
+ /* Invalidate each attr block in the attr fork. Do not do reads. */
+ for (offset = 0;
+ offset < XFS_MAX_FILEOFF;
+ offset = got.br_startoff + got.br_blockcount) {
+ /* Walk the attr fork piece by piece... */
+ nmap = 1;
+ error = xfs_bmapi_read(ip, offset, XFS_MAX_FILEOFF - offset,
+ &got, &nmap, XFS_BMAPI_ATTRFORK);
+ if (error)
+ return error;
+ if (nmap != 1)
+ return -EFSCORRUPTED;
+ if (!xfs_bmap_is_real_extent(&got))
+ continue;
+
+ for (dabno = round_up(got.br_startoff, geo->fsbcount);
+ dabno < got.br_startoff + got.br_blockcount;
+ dabno += len) {
+ xfs_fileoff_t curr_offset = dabno - got.br_startoff;
+
+ error = xrep_xattr_find_buf(mp,
+ curr_offset + got.br_startblock,
+ got.br_blockcount - curr_offset,
+ false, &bp);
+ if (error)
+ break;
+ if (!bp) {
+ /* No buffer found? Advance by one block. */
+ len = geo->fsbcount;
+ continue;
+ }
+ len = XFS_BB_TO_FSB(mp, bp->b_length);
+
+ xfs_buf_stale(bp);
+ xfs_buf_relse(bp);
+ }
+ }
+
+ /* Free all the old xattr blocks; don't discard them for speed. */
+ error = xfs_bunmapi_range(&sc->tp, ip,
+ XFS_BMAPI_NODISCARD | XFS_BMAPI_ATTRFORK,
+ 0, XFS_MAX_FILEOFF);
+ if (error)
+ return error;
+
+zap:
+ error = xrep_xattr_fork_remove(sc, ip);
+ if (error)
+ return error;
+
+ return xrep_roll_trans(sc);
+}
+
+/*
+ * Find all the extended attributes for this inode by scraping them out of the
+ * attribute key blocks by hand, and flushing them into the temp file.
+ */
+STATIC int
+xrep_xattr_find_attributes(
+ struct xrep_xattr *rx)
+{
+ struct xfs_inode *ip = rx->sc->ip;
+ int error;
+
+ error = xrep_ino_dqattach(rx->sc);
+ if (error)
+ return error;
+
+ /* Salvage attributes from the old file. */
+ if (rx->sc->ip->i_afp->if_format == XFS_DINODE_FMT_LOCAL) {
+ error = xrep_xattr_recover_sf(rx);
+ } else {
+ error = xfs_iread_extents(rx->sc->tp, ip, XFS_ATTR_FORK);
+ if (error)
+ return error;
+
+ error = xrep_xattr_recover(rx);
+ }
+ if (error)
+ return error;
+
+ return xrep_xattr_flush_salvaged(rx);
+}
+
+/*
+ * Prepare both inodes' attribute forks for extent swapping. Promote the
+ * tempfile from short format to leaf format, and if the file being repaired
+ * has a short format attr fork, turn it into an empty extent list.
+ */
+STATIC int
+xrep_xattr_swap_prep(
+ struct xfs_scrub *sc,
+ bool temp_local,
+ bool ip_local)
+{
+ int error;
+
+ /*
+ * If the tempfile's attributes are in shortform format, convert that
+ * to a single leaf extent so that we can use the atomic extent swap.
+ */
+ if (temp_local) {
+ struct xfs_buf *leaf_bp = NULL;
+ struct xfs_da_args args = {
+ .dp = sc->tempip,
+ .geo = sc->mp->m_attr_geo,
+ .whichfork = XFS_ATTR_FORK,
+ .trans = sc->tp,
+ .total = 1,
+ };
+
+ error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
+ if (error)
+ return error;
+
+ /*
+ * Roll the deferred log items to get us back to a clean
+ * transaction. Hold on to the leaf buffer across this roll
+ * so that the AIL cannot grab our half-baked block.
+ */
+ xfs_trans_bhold(sc->tp, leaf_bp);
+ error = xfs_defer_finish(&sc->tp);
+ xfs_trans_bhold_release(sc->tp, leaf_bp);
+ }
+
+ /*
+ * If the file being repaired had a shortform attribute fork, convert
+ * that to an empty extent list in preparation for the atomic extent
+ * swap.
+ */
+ if (ip_local) {
+ struct xfs_ifork *ifp;
+
+ ifp = XFS_IFORK_PTR(sc->ip, XFS_ATTR_FORK);
+
+ xfs_idestroy_fork(ifp);
+ ifp->if_format = XFS_DINODE_FMT_EXTENTS;
+ ifp->if_nextents = 0;
+ ifp->if_bytes = 0;
+ ifp->if_u1.if_root = NULL;
+ ifp->if_height = 0;
+
+ xfs_trans_log_inode(sc->tp, sc->ip,
+ XFS_ILOG_CORE | XFS_ILOG_ADATA);
+ }
+
+ return 0;
+}
+
+/* State we need to track while rewriting attr block owners. */
+struct xrep_xattr_swap_owner {
+ struct xfs_attr_list_context ctx;
+ struct xbitmap rmt_blocks;
+ struct xfs_scrub *sc;
+};
+
+/*
+ * Change the owner field of a remote attribute value block to match the file
+ * that's being repaired. In-core buffers for these values span a single
+ * extent and are never logged, so we must be careful to mask off the
+ * corresponding range so that the leaf/node pass will skip these parts of the
+ * attr fork mappings.
+ */
+static void
+xrep_xattr_swap_rmt_owner(
+ struct xfs_attr_list_context *context,
+ int flags,
+ unsigned char *name,
+ int namelen,
+ int valuelen)
+{
+ struct xfs_da_args args = {
+ .op_flags = XFS_DA_OP_NOTIME,
+ .attr_filter = flags & XFS_ATTR_NSP_ONDISK_MASK,
+ .geo = context->dp->i_mount->m_attr_geo,
+ .whichfork = XFS_ATTR_FORK,
+ .dp = context->dp,
+ .name = name,
+ .namelen = namelen,
+ .hashval = xfs_da_hashname(name, namelen),
+ .trans = context->tp,
+ .value = NULL,
+ .valuelen = 0,
+ };
+ LIST_HEAD(buffer_list);
+ struct xfs_bmbt_irec map;
+ struct xrep_xattr_swap_owner *xso;
+ struct xfs_mount *mp = context->dp->i_mount;
+ struct xfs_attr3_rmt_hdr *rmt;
+ struct xfs_buf *bp;
+ void *p;
+ xfs_daddr_t dblkno;
+ int dblkcnt;
+ int nmap;
+ int error;
+
+ xso = container_of(context, struct xrep_xattr_swap_owner, ctx);
+
+ if (flags & (XFS_ATTR_LOCAL | XFS_ATTR_INCOMPLETE))
+ return;
+
+ error = xfs_attr_get_ilocked(&args);
+ if (error)
+ goto fail;
+
+ /*
+ * Mark this region of the attr fork so that the leaf/node scan will
+ * skip this part.
+ */
+ error = xbitmap_set(&xso->rmt_blocks, args.rmtblkno, args.rmtblkcnt);
+ if (error)
+ goto fail;
+
+ while (args.rmtblkcnt > 0) {
+ nmap = 1;
+ error = xfs_bmapi_read(args.dp, args.rmtblkno, args.rmtblkcnt,
+ &map, &nmap, XFS_BMAPI_ATTRFORK);
+ if (error || nmap != 1)
+ goto fail;
+
+ if (!xfs_bmap_is_written_extent(&map))
+ goto fail;
+
+ dblkno = XFS_FSB_TO_DADDR(mp, map.br_startblock);
+ dblkcnt = XFS_FSB_TO_BB(mp, map.br_blockcount);
+ error = xfs_buf_read(mp->m_ddev_targp, dblkno, dblkcnt, 0, &bp,
+ &xfs_attr3_rmt_buf_ops);
+ if (error)
+ goto fail;
+
+ /*
+ * Each rmt block within the buffer gets its own header, so
+ * update the owner for each header.
+ */
+ for (p = bp->b_addr;
+ p < bp->b_addr + BBTOB(bp->b_length);
+ p += mp->m_attr_geo->blksize) {
+ rmt = p;
+ rmt->rm_owner = cpu_to_be64(xso->sc->ip->i_ino);
+ }
+
+ xfs_buf_delwri_queue(bp, &buffer_list);
+ xfs_buf_relse(bp);
+
+ /* roll attribute extent map forwards */
+ args.rmtblkno += map.br_blockcount;
+ args.rmtblkcnt -= map.br_blockcount;
+ }
+
+ /* Write the entire remote value to disk. */
+ error = xfs_buf_delwri_submit(&buffer_list);
+ if (error)
+ goto fail;
+
+ return;
+fail:
+ xfs_buf_delwri_cancel(&buffer_list);
+ context->seen_enough = 1;
+}
+
+/*
+ * Change the owner field of every block in the attribute fork to match the
+ * file being repaired. First we fix the remote value blocks (which have
+ * particular incore geometries) and then change the rest one block at a time.
+ */
+STATIC int
+xrep_xattr_swap_leaf_owner(
+ struct xrep_xattr_swap_owner *xso)
+{
+ struct xfs_bmbt_irec map;
+ struct xfs_da_geometry *geo = xso->sc->mp->m_attr_geo;
+ struct xfs_scrub *sc = xso->sc;
+ struct xfs_da3_blkinfo *info;
+ struct xfs_buf *bp;
+ xfs_fileoff_t offset = 0;
+ xfs_fileoff_t end = -1U;
+ xfs_dablk_t dabno;
+ int nmap;
+ int error;
+
+ for (offset = 0;
+ offset < end;
+ offset = map.br_startoff + map.br_blockcount) {
+ nmap = 1;
+ error = xfs_bmapi_read(sc->tempip, offset, end - offset,
+ &map, &nmap, XFS_BMAPI_ATTRFORK);
+ if (error)
+ return error;
+ if (nmap != 1)
+ return -EFSCORRUPTED;
+ if (!xfs_bmap_is_written_extent(&map)) {
+ continue;
+ }
+
+ if (xbitmap_test(&xso->rmt_blocks, map.br_startoff,
+ &map.br_blockcount)) {
+ continue;
+ }
+
+ for (dabno = round_up(map.br_startoff, geo->fsbcount);
+ dabno < map.br_startoff + map.br_blockcount;
+ dabno += geo->fsbcount) {
+ error = xfs_da_read_buf(sc->tp, sc->tempip,
+ dabno, 0, &bp, XFS_ATTR_FORK, NULL);
+ if (error)
+ return error;
+ if (!bp)
+ return -EFSCORRUPTED;
+
+ info = bp->b_addr;
+ info->owner = cpu_to_be64(sc->ip->i_ino);
+
+ /* If nobody set a buffer type or ops, set them now. */
+ if (bp->b_ops == NULL) {
+ switch (info->hdr.magic) {
+ case cpu_to_be16(XFS_ATTR3_LEAF_MAGIC):
+ bp->b_ops = &xfs_attr3_leaf_buf_ops;
+ break;
+ case cpu_to_be16(XFS_DA3_NODE_MAGIC):
+ bp->b_ops = &xfs_da3_node_buf_ops;
+ break;
+ default:
+ xfs_trans_brelse(sc->tp, bp);
+ return -EFSCORRUPTED;
+ }
+ xfs_buf_set_ref(bp, XFS_ATTR_BTREE_REF);
+ }
+
+ xfs_trans_ordered_buf(sc->tp, bp);
+ xfs_trans_brelse(sc->tp, bp);
+ }
+ }
+
+ return 0;
+}
+/*
+ * Walk the temporary file's xattr blocks, setting the owner field of each
+ * block to the new owner. We use ordered and delwri buffers to flush
+ * everything out to disk ahead of comitting the atomic extent swap. Rewriting
+ * the attr blocks like this is apparently safe because attr inactivation isn't
+ * picky about owner field enforcement(!)
+ */
+STATIC int
+xrep_xattr_swap_owner(
+ struct xfs_scrub *sc)
+{
+ struct xrep_xattr_swap_owner xso = {
+ .ctx.dp = sc->tempip,
+ .ctx.resynch = 1,
+ .ctx.put_listent = xrep_xattr_swap_rmt_owner,
+ .ctx.allow_incomplete = false,
+ .ctx.seen_enough = 0,
+ .ctx.tp = sc->tp,
+ .sc = sc,
+ };
+ int error;
+
+ xbitmap_init(&xso.rmt_blocks);
+
+ /* First pass -- change the owners of the remote blocks. */
+ error = xfs_attr_list_ilocked(&xso.ctx);
+ if (error)
+ goto out;
+ if (xso.ctx.seen_enough) {
+ error = -EFSCORRUPTED;
+ goto out;
+ }
+
+ /* Second pass -- change each attr leaf/node buffer. */
+ error = xrep_xattr_swap_leaf_owner(&xso);
+out:
+ xbitmap_destroy(&xso.rmt_blocks);
+ return error;
+}
+
+/*
+ * If both files' attribute structure are in short format, we can copy
+ * the short format data from the tempfile to the repaired file if it'll
+ * fit.
+ */
+STATIC void
+xrep_xattr_swap_local(
+ struct xfs_scrub *sc,
+ int newsize,
+ int forkoff)
+{
+ struct xfs_ifork *ifp1, *ifp2;
+
+ ifp1 = XFS_IFORK_PTR(sc->tempip, XFS_ATTR_FORK);
+ ifp2 = XFS_IFORK_PTR(sc->ip, XFS_ATTR_FORK);
+ sc->ip->i_forkoff = forkoff;
+
+ xfs_idata_realloc(sc->ip, ifp1->if_bytes - ifp2->if_bytes,
+ XFS_ATTR_FORK);
+
+ memcpy(ifp2->if_u1.if_data, ifp1->if_u1.if_data, newsize);
+ xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE | XFS_ILOG_ADATA);
+}
+
+/* Swap the temporary file's attribute fork with the one being repaired. */
+STATIC int
+xrep_xattr_swap(
+ struct xrep_xattr *rx)
+{
+ struct xfs_swapext_req req;
+ struct xfs_swapext_res res;
+ struct xfs_scrub *sc = rx->sc;
+ bool ip_local, temp_local;
+ int error;
+
+ error = xrep_swapext_prep(rx->sc, XFS_ATTR_FORK, &req, &res);
+ if (error)
+ return error;
+
+ error = xchk_trans_alloc(sc, res.resblks);
+ if (error)
+ return error;
+
+ sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
+ sc->ilock_flags |= XFS_ILOCK_EXCL;
+ xfs_xchg_range_ilock(sc->tp, sc->ip, sc->tempip);
+
+ ip_local = sc->ip->i_afp->if_format == XFS_DINODE_FMT_LOCAL;
+ temp_local = sc->tempip->i_afp->if_format == XFS_DINODE_FMT_LOCAL;
+
+ /*
+ * If the both files have a local format attr fork and the rebuilt
+ * xattr data would fit in the repaired file's attr fork, just copy
+ * the contents from the tempfile and declare ourselves done.
+ */
+ if (ip_local && temp_local) {
+ int forkoff;
+ int newsize;
+
+ newsize = xfs_attr_sf_totsize(sc->tempip);
+ forkoff = xfs_attr_shortform_bytesfit(sc->ip, newsize);
+ if (forkoff > 0) {
+ xrep_xattr_swap_local(sc, newsize, forkoff);
+ return 0;
+ }
+ }
+
+ /* Otherwise, make sure both attr forks are in block-mapping mode. */
+ error = xrep_xattr_swap_prep(sc, temp_local, ip_local);
+ if (error)
+ return error;
+
+ /* Rewrite the owner field of all attr blocks in the temporary file. */
+ error = xrep_xattr_swap_owner(sc);
+ if (error)
+ return error;
+
+ return xfs_swapext(&sc->tp, &req);
+}
+
+/*
+ * Insert into the tempfile all the attributes that we collected.
+ *
+ * Commit the repair transaction and drop the ilock because the attribute
+ * setting code needs to be able to allocate special transactions and take the
+ * ilock on its own. The attributes are added to the temporary file (which can
+ * be disposed of easily on failure). If we finish rebuilding all of the
+ * salvageable attrs, we can then use atomic extent swapping to commit the
+ * new attr index to the file.
+ */
+STATIC int
+xrep_xattr_rebuild_tree(
+ struct xrep_xattr *rx)
+{
+ int error;
+
+ /*
+ * If we didn't find any attributes to salvage, repair the file by
+ * zapping the attr fork. Join the temp file so that we keep it
+ * rolling forward along with the file being repaired.
+ */
+ if (rx->attrs_found == 0) {
+ xfs_trans_ijoin(rx->sc->tp, rx->sc->tempip, 0);
+ xfs_trans_ijoin(rx->sc->tp, rx->sc->ip, 0);
+ return xrep_xattr_reset_fork(rx->sc, rx->sc->ip);
+ }
+
+ /*
+ * Commit the repair transaction and drop the ILOCK so that we can
+ * use individual transactions to re-add each extended attribute.
+ */
+ error = xfs_trans_commit(rx->sc->tp);
+ rx->sc->tp = NULL;
+ if (error)
+ return error;
+
+ /*
+ * Drop the ILOCK so that we can use the atomic extent swapping
+ * functions, which help us to compute the correct block reservations
+ * and lock the inodes.
+ *
+ * We still hold the IOLOCK (aka i_rwsem) which will prevent attr
+ * modifications, but there's nothing to prevent userspace from
+ * reading/listing the attrs while we build a new attr fork. Oh well,
+ * at least the fs can't shut down those threads if they stumble into
+ * corrupt blocks.
+ */
+ xfs_iunlock(rx->sc->ip, XFS_ILOCK_EXCL);
+ xfs_iunlock(rx->sc->tempip, XFS_ILOCK_EXCL);
+ rx->sc->ilock_flags &= ~XFS_ILOCK_EXCL;
+ rx->sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL;
+
+ /*
+ * Swap the tempfile's attr fork with the file being repaired. This
+ * recreates the transaction and re-takes the ILOCK in the scrub
+ * context.
+ */
+ error = xrep_xattr_swap(rx);
+ if (error)
+ return error;
+
+ /*
+ * Now wipe out the attr fork of the temp file so that regular inode
+ * inactivation won't trip over the corrupt attr fork.
+ */
+ return xrep_xattr_reset_fork(rx->sc, rx->sc->tempip);
+}
+
+/*
+ * Repair the extended attribute metadata.
+ *
+ * XXX: Remote attribute value buffers encompass the entire (up to 64k) buffer.
+ * The buffer cache in XFS can't handle aliased multiblock buffers, so this
+ * might misbehave if the attr fork is crosslinked with other filesystem
+ * metadata.
+ */
+int
+xrep_xattr(
+ struct xfs_scrub *sc)
+{
+ struct xrep_xattr rx = {
+ .sc = sc,
+ };
+ int max_len;
+ int error;
+
+ if (!xfs_inode_hasattr(sc->ip))
+ return -ENOENT;
+
+ /*
+ * Make sure we have enough space to handle salvaging and spilling
+ * every possible local attr value, since we only realloc the buffer
+ * for remote values.
+ */
+ max_len = xfs_attr_leaf_entsize_local_max(sc->mp->m_attr_geo->blksize);
+ error = xrep_setup_xattr_buf(sc, max_len, false);
+ if (error == -ENOMEM)
+ return -EDEADLOCK;
+ if (error)
+ return error;
+
+ /* Set up some storage */
+ rx.xattr_records = xfbma_init("xattr keys",
+ sizeof(struct xrep_xattr_key));
+ if (IS_ERR(rx.xattr_records))
+ return PTR_ERR(rx.xattr_records);
+ rx.xattr_blobs = xblob_init("xattr values");
+ if (IS_ERR(rx.xattr_blobs)) {
+ error = PTR_ERR(rx.xattr_blobs);
+ goto out_arr;
+ }
+
+ /*
+ * Cycle the ILOCK here so that we can lock both the file we're
+ * repairing as well as the tempfile we created earlier.
+ */
+ if (sc->ilock_flags & XFS_ILOCK_EXCL)
+ xfs_iunlock(sc->ip, XFS_ILOCK_EXCL);
+ xfs_lock_two_inodes(sc->ip, XFS_ILOCK_EXCL, sc->tempip,
+ XFS_ILOCK_EXCL);
+ sc->ilock_flags |= XFS_ILOCK_EXCL;
+ sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
+
+ /* Collect extended attributes by parsing raw blocks. */
+ error = xrep_xattr_find_attributes(&rx);
+ if (error)
+ goto out;
+
+ /*
+ * Now that we've stuffed all the salvaged attributes in the temporary
+ * file, drop the in-memory staging areas. Hang on to both ILOCKs.
+ */
+ xblob_destroy(rx.xattr_blobs);
+ xfbma_destroy(rx.xattr_records);
+ rx.xattr_blobs = NULL;
+ rx.xattr_records = NULL;
+
+ /* Now rebuild the attribute information. */
+ return xrep_xattr_rebuild_tree(&rx);
+out:
+ xblob_destroy(rx.xattr_blobs);
+out_arr:
+ xfbma_destroy(rx.xattr_records);
+ return error;
+}
diff --git a/fs/xfs/scrub/bitmap.c b/fs/xfs/scrub/bitmap.c
index af874c9febd5..142041519326 100644
--- a/fs/xfs/scrub/bitmap.c
+++ b/fs/xfs/scrub/bitmap.c
@@ -345,3 +345,25 @@ xbitmap_count_set_regions(
return nr;
}
+
+/* Is the start of the range set or clear? And for how long? */
+bool
+xbitmap_test(
+ struct xbitmap *bitmap,
+ uint64_t start,
+ uint64_t *len)
+{
+ struct xbitmap_node *bn;
+ uint64_t last = start + *len - 1;
+
+ bn = xbitmap_tree_iter_first(&bitmap->xb_root, start, last);
+ if (!bn)
+ return false;
+ if (bn->bn_start <= start) {
+ if (bn->bn_last < last)
+ *len = bn->bn_last - start + 1;
+ return true;
+ }
+ *len = bn->bn_start - start;
+ return false;
+}
diff --git a/fs/xfs/scrub/bitmap.h b/fs/xfs/scrub/bitmap.h
index 7a569a9949f7..2793019cfa47 100644
--- a/fs/xfs/scrub/bitmap.h
+++ b/fs/xfs/scrub/bitmap.h
@@ -35,5 +35,6 @@ int xbitmap_walk(struct xbitmap *bitmap, xbitmap_walk_fn fn,
bool xbitmap_empty(struct xbitmap *bitmap);
uint64_t xbitmap_count_set_regions(struct xbitmap *bitmap);
+bool xbitmap_test(struct xbitmap *bitmap, uint64_t start, uint64_t *len);
#endif /* __XFS_SCRUB_BITMAP_H__ */
diff --git a/fs/xfs/scrub/blob.c b/fs/xfs/scrub/blob.c
index 3899124b2884..b8749a47136a 100644
--- a/fs/xfs/scrub/blob.c
+++ b/fs/xfs/scrub/blob.c
@@ -141,3 +141,27 @@ xblob_free(
xfile_discard(blob->xfile, cookie, cookie + sizeof(key) + key.size - 1);
return 0;
}
+
+/* How many bytes is this blob storage object consuming? */
+loff_t
+xblob_bytes(
+ struct xblob *blob)
+{
+ struct kstat statbuf;
+ int ret;
+
+ ret = xfile_statx(blob->xfile, &statbuf);
+ if (ret)
+ return ret;
+
+ return statbuf.blocks * 512;
+}
+
+/* Drop all the blobs. */
+void
+xblob_truncate(
+ struct xblob *blob)
+{
+ xfile_discard(blob->xfile, 0, MAX_LFS_FILESIZE);
+ blob->last_offset = 0;
+}
diff --git a/fs/xfs/scrub/blob.h b/fs/xfs/scrub/blob.h
index 106284d7ccf5..7b19f7ed7898 100644
--- a/fs/xfs/scrub/blob.h
+++ b/fs/xfs/scrub/blob.h
@@ -20,5 +20,7 @@ int xblob_get(struct xblob *blob, xblob_cookie cookie, void *ptr,
int xblob_put(struct xblob *blob, xblob_cookie *cookie, void *ptr,
uint32_t size);
int xblob_free(struct xblob *blob, xblob_cookie cookie);
+loff_t xblob_bytes(struct xblob *blob);
+void xblob_truncate(struct xblob *blob);
#endif /* __XFS_SCRUB_BLOB_H__ */
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index e111bdfb391b..b5d0f23b9b6d 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -28,6 +28,7 @@
#include "xfs_ag_resv.h"
#include "xfs_quota.h"
#include "xfs_bmap.h"
+#include "xfs_dir2.h"
#include "xfs_da_format.h"
#include "xfs_da_btree.h"
#include "xfs_attr.h"
@@ -38,7 +39,7 @@
#include "xfs_health.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
-#include "xfs_dir2.h"
+#include "xfs_swapext.h"
#include "xfs_xchgrange.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
@@ -1668,6 +1669,17 @@ xrep_metadata_inode_forks(
return error;
}
+ /* Clear the attr forks since metadata shouldn't have that. */
+ if (xfs_inode_hasattr(sc->ip)) {
+ if (!dirty) {
+ dirty = true;
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+ }
+ error = xrep_xattr_reset_fork(sc, sc->ip);
+ if (error)
+ return error;
+ }
+
/*
* If we modified the inode, roll the transaction but don't rejoin the
* inode to the new transaction because xrep_bmap_data can do that.
@@ -2029,3 +2041,126 @@ out:
xfs_buf_delwri_cancel(&buffers_list);
return error;
}
+
+/*
+ * See if this buffer can pass the given ->verify_struct() function.
+ *
+ * If the buffer already has ops attached and they're not the ones that were
+ * passed in, we reject the buffer. Otherwise, we perform the structure test
+ * (note that we do not check CRCs) and return the outcome of the test. The
+ * buffer ops and error state are left unchanged.
+ */
+bool
+xrep_buf_verify_struct(
+ struct xfs_buf *bp,
+ const struct xfs_buf_ops *ops)
+{
+ const struct xfs_buf_ops *old_ops = bp->b_ops;
+ xfs_failaddr_t fa;
+ int old_error;
+
+ if (old_ops) {
+ if (old_ops != ops)
+ return false;
+ }
+
+ old_error = bp->b_error;
+ bp->b_ops = ops;
+ fa = bp->b_ops->verify_struct(bp);
+ bp->b_ops = old_ops;
+ bp->b_error = old_error;
+
+ return fa == NULL;
+}
+
+/*
+ * Fill out the swapext request and resource estimation structures in
+ * preparation for swapping the contents of a metadata file that we've rebuilt
+ * in the temp file.
+ */
+int
+xrep_swapext_prep(
+ struct xfs_scrub *sc,
+ int whichfork,
+ struct xfs_swapext_req *req,
+ struct xfs_swapext_res *res)
+{
+ struct xfs_ifork *ifp, *tifp;
+ int state = 0;
+
+ ASSERT(whichfork != XFS_COW_FORK);
+
+ /* Both files should have the relevant forks. */
+ ifp = XFS_IFORK_PTR(sc->ip, whichfork);
+ tifp = XFS_IFORK_PTR(sc->tempip, whichfork);
+ if (!ifp || !tifp) {
+ ASSERT(0);
+ return -EINVAL;
+ }
+
+ memset(res, 0, sizeof(struct xfs_swapext_res));
+ req->ip1 = sc->tempip;
+ req->ip2 = sc->ip;
+ req->startoff1 = 0;
+ req->startoff2 = 0;
+ req->whichfork = whichfork;
+ req->blockcount = XFS_MAX_FILEOFF;
+ req->req_flags = 0;
+
+ /*
+ * If we're repairing xattrs or directories, always try to convert ip2
+ * to short format after swapping.
+ */
+ if (whichfork == XFS_ATTR_FORK || S_ISDIR(VFS_I(sc->ip)->i_mode))
+ req->req_flags |= XFS_SWAP_REQ_FILE2_CVT_SF;
+
+ /*
+ * Deal with either fork being in local format. The swapext code only
+ * knows how to exchange block mappings for regular files, so we only
+ * have to know about local format for xattrs and directories.
+ */
+ if (ifp->if_format == XFS_DINODE_FMT_LOCAL)
+ state |= 1;
+ if (tifp->if_format == XFS_DINODE_FMT_LOCAL)
+ state |= 2;
+ switch (state) {
+ case 0:
+ /* Both files have mapped extents; use the regular estimate. */
+ return xfs_xchg_range_estimate(req, res);
+ case 1:
+ /*
+ * The file being repaired is in local format, but the temp
+ * file has mapped extents. To perform the swap, the file
+ * being repaired will be reinitialized to have an empty extent
+ * map, so the number of exchanges is the temporary file's
+ * extent count.
+ */
+ res->ip1_bcount = sc->tempip->i_nblocks;
+ res->nr_exchanges = tifp->if_nextents;
+ break;
+ case 2:
+ /*
+ * The temporary file is in local format, but the file being
+ * repaired has mapped extents. To perform the swap, the temp
+ * file will be converted to have a single block, so the number
+ * of exchanges is (worst case) the extent count of the file
+ * being repaired plus one more.
+ */
+ res->ip1_bcount = 1;
+ res->ip2_bcount = sc->ip->i_nblocks;
+ res->nr_exchanges = ifp->if_nextents;
+ break;
+ case 3:
+ /*
+ * Both forks are in local format. To perform the swap, the
+ * file being repaired will be reinitialized to have an empty
+ * extent map and the temp file will be converted to have a
+ * single block. Only one exchange is required.
+ */
+ res->ip1_bcount = 1;
+ res->nr_exchanges = 1;
+ break;
+ }
+
+ return xfs_swapext_estimate_overhead(req, res);
+}
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 15baf294ce24..26278ee87736 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -24,6 +24,8 @@ static inline int xrep_notsupported(struct xfs_scrub *sc)
enum xfs_blft;
struct xbitmap;
+struct xfs_swapext_req;
+struct xfs_swapext_res;
int xrep_attempt(struct xfs_scrub *sc);
void xrep_failure(struct xfs_mount *mp);
@@ -46,6 +48,8 @@ typedef int (*xrep_setfile_getbuf_fn)(struct xfs_scrub *sc,
int xrep_set_file_contents(struct xfs_scrub *sc,
const struct xfs_buf_ops *ops, enum xfs_blft type,
xfs_fileoff_t isize);
+int xrep_swapext_prep(struct xfs_scrub *sc, int whichfork,
+ struct xfs_swapext_req *req, struct xfs_swapext_res *res);
int xrep_fix_freelist(struct xfs_scrub *sc, int alloc_flags);
int xrep_reap_extents(struct xfs_scrub *sc, struct xbitmap *exlist,
@@ -71,6 +75,7 @@ int xrep_reset_perag_resv(struct xfs_scrub *sc);
int xrep_bmap(struct xfs_scrub *sc, int whichfork, bool allow_unwritten);
int xrep_metadata_inode_forks(struct xfs_scrub *sc);
int xrep_rmapbt_setup(struct xfs_scrub *sc);
+int xrep_xattr_reset_fork(struct xfs_scrub *sc, struct xfs_inode *ip);
void xrep_ag_btcur_init(struct xfs_scrub *sc, struct xchk_ag *sa);
int xrep_ag_init(struct xfs_scrub *sc, struct xfs_perag *pag,
@@ -97,6 +102,7 @@ int xrep_bmap_data(struct xfs_scrub *sc);
int xrep_bmap_attr(struct xfs_scrub *sc);
int xrep_symlink(struct xfs_scrub *sc);
int xrep_fscounters(struct xfs_scrub *sc);
+int xrep_xattr(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_QUOTA
int xrep_quota(struct xfs_scrub *sc);
@@ -169,6 +175,8 @@ void xrep_bload_estimate_slack(struct xfs_scrub *sc,
struct xfs_btree_bload *bload);
int xrep_newbt_relog_efis(struct xrep_newbt *xnr);
+bool xrep_buf_verify_struct(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
+
#else
static inline int
@@ -228,6 +236,7 @@ xrep_rmapbt_setup(
#define xrep_quotacheck xrep_notsupported
#define xrep_fscounters xrep_notsupported
#define xrep_rtsummary xrep_notsupported
+#define xrep_xattr xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 662f1959227d..dfc95b4aef81 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -317,7 +317,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.type = ST_INODE,
.setup = xchk_setup_xattr,
.scrub = xchk_xattr,
- .repair = xrep_notsupported,
+ .repair = xrep_xattr,
},
[XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */
.type = ST_INODE,
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 95d9b2074e6c..3994fcdf5916 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -209,4 +209,7 @@ struct xchk_fscounters {
unsigned long long icount_max;
};
+bool xchk_xattr_set_map(struct xfs_scrub *sc, unsigned long *map,
+ unsigned int start, unsigned int len);
+
#endif /* __XFS_SCRUB_SCRUB_H__ */
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index ba80f0d380a9..8d0ec94c7552 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -14,6 +14,7 @@
#include "xfs_btree.h"
#include "xfs_ag.h"
#include "xfs_quota_defs.h"
+#include "xfs_da_format.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index e40d3686940b..a165cc31c516 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -1443,6 +1443,86 @@ TRACE_EVENT(xrep_setup_tempfile,
__entry->temp_inum)
);
+TRACE_EVENT(xrep_xattr_recover_leafblock,
+ TP_PROTO(struct xfs_inode *ip, xfs_dablk_t dabno, uint16_t magic),
+ TP_ARGS(ip, dabno, magic),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(xfs_dablk_t, dabno)
+ __field(uint16_t, magic)
+ ),
+ TP_fast_assign(
+ __entry->dev = ip->i_mount->m_super->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->dabno = dabno;
+ __entry->magic = magic;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx dablk 0x%x magic 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __entry->dabno,
+ __entry->magic)
+);
+
+TRACE_EVENT(xrep_xattr_salvage_key,
+ TP_PROTO(struct xfs_inode *ip, unsigned int flags, char *name,
+ unsigned int namelen, unsigned int valuelen),
+ TP_ARGS(ip, flags, name, namelen, valuelen),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(unsigned int, flags)
+ __field(unsigned int, namelen)
+ __dynamic_array(char, name, namelen)
+ __field(unsigned int, valuelen)
+ ),
+ TP_fast_assign(
+ __entry->dev = ip->i_mount->m_super->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->flags = flags;
+ __entry->namelen = namelen;
+ memcpy(__get_str(name), name, namelen);
+ __entry->valuelen = valuelen;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx flags %s name '%.*s' valuelen 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __print_flags(__entry->flags, "|", XFS_ATTR_NAMESPACE_STR),
+ __entry->namelen,
+ __get_str(name),
+ __entry->valuelen)
+);
+
+TRACE_EVENT(xrep_xattr_insert_rec,
+ TP_PROTO(struct xfs_inode *ip, unsigned int flags, char *name,
+ unsigned int namelen, unsigned int valuelen),
+ TP_ARGS(ip, flags, name, namelen, valuelen),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(unsigned int, flags)
+ __field(unsigned int, namelen)
+ __dynamic_array(char, name, namelen)
+ __field(unsigned int, valuelen)
+ ),
+ TP_fast_assign(
+ __entry->dev = ip->i_mount->m_super->s_dev;
+ __entry->ino = ip->i_ino;
+ __entry->flags = flags;
+ __entry->namelen = namelen;
+ memcpy(__get_str(name), name, namelen);
+ __entry->valuelen = valuelen;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx flags %s name '%.*s' valuelen 0x%x",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __print_flags(__entry->flags, "|", XFS_ATTR_NAMESPACE_STR),
+ __entry->namelen,
+ __get_str(name),
+ __entry->valuelen)
+);
+
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 5fa6cd947dd4..3686b73c9e18 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -470,8 +470,12 @@ _xfs_buf_obj_cmp(
* it stale has not yet committed. i.e. we are
* reallocating a busy extent. Skip this buffer and
* continue searching for an exact match.
+ *
+ * Note: If we're scanning for incore buffers to stale, don't
+ * complain if we find non-stale buffers.
*/
- ASSERT(bp->b_flags & XBF_STALE);
+ if (!(map->bm_flags & XBM_SCAN_STALE))
+ ASSERT(bp->b_flags & XBF_STALE);
return 1;
}
return 0;
@@ -537,6 +541,9 @@ xfs_buf_find(
*found_bp = NULL;
+ if (flags & XBF_SCAN_STALE)
+ cmap.bm_flags |= XBM_SCAN_STALE;
+
for (i = 0; i < nmaps; i++)
cmap.bm_len += map[i].bm_len;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 6b0200b8007d..863847a56b29 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -36,6 +36,13 @@ struct xfs_buf;
#define _XBF_DQUOTS (1 << 17)/* dquot buffer */
#define _XBF_LOGRECOVERY (1 << 18)/* log recovery buffer */
+/*
+ * The caller is scanning for incore buffers to mark stale after a repair.
+ * Don't complain if we find a non-stale buffer of the wrong length, that's
+ * exactly the point.
+ */
+#define XBF_SCAN_STALE (1 << 19)
+
/* flags used only internally */
#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */
#define _XBF_KMEM (1 << 21)/* backed by heap memory */
@@ -108,6 +115,8 @@ typedef struct xfs_buftarg {
struct xfs_buf_map {
xfs_daddr_t bm_bn; /* block number for I/O */
int bm_len; /* size of I/O */
+ unsigned int bm_flags;
+#define XBM_SCAN_STALE (1 << 0) /* see XBF_SCAN_STALE */
};
#define DEFINE_SINGLE_BUF_MAP(map, blkno, numblk) \
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 6f242f4991b2..ba3a63e5ce86 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -31,6 +31,8 @@
* pos: file offset, in bytes
* bytecount: number of bytes
*
+ * dablk: directory or xattr block offset, in filesystem blocks
+ *
* disize: ondisk file size, in bytes
* isize: incore file size, in bytes
*