diff options
author | Darrick J. Wong <darrick.wong@oracle.com> | 2020-02-19 17:01:48 -0800 |
---|---|---|
committer | Darrick J. Wong <darrick.wong@oracle.com> | 2020-06-01 21:16:31 -0700 |
commit | ee4d955c32eb0ea93a6e4c6df066b66771f4e1fd (patch) | |
tree | 277d8069d011f1cf0c3ff3ef3b6bc52021bcbe9f | |
parent | a4154094a525c1b6cdadc105950c55c0fa70c8fc (diff) |
xfs: repair extended attributes
If the extended attributes look bad, try to sift through the rubble to
find whatever keys/values we can, zap the attr tree, and re-add the
values.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r-- | fs/xfs/Makefile | 1 | ||||
-rw-r--r-- | fs/xfs/scrub/attr.c | 10 | ||||
-rw-r--r-- | fs/xfs/scrub/attr.h | 10 | ||||
-rw-r--r-- | fs/xfs/scrub/attr_repair.c | 832 | ||||
-rw-r--r-- | fs/xfs/scrub/repair.c | 31 | ||||
-rw-r--r-- | fs/xfs/scrub/repair.h | 4 | ||||
-rw-r--r-- | fs/xfs/scrub/scrub.c | 2 | ||||
-rw-r--r-- | fs/xfs/scrub/scrub.h | 3 | ||||
-rw-r--r-- | fs/xfs/scrub/trace.c | 1 | ||||
-rw-r--r-- | fs/xfs/scrub/trace.h | 85 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.c | 9 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.h | 8 |
12 files changed, 992 insertions(+), 4 deletions(-)
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index d618718771ec..3fd56fb0ab48 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -170,6 +170,7 @@ xfs-y += $(addprefix scrub/, \ agheader_repair.o \ alloc_repair.o \ array.o \ + attr_repair.o \ bitmap.o \ blob.o \ bmap_repair.o \ diff --git a/fs/xfs/scrub/attr.c b/fs/xfs/scrub/attr.c index 9faddb334a2c..beda402c35e8 100644 --- a/fs/xfs/scrub/attr.c +++ b/fs/xfs/scrub/attr.c @@ -38,9 +38,15 @@ xchk_setup_xattr_buf( * We need enough space to read an xattr value from the file or enough * space to hold three copies of the xattr free space bitmap. We don't * need the buffer space for both purposes at the same time. + * + * If we're doing a repair, we need enough space to hold the largest + * xattr value and the largest xattr name. */ sz = 3 * sizeof(long) * BITS_TO_LONGS(sc->mp->m_attr_geo->blksize); - sz = max_t(size_t, sz, value_size); + if (sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR) + sz = max_t(size_t, sz, value_size + XATTR_NAME_MAX + 1); + else + sz = max_t(size_t, sz, value_size); /* * If there's already a buffer, figure out if we need to reallocate it @@ -182,7 +188,7 @@ fail_xref: * Within a char, the lowest bit of the char represents the byte with * the smallest address */ -STATIC bool +bool xchk_xattr_set_map( struct xfs_scrub *sc, unsigned long *map, diff --git a/fs/xfs/scrub/attr.h b/fs/xfs/scrub/attr.h index 13a1d2e8424d..b2d758953300 100644 --- a/fs/xfs/scrub/attr.h +++ b/fs/xfs/scrub/attr.h @@ -37,6 +37,16 @@ xchk_xattr_valuebuf( return ab->buf; } +/* A place to store attribute names. */ +static inline unsigned char * +xchk_xattr_namebuf( + struct xfs_scrub *sc) +{ + struct xchk_xattr_buf *ab = sc->buf; + + return (unsigned char *)ab->buf + ab->sz - XATTR_NAME_MAX - 1; +} + /* A bitmap of space usage computed by walking an attr leaf block. 
*/ static inline unsigned long * xchk_xattr_usedmap( diff --git a/fs/xfs/scrub/attr_repair.c b/fs/xfs/scrub/attr_repair.c new file mode 100644 index 000000000000..fb95f0191fed --- /dev/null +++ b/fs/xfs/scrub/attr_repair.c @@ -0,0 +1,832 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2020 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_btree.h" +#include "xfs_bit.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_sb.h" +#include "xfs_inode.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_dir2.h" +#include "xfs_attr.h" +#include "xfs_attr_leaf.h" +#include "xfs_attr_sf.h" +#include "xfs_attr_remote.h" +#include "xfs_bmap.h" +#include "scrub/xfs_scrub.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/repair.h" +#include "scrub/array.h" +#include "scrub/blob.h" +#include "scrub/attr.h" + +/* + * Extended Attribute Repair + * ========================= + * + * We repair extended attributes by reading the attribute fork blocks looking + * for keys and values, then truncate the entire attr fork and reinsert all + * the attributes. Unfortunately, there's no secondary copy of most extended + * attribute data, which means that if we blow up midway through there's + * little we can do. + */ + +struct xrep_xattr_key { + xblob_cookie value_cookie; + xblob_cookie name_cookie; + uint hash; + int flags; + uint32_t valuelen; + uint16_t namelen; +} __packed; + +struct xrep_xattr { + struct xfs_scrub *sc; + struct xfbma *xattr_records; + struct xblob *xattr_blobs; + + /* Size of the largest attribute value we're trying to salvage. */ + size_t max_valuelen; +}; + +/* + * Decide if we want to salvage this attribute. 
We don't bother with + * incomplete or oversized keys or values. + */ +STATIC int +xrep_xattr_want_salvage( + int flags, + const void *name, + int namelen, + int valuelen) +{ + if (flags & XFS_ATTR_INCOMPLETE) + return false; + if (namelen > XATTR_NAME_MAX || namelen <= 0) + return false; + if (valuelen > XATTR_SIZE_MAX || valuelen < 0) + return false; + if (!xfs_attr_namecheck(name, namelen)) + return false; + return true; +} + +/* Allocate an in-core record to hold xattrs while we rebuild the xattr data. */ +STATIC int +xrep_xattr_salvage_key( + struct xrep_xattr *rx, + int flags, + unsigned char *name, + int namelen, + unsigned char *value, + int valuelen) +{ + struct xrep_xattr_key key = { + .valuelen = valuelen, + .flags = flags & (XFS_ATTR_ROOT | XFS_ATTR_SECURE), + .namelen = namelen, + }; + int error = 0; + + if (xchk_should_terminate(rx->sc, &error)) + return error; + + trace_xrep_xattr_salvage_key(rx->sc->ip, key.flags, name, namelen, + valuelen); + + error = xblob_put(rx->xattr_blobs, &key.name_cookie, name, namelen); + if (error) + return error; + error = xblob_put(rx->xattr_blobs, &key.value_cookie, value, valuelen); + if (error) + return error; + + key.hash = xfs_da_hashname(name, namelen); + + error = xfbma_append(rx->xattr_records, &key); + if (error) + return error; + + rx->max_valuelen = max_t(size_t, rx->max_valuelen, valuelen); + return 0; +} + +/* + * Record a shortform extended attribute key & value for later reinsertion + * into the inode. + */ +STATIC int +xrep_xattr_salvage_sf_attr( + struct xrep_xattr *rx, + struct xfs_attr_sf_entry *sfe) +{ + unsigned char *value = &sfe->nameval[sfe->namelen]; + + if (!xrep_xattr_want_salvage(sfe->flags, sfe->nameval, sfe->namelen, + sfe->valuelen)) + return 0; + + return xrep_xattr_salvage_key(rx, sfe->flags, sfe->nameval, + sfe->namelen, value, sfe->valuelen); +} + +/* + * Record a local format extended attribute key & value for later reinsertion + * into the inode. 
+ */ +STATIC int +xrep_xattr_salvage_local_attr( + struct xrep_xattr *rx, + struct xfs_attr_leaf_entry *ent, + unsigned int nameidx, + const char *buf_end, + struct xfs_attr_leaf_name_local *lentry) +{ + unsigned char *value; + unsigned long *usedmap = xchk_xattr_usedmap(rx->sc); + unsigned int valuelen; + unsigned int namesize; + + /* + * Decode the leaf local entry format. If something seems wrong, we + * junk the attribute. + */ + valuelen = be16_to_cpu(lentry->valuelen); + namesize = xfs_attr_leaf_entsize_local(lentry->namelen, valuelen); + if ((char *)lentry + namesize > buf_end) + return 0; + if (!xrep_xattr_want_salvage(ent->flags, lentry->nameval, + lentry->namelen, valuelen)) + return 0; + if (!xchk_xattr_set_map(rx->sc, usedmap, nameidx, namesize)) + return 0; + + /* Try to save this attribute. */ + value = &lentry->nameval[lentry->namelen]; + return xrep_xattr_salvage_key(rx, ent->flags, lentry->nameval, + lentry->namelen, value, valuelen); +} + +/* + * Record a remote format extended attribute key & value for later reinsertion + * into the inode. + */ +STATIC int +xrep_xattr_salvage_remote_attr( + struct xrep_xattr *rx, + struct xfs_attr_leaf_entry *ent, + unsigned int nameidx, + const char *buf_end, + struct xfs_attr_leaf_name_remote *rentry, + unsigned int ent_idx, + struct xfs_buf *leaf_bp) +{ + struct xfs_da_args args = { + .trans = rx->sc->tp, + .dp = rx->sc->ip, + .index = ent_idx, + .geo = rx->sc->mp->m_attr_geo, + }; + unsigned long *usedmap = xchk_xattr_usedmap(rx->sc); + unsigned char *value; + unsigned int valuelen; + unsigned int namesize; + int error; + + /* + * Decode the leaf remote entry format. If something seems wrong, we + * junk the attribute. Note that we should never find a zero-length + * remote attribute value. 
+ */ + valuelen = be32_to_cpu(rentry->valuelen); + namesize = xfs_attr_leaf_entsize_remote(rentry->namelen); + if ((char *)rentry + namesize > buf_end) + return 0; + if (valuelen == 0 || + !xrep_xattr_want_salvage(ent->flags, rentry->name, rentry->namelen, + valuelen)) + return 0; + if (!xchk_xattr_set_map(rx->sc, usedmap, nameidx, namesize)) + return 0; + + /* + * Find somewhere to save this value. We can't use the xchk_xattr_buf + * here because we're still using the memory for the attr block bitmap. + */ + value = kmem_alloc_large(valuelen, KM_MAYFAIL); + if (!value) + return -ENOMEM; + + /* Look up the remote value and stash it for reconstruction. */ + args.valuelen = valuelen; + args.namelen = rentry->namelen; + args.name = rentry->name; + args.value = value; + error = xfs_attr3_leaf_getvalue(leaf_bp, &args); + if (error || args.rmtblkno == 0) + goto err_free; + + error = xfs_attr_rmtval_get(&args); + if (error) + goto err_free; + + /* Try to save this attribute. */ + error = xrep_xattr_salvage_key(rx, ent->flags, rentry->name, + rentry->namelen, value, valuelen); +err_free: + /* remote value was garbage, junk it */ + if (error == -EFSBADCRC || error == -EFSCORRUPTED) + error = 0; + kmem_free(value); + return error; +} + +/* Extract every xattr key that we can from this attr fork block. 
*/ +STATIC int +xrep_xattr_recover_leaf( + struct xrep_xattr *rx, + struct xfs_buf *bp) +{ + struct xfs_attr3_icleaf_hdr leafhdr; + struct xfs_scrub *sc = rx->sc; + struct xfs_mount *mp = sc->mp; + struct xfs_attr_leafblock *leaf; + unsigned long *usedmap = xchk_xattr_usedmap(sc); + struct xfs_attr_leaf_name_local *lentry; + struct xfs_attr_leaf_name_remote *rentry; + struct xfs_attr_leaf_entry *ent; + struct xfs_attr_leaf_entry *entries; + char *buf_end; + size_t off; + unsigned int nameidx; + unsigned int hdrsize; + int i; + int error = 0; + + bitmap_zero(usedmap, mp->m_attr_geo->blksize); + + /* Check the leaf header */ + leaf = bp->b_addr; + xfs_attr3_leaf_hdr_from_disk(mp->m_attr_geo, &leafhdr, leaf); + hdrsize = xfs_attr3_leaf_hdr_size(leaf); + xchk_xattr_set_map(sc, usedmap, 0, hdrsize); + entries = xfs_attr3_leaf_entryp(leaf); + + buf_end = (char *)bp->b_addr + mp->m_attr_geo->blksize; + for (i = 0, ent = entries; i < leafhdr.count; ent++, i++) { + if (xchk_should_terminate(sc, &error)) + break; + + /* Skip key if it conflicts with something else? */ + off = (char *)ent - (char *)leaf; + if (!xchk_xattr_set_map(sc, usedmap, off, + sizeof(xfs_attr_leaf_entry_t))) + continue; + + /* Check the name information. */ + nameidx = be16_to_cpu(ent->nameidx); + if (nameidx < leafhdr.firstused || + nameidx >= mp->m_attr_geo->blksize) + continue; + + if (ent->flags & XFS_ATTR_LOCAL) { + lentry = xfs_attr3_leaf_name_local(leaf, i); + error = xrep_xattr_salvage_local_attr(rx, ent, nameidx, + buf_end, lentry); + } else { + rentry = xfs_attr3_leaf_name_remote(leaf, i); + error = xrep_xattr_salvage_remote_attr(rx, ent, nameidx, + buf_end, rentry, i, bp); + } + if (error) + break; + } + + return error; +} + +/* Try to recover shortform attrs. 
*/ +STATIC int +xrep_xattr_recover_sf( + struct xrep_xattr *rx) +{ + struct xfs_attr_shortform *sf; + struct xfs_attr_sf_entry *sfe; + struct xfs_attr_sf_entry *next; + struct xfs_ifork *ifp; + unsigned char *end; + int i; + int error; + + ifp = XFS_IFORK_PTR(rx->sc->ip, XFS_ATTR_FORK); + sf = (struct xfs_attr_shortform *)rx->sc->ip->i_afp->if_u1.if_data; + end = (unsigned char *)ifp->if_u1.if_data + ifp->if_bytes; + + for (i = 0, sfe = &sf->list[0]; i < sf->hdr.count; i++) { + if (xchk_should_terminate(rx->sc, &error)) + break; + + next = XFS_ATTR_SF_NEXTENTRY(sfe); + if ((unsigned char *)next > end) + break; + + /* Ok, let's save this key/value. */ + error = xrep_xattr_salvage_sf_attr(rx, sfe); + if (error) + return error; + + sfe = next; + } + + return 0; +} + +/* + * Blindly walk every block in an attr fork without stumbling over incore + * buffers for remote attr value blocks. + * + * Attribute leaf and node blocks are simple -- they're a single block, so we + * can walk them one at a time and we never have to worry about discontiguous + * multiblock buffers like we do for directories. + * + * Unfortunately, remote attr blocks add a lot of complexity here. Each disk + * block is totally self contained, in the sense that the v5 header provides no + * indication that there could be more data in the next block. The incore + * buffers can span multiple blocks, though they never cross extent records. + * However, they don't necessarily start or end on an extent record boundary. + * + * Because the buffer cache get function complains if it finds a buffer + * matching the block number but not matching the length, we must be careful to + * look for incore buffers (up to the maximum length of a remote value) that + * could be hiding anywhere in the extent record. If we find an incore buffer, + * we can pass that to the callback function. Otherwise, read a single block + * and pass that to the callback. 
Note the subtlety that remote attr value + * blocks for which there is no incore buffer will be passed to the callback + * one block at a time. + * + * The caller must hold the ILOCK. We use XBF_TRYLOCK here to skip any locked + * buffer on the assumption that we don't own the block and don't want to hang + * the system on a potentially garbage buffer. + * + * XREP_ATTR_WALK_INCORE: don't read buffers from disk. + */ +#define XREP_ATTR_WALK_INCORE (1U << 0) +STATIC int +xrep_attr_walk_blind( + struct xfs_inode *ip, + unsigned int flags, + int (*fn)(struct xfs_inode *ip, xfs_dablk_t dabno, + struct xfs_buf *bp, void *priv), + void *priv) +{ + struct xfs_bmbt_irec map; + struct xfs_mount *mp = ip->i_mount; + xfs_fileoff_t offset = 0; + xfs_fileoff_t end = XFS_MAX_FILEOFF; + xfs_filblks_t len; + xfs_fsblock_t fsbno; + xfs_dablk_t dabno; + int max_rmt_blocks; + int nmap; + int error = 0; + + ASSERT(ip->i_mount->m_attr_geo->fsbcount == 1); + + max_rmt_blocks = xfs_attr3_rmt_blocks(mp, XFS_XATTR_SIZE_MAX); + + for (offset = 0; + offset < end; + offset = map.br_startoff + map.br_blockcount) { + /* Walk the attr fork piece by piece... */ + nmap = 1; + error = xfs_bmapi_read(ip, offset, end - offset, + &map, &nmap, XFS_BMAPI_ATTRFORK); + if (error) + return error; + if (nmap != 1) + return -EFSCORRUPTED; + if (!xfs_bmap_is_real_extent(&map)) + continue; + + for (dabno = map.br_startoff, fsbno = map.br_startblock; + dabno < map.br_startoff + map.br_blockcount; + dabno += len, fsbno += len) { + struct xfs_buf *bp; + xfs_daddr_t daddr = XFS_FSB_TO_DADDR(mp, fsbno); + + len = min_t(xfs_filblks_t, map.br_blockcount, + max_rmt_blocks); + + /* + * Look for an incore buffer for every possible rmt + * or leaf block that could start at this physical + * position. 
+ */ + while (len > 0) { + bp = xfs_buf_incore(mp->m_ddev_targp, daddr, + XFS_FSB_TO_BB(mp, len), + XBF_TRYLOCK | XBF_SCAN_STALE); + if (bp) + goto dispatch_fn; + + len--; + } + + if (flags & XREP_ATTR_WALK_INCORE) + continue; + + /* + * If we didn't find a buffer, read 1 block from disk. + * We don't attach any buffer ops. + */ + len = 1; + error = xfs_buf_read(mp->m_ddev_targp, daddr, + XFS_FSB_TO_BB(mp, len), + XBF_TRYLOCK, &bp, NULL); + if (error) + return error; + +dispatch_fn: + /* Call the callback function. */ + error = fn(ip, dabno, bp, priv); + xfs_buf_relse(bp); + if (error) + return error; + } + } + + return 0; +} + +/* Deal with a buffer that we found during our walk of the attr fork. */ +STATIC int +xrep_xattr_recover_block( + struct xfs_inode *ip, + xfs_dablk_t dabno, + struct xfs_buf *bp, + void *priv) +{ + struct xrep_xattr *rx = priv; + struct xfs_da_blkinfo *info = bp->b_addr; + int error = 0; + + trace_xrep_xattr_recover_leafblock(rx->sc->ip, dabno, + be16_to_cpu(info->magic)); + + /* + * If the buffer has the right magic number for an attr leaf block and + * passes a structure check (we don't care about checksums), salvage + * as much as we can from the block. */ + if (info->magic == cpu_to_be16(XFS_ATTR3_LEAF_MAGIC) && + xrep_buf_verify_struct(bp, &xfs_attr3_leaf_buf_ops)) + error = xrep_xattr_recover_leaf(rx, bp); + + /* + * If the buffer didn't already have buffer ops set, mark it stale so + * that it doesn't hang around in memory to cause problems. + */ + if (bp->b_ops == NULL) + xfs_buf_stale(bp); + return error; +} + +/* Extract as many attribute keys and values as we can. */ +STATIC int +xrep_xattr_recover( + struct xrep_xattr *rx) +{ + struct xfs_scrub *sc = rx->sc; + int error = 0; + + if (sc->ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) + return xrep_xattr_recover_sf(rx); + + /* + * Set the xchk_attr_buf to be as large as we're going to need it to be + * to compute space usage bitmaps for each attr block we try to + * salvage. 
We don't salvage attrs whose name and value areas are + * crosslinked with anything else. + */ + error = xchk_setup_xattr_buf(sc, 0, KM_MAYFAIL); + if (error == -ENOMEM) + return -EDEADLOCK; + if (error) + return error; + + return xrep_attr_walk_blind(sc->ip, 0, xrep_xattr_recover_block, rx); +} + +/* + * Reset the extended attribute fork to a state where we can start re-adding + * the salvaged attributes. + */ +STATIC void +xrep_xattr_fork_remove( + struct xfs_scrub *sc, + struct xfs_inode *ip) +{ + struct xfs_attr_sf_hdr *hdr; + struct xfs_ifork *ifp; + + /* + * If the data fork is in btree format, we can't change di_forkoff + * because we could run afoul of the rule that the data fork isn't + * supposed to be in btree format if there's enough space in the fork + * that it could have used extents format. Instead, reinitialize the + * attr fork to have a shortform structure with zero attributes. + */ + if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) { + ip->i_d.di_aformat = XFS_DINODE_FMT_LOCAL; + ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK); + ifp->if_flags &= ~XFS_IFEXTENTS; + ifp->if_flags |= XFS_IFINLINE; + xfs_idata_realloc(ip, (int)sizeof(*hdr) - ifp->if_bytes, + XFS_ATTR_FORK); + hdr = (struct xfs_attr_sf_hdr *)ifp->if_u1.if_data; + hdr->count = 0; + hdr->totsize = cpu_to_be16(sizeof(*hdr)); + xfs_trans_log_inode(sc->tp, ip, + XFS_ILOG_CORE | XFS_ILOG_ADATA); + return; + } + + xfs_attr_fork_remove(ip, sc->tp); +} + +/* Rip the buffer ops off a block so that it can be marked stale. */ +STATIC int +xrep_xattr_stale_block( + struct xfs_inode *ip, + xfs_dablk_t dabno, + struct xfs_buf *bp, + void *priv) +{ + xfs_buf_stale(bp); + return 0; +} + +/* + * Free all the attribute fork blocks and delete the fork. The caller must + * join the inode to the transaction. This function returns with the inode + * joined to a clean scrub transaction. 
+ */ +STATIC int +xrep_xattr_reset_fork( + struct xfs_scrub *sc) +{ + int error; + + if (sc->ip->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) + goto zap; + + /* Invalidate each attr block in the attr fork. */ + error = xrep_attr_walk_blind(sc->ip, XREP_ATTR_WALK_INCORE, + xrep_xattr_stale_block, NULL); + if (error) + return error; + + /* Now free all the blocks. */ + error = xfs_bunmapi_range(&sc->tp, sc->ip, XFS_ATTR_FORK, 0, + XFS_MAX_FILEOFF, XFS_BMAPI_NODISCARD); + if (error) + return error; + +zap: + xrep_xattr_fork_remove(sc, sc->ip); + return xrep_roll_trans(sc); +} + +/* + * Compare two xattr keys. ATTR_SECURE keys come before ATTR_ROOT and + * ATTR_ROOT keys come before user attrs. Otherwise sort in hash order. + */ +static int +xrep_xattr_key_cmp( + const void *a, + const void *b) +{ + const struct xrep_xattr_key *ap = a; + const struct xrep_xattr_key *bp = b; + + if (ap->flags > bp->flags) + return 1; + else if (ap->flags < bp->flags) + return -1; + + if (ap->hash > bp->hash) + return 1; + else if (ap->hash < bp->hash) + return -1; + return 0; +} + +/* + * Find all the extended attributes for this inode by scraping them out of the + * attribute key blocks by hand. The caller must clean up the lists if + * anything goes wrong. + */ +STATIC int +xrep_xattr_find_attributes( + struct xrep_xattr *rx) +{ + struct xfs_inode *ip = rx->sc->ip; + struct xfs_ifork *ifp; + int error; + + error = xrep_ino_dqattach(rx->sc); + if (error) + return error; + + /* Extent map should be loaded. */ + ifp = XFS_IFORK_PTR(ip, XFS_ATTR_FORK); + if (XFS_IFORK_FORMAT(ip, XFS_ATTR_FORK) != XFS_DINODE_FMT_LOCAL && + !(ifp->if_flags & XFS_IFEXTENTS)) { + error = xfs_iread_extents(rx->sc->tp, ip, XFS_ATTR_FORK); + if (error) + return error; + } + + /* Read every attr key and value and record them in memory. 
*/ + error = xrep_xattr_recover(rx); + if (error) + return error; + + /* + * Reset the xchk_attr_buf to be as large as we're going to need it to + * be to store each attribute name and value as we re-add them to the + * file. We must preallocate the memory here because once we start + * to modify the filesystem we cannot afford an ENOMEM. + */ + error = xchk_setup_xattr_buf(rx->sc, rx->max_valuelen, KM_MAYFAIL); + if (error == -ENOMEM) + return -EDEADLOCK; + if (error) + return error; + + return 0; +} + +/* Insert one xattr key/value. */ +STATIC int +xrep_xattr_insert_rec( + const void *item, + void *priv) +{ + struct xfs_da_args args = { NULL }; + const struct xrep_xattr_key *key = item; + struct xrep_xattr *rx = priv; + unsigned char *name = xchk_xattr_namebuf(rx->sc); + unsigned char *value = xchk_xattr_valuebuf(rx->sc); + int error; + + /* + * The attribute name is stored near the end of the in-core buffer, + * though we reserve one more byte to ensure null termination. + */ + name[XATTR_NAME_MAX] = 0; + + error = xblob_get(rx->xattr_blobs, key->name_cookie, name, + key->namelen); + if (error) + return error; + + error = xblob_free(rx->xattr_blobs, key->name_cookie); + if (error) + return error; + + error = xblob_get(rx->xattr_blobs, key->value_cookie, value, + key->valuelen); + if (error) + return error; + + error = xblob_free(rx->xattr_blobs, key->value_cookie); + if (error) + return error; + + name[key->namelen] = 0; + + trace_xrep_xattr_insert_rec(rx->sc->ip, key->flags, name, key->namelen, + key->valuelen); + + args.dp = rx->sc->ip; + args.attr_filter = key->flags; + args.name = name; + args.namelen = key->namelen; + args.value = value; + args.valuelen = key->valuelen; + return xfs_attr_set(&args); +} + +/* + * Insert all the attributes that we collected. + * + * Commit the repair transaction and drop the ilock because the attribute + * setting code needs to be able to allocate special transactions and take the + * ilock on its own. 
Some day we'll have deferred attribute setting, at which + * point we'll be able to use that to replace the attributes atomically and + * safely. + */ +STATIC int +xrep_xattr_rebuild_tree( + struct xrep_xattr *rx) +{ + int error; + + /* + * Commit the repair transaction and drop the ILOCK so that we can + * use individual transactions to re-add each extended attribute. + */ + error = xfs_trans_commit(rx->sc->tp); + rx->sc->tp = NULL; + if (error) + return error; + + /* + * Drop the ILOCK so that we don't pin the tail of the log. We still + * hold the IOLOCK (aka i_rwsem) which will prevent attr modifications, + * but there's nothing to prevent userspace from reading/listing the + * attrs while we build a new attr fork. Oh well, at least the fs + * can't shut down those threads if they stumble into corrupt blocks. + */ + xfs_iunlock(rx->sc->ip, XFS_ILOCK_EXCL); + rx->sc->ilock_flags &= ~XFS_ILOCK_EXCL; + + /* + * Sort the attribute keys by hash to minimize dabtree splits when we + * rebuild the extended attribute information. + */ + error = xfbma_sort(rx->xattr_records, xrep_xattr_key_cmp); + if (error) + return error; + + /* Re-add every attr to the file. */ + return xfbma_iter_del(rx->xattr_records, xrep_xattr_insert_rec, rx); +} + +/* + * Repair the extended attribute metadata. + * + * XXX: Remote attribute value buffers encompass the entire (up to 64k) buffer. + * The buffer cache in XFS can't handle aliased multiblock buffers, so this + * might misbehave if the attr fork is crosslinked with other filesystem + * metadata. 
+ */ +int +xrep_xattr( + struct xfs_scrub *sc) +{ + struct xrep_xattr rx = { + .sc = sc, + }; + int error; + + if (!xfs_inode_hasattr(sc->ip)) + return -ENOENT; + + /* Set up some storage */ + rx.xattr_records = xfbma_init(sizeof(struct xrep_xattr_key)); + if (IS_ERR(rx.xattr_records)) + return PTR_ERR(rx.xattr_records); + rx.xattr_blobs = xblob_init(); + if (IS_ERR(rx.xattr_blobs)) { + error = PTR_ERR(rx.xattr_blobs); + goto out_arr; + } + + /* Collect extended attributes by parsing raw blocks. */ + error = xrep_xattr_find_attributes(&rx); + if (error) + goto out; + + /* + * Invalidate and truncate all attribute fork extents. This is the + * point at which we are no longer able to bail out gracefully. + * We commit the transaction here because xfs_attr_set allocates its + * own transactions. + */ + xfs_trans_ijoin(sc->tp, sc->ip, 0); + error = xrep_xattr_reset_fork(sc); + if (error) + goto out; + + /* Now rebuild the attribute information. */ + error = xrep_xattr_rebuild_tree(&rx); +out: + xblob_destroy(rx.xattr_blobs); +out_arr: + xfbma_destroy(rx.xattr_records); + return error; +} diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index e59338b83838..8f3e078938af 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -1360,3 +1360,34 @@ xrep_reset_perag_resv( out: return error; } + +/* + * See if this buffer can pass the given ->verify_struct() function. + * + * If the buffer already has ops attached and they're not the ones that were + * passed in, we reject the buffer. Otherwise, we perform the structure test + * (note that we do not check CRCs) and return the outcome of the test. The + * buffer ops and error state are left unchanged. 
+ */ +bool +xrep_buf_verify_struct( + struct xfs_buf *bp, + const struct xfs_buf_ops *ops) +{ + const struct xfs_buf_ops *old_ops = bp->b_ops; + xfs_failaddr_t fa; + int old_error; + + if (old_ops) { + if (old_ops != ops) + return false; + } + + old_error = bp->b_error; + bp->b_ops = ops; + fa = bp->b_ops->verify_struct(bp); + bp->b_ops = old_ops; + bp->b_error = old_error; + + return fa == NULL; +} diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index ed5ac3ee1edb..fe1293334899 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -77,6 +77,7 @@ int xrep_inode(struct xfs_scrub *sc); int xrep_bmap_data(struct xfs_scrub *sc); int xrep_bmap_attr(struct xfs_scrub *sc); int xrep_symlink(struct xfs_scrub *sc); +int xrep_xattr(struct xfs_scrub *sc); struct xrep_newbt_resv { /* Link to list of extents that we've reserved. */ @@ -134,6 +135,8 @@ int xrep_newbt_claim_block(struct xfs_btree_cur *cur, struct xrep_newbt *xnr, void xrep_bload_estimate_slack(struct xfs_scrub *sc, struct xfs_btree_bload *bload); +bool xrep_buf_verify_struct(struct xfs_buf *bp, const struct xfs_buf_ops *ops); + #else static inline int xrep_attempt( @@ -178,6 +181,7 @@ xrep_reset_perag_resv( #define xrep_bmap_data xrep_notsupported #define xrep_bmap_attr xrep_notsupported #define xrep_symlink xrep_notsupported +#define xrep_xattr xrep_notsupported #endif /* CONFIG_XFS_ONLINE_REPAIR */ diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 21357228a7f8..df55dbd700a1 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -298,7 +298,7 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { .type = ST_INODE, .setup = xchk_setup_xattr, .scrub = xchk_xattr, - .repair = xrep_notsupported, + .repair = xrep_xattr, }, [XFS_SCRUB_TYPE_SYMLINK] = { /* symbolic link */ .type = ST_INODE, diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index 218758b76e74..00c26cae38ef 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -171,4 +171,7 @@ struct 
xchk_fscounters { unsigned long long icount_max; }; +bool xchk_xattr_set_map(struct xfs_scrub *sc, unsigned long *map, + unsigned int start, unsigned int len); + #endif /* __XFS_SCRUB_SCRUB_H__ */ diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 2c6c248be823..bb660f75cc56 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -12,6 +12,7 @@ #include "xfs_mount.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_da_format.h" #include "scrub/scrub.h" /* Figure out which block the btree cursor was pointing to. */ diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 661364b0180c..a2573dcd63f7 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -1050,6 +1050,91 @@ TRACE_EVENT(xfbma_sort_stats, __entry->error) ); +TRACE_EVENT(xrep_xattr_recover_leafblock, + TP_PROTO(struct xfs_inode *ip, xfs_dablk_t dabno, uint16_t magic), + TP_ARGS(ip, dabno, magic), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(xfs_dablk_t, dabno) + __field(uint16_t, magic) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->dabno = dabno; + __entry->magic = magic; + ), + TP_printk("dev %d:%d ino 0x%llx dablk %u magic 0x%x", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __entry->dabno, + __entry->magic) +); + +#define XFS_ATTR_NSP_STR \ + { XFS_ATTR_LOCAL, "local" }, \ + { XFS_ATTR_ROOT, "root" }, \ + { XFS_ATTR_SECURE, "secure" } + +TRACE_EVENT(xrep_xattr_salvage_key, + TP_PROTO(struct xfs_inode *ip, unsigned int flags, char *name, + unsigned int namelen, unsigned int valuelen), + TP_ARGS(ip, flags, name, namelen, valuelen), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(unsigned int, flags) + __field(unsigned int, namelen) + __dynamic_array(char, name, namelen) + __field(unsigned int, valuelen) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->flags = 
flags; + __entry->namelen = namelen; + memcpy(__get_str(name), name, namelen); + __entry->valuelen = valuelen; + ), + TP_printk("dev %d:%d ino 0x%llx flags %s name '%.*s' valuelen %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->flags, "|", XFS_ATTR_NSP_STR), + __entry->namelen, + __get_str(name), + __entry->valuelen) +); + +TRACE_EVENT(xrep_xattr_insert_rec, + TP_PROTO(struct xfs_inode *ip, unsigned int flags, char *name, + unsigned int namelen, unsigned int valuelen), + TP_ARGS(ip, flags, name, namelen, valuelen), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(unsigned int, flags) + __field(unsigned int, namelen) + __dynamic_array(char, name, namelen) + __field(unsigned int, valuelen) + ), + TP_fast_assign( + __entry->dev = ip->i_mount->m_super->s_dev; + __entry->ino = ip->i_ino; + __entry->flags = flags; + __entry->namelen = namelen; + memcpy(__get_str(name), name, namelen); + __entry->valuelen = valuelen; + ), + TP_printk("dev %d:%d ino 0x%llx flags %s name '%.*s' valuelen %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_flags(__entry->flags, "|", XFS_ATTR_NSP_STR), + __entry->namelen, + __get_str(name), + __entry->valuelen) +); + #endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */ #endif /* _TRACE_XFS_SCRUB_TRACE_H */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 65f8a1990acc..25292ce9eddd 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -520,8 +520,12 @@ _xfs_buf_obj_cmp( * it stale has not yet committed. i.e. we are * reallocating a busy extent. Skip this buffer and * continue searching for an exact match. + * + * Note: If we're scanning for incore buffers to stale, don't + * complain if we find non-stale buffers. 
*/ - ASSERT(bp->b_flags & XBF_STALE); + if (!(map->bm_flags & XBM_SCAN_STALE)) + ASSERT(bp->b_flags & XBF_STALE); return 1; } return 0; @@ -587,6 +591,9 @@ xfs_buf_find( *found_bp = NULL; + if (flags & XBF_SCAN_STALE) + cmap.bm_flags |= XBM_SCAN_STALE; + for (i = 0; i < nmaps; i++) cmap.bm_len += map[i].bm_len; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index deaa9c2607af..c8643b629caa 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -33,6 +33,12 @@ /* flags used only as arguments to access routines */ #define XBF_TRYLOCK (1 << 16)/* lock requested, but do not wait */ #define XBF_UNMAPPED (1 << 17)/* do not map the buffer */ +/* + * The caller is scanning for incore buffers to mark stale after a repair. + * Don't complain if we find a non-stale buffer of the wrong length, that's + * exactly the point. + */ +#define XBF_SCAN_STALE (1 << 18) /* flags used only internally */ #define _XBF_PAGES (1 << 20)/* backed by refcounted pages */ @@ -102,6 +108,8 @@ typedef void (*xfs_buf_iodone_t)(struct xfs_buf *); struct xfs_buf_map { xfs_daddr_t bm_bn; /* block number for I/O */ int bm_len; /* size of I/O */ + unsigned int bm_flags; +#define XBM_SCAN_STALE (1 << 0) /* see XBF_SCAN_STALE */ }; #define DEFINE_SINGLE_BUF_MAP(map, blkno, numblk) \ |