// SPDX-License-Identifier: GPL-2.0-or-later /* * Copyright (C) 2020 Oracle. All Rights Reserved. * Author: Darrick J. Wong */ #include "xfs.h" #include "xfs_fs.h" #include "xfs_shared.h" #include "xfs_format.h" #include "xfs_trans_resv.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_bit.h" #include "xfs_log_format.h" #include "xfs_trans.h" #include "xfs_sb.h" #include "xfs_inode.h" #include "xfs_icache.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" #include "xfs_dir2.h" #include "xfs_dir2_priv.h" #include "xfs_bmap.h" #include "xfs_quota.h" #include "xfs_bmap_btree.h" #include "xfs_trans_space.h" #include "xfs_iwalk.h" #include "xfs_swapext.h" #include "xfs_bmap_util.h" #include "scrub/xfs_scrub.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" #include "scrub/repair.h" #include "scrub/array.h" #include "scrub/blob.h" #include "scrub/parent.h" /* * Directory Repair * ================ * * We repair directories by reading the directory leaf blocks looking for * entries, truncate the entire directory fork, and reinsert all the entries. * Unfortunately, there's not yet a secondary copy of directory attribute data, * which means that if we blow up midway through there's little we can do. */ /* Directory entry to be restored in the new directory. */ struct xrep_dir_key { /* Cookie for retrieval of the dirent name. */ xblob_cookie name_cookie; /* Target inode number. */ xfs_ino_t ino; /* Hash of the dirent name. */ unsigned int hash; /* Length of the dirent name. */ uint8_t namelen; /* File type of the dirent. */ uint8_t ftype; } __packed; struct xrep_dir { struct xfs_scrub *sc; /* Fixed-size array of xrep_dir_key structures. */ struct xfbma *dir_entries; /* Blobs containing directory entry names. */ struct xblob *dir_names; /* * Potential parent of the directory we're reconstructing. This can * be NULLFSINO if we haven't found any parents; 0 if we've found too * many parents during salvaging; or a regular inode number if we've * found a good candidate. */ xfs_ino_t parent_ino; /* nlink value of the corrected directory. */ xfs_nlink_t new_nlink; }; /* * Decide if we want to salvage this entry. We don't bother with oversized * names or the dot entry. */ STATIC int xrep_dir_want_salvage( struct xrep_dir *rd, const char *name, int namelen, xfs_ino_t ino) { struct xfs_mount *mp = rd->sc->mp; /* No pointers to ourselves or to garbage. */ if (ino == rd->sc->ip->i_ino) return false; if (!xfs_verify_dir_ino(mp, ino)) return false; /* No weird looking names or dot entries. */ if (namelen > MAXNAMELEN || namelen <= 0) return false; if (namelen == 1 && name[0] == '.') return false; return true; } /* Allocate an in-core record to hold entries while we rebuild the dir data. */ STATIC int xrep_dir_salvage_entry( struct xrep_dir *rd, unsigned char *name, unsigned int namelen, xfs_ino_t ino) { struct xrep_dir_key key = { .ino = ino, }; struct xfs_inode *ip; unsigned int i; int error = 0; if (xchk_should_terminate(rd->sc, &error)) return error; /* Truncate the name to the first illegal character. */ for (i = 0; i < namelen && name[i] != 0 && name[i] != '/'; i++); key.namelen = i; key.hash = xfs_da_hashname(name, key.namelen); trace_xrep_dir_salvage_entry(rd->sc->ip, name, key.namelen, ino); /* If this is a '..' entry, we can save it for later... */ if (key.namelen == 2 && name[0] == '.' && name[1] == '.') { switch (rd->parent_ino) { case NULLFSINO: /* Found a parent, save it for later. */ rd->parent_ino = ino; break; default: /* * Found more than one parent, so force a directory * tree walk later. */ rd->parent_ino = 0; /* fall through */ case 0: break; } return 0; } /* * Compute the ftype or dump the entry if we can't. We don't lock the * inode because inodes can't change type while we have a reference. */ error = xfs_iget(rd->sc->mp, rd->sc->tp, ino, XFS_IGET_UNTRUSTED, 0, &ip); if (error) return 0; key.ftype = xfs_mode_to_ftype(VFS_I(ip)->i_mode); xfs_irele(ip); /* Remember this for later. */ error = xblob_put(rd->dir_names, &key.name_cookie, name, key.namelen); if (error) return error; return xfbma_append(rd->dir_entries, &key); } /* Record a shortform directory entry for later reinsertion. */ STATIC int xrep_dir_salvage_sf_entry( struct xrep_dir *rd, struct xfs_dir2_sf_hdr *sfp, struct xfs_dir2_sf_entry *sfep) { xfs_ino_t ino; ino = xfs_dir2_sf_get_ino(rd->sc->mp, sfp, sfep); if (!xrep_dir_want_salvage(rd, sfep->name, sfep->namelen, ino)) return 0; return xrep_dir_salvage_entry(rd, sfep->name, sfep->namelen, ino); } /* Record a regular directory entry for later reinsertion. */ STATIC int xrep_dir_salvage_data_entry( struct xrep_dir *rd, struct xfs_dir2_data_entry *dep) { xfs_ino_t ino; ino = be64_to_cpu(dep->inumber); if (!xrep_dir_want_salvage(rd, dep->name, dep->namelen, ino)) return 0; return xrep_dir_salvage_entry(rd, dep->name, dep->namelen, ino); } /* Try to recover block/data format directory entries. */ STATIC int xrep_dir_recover_data( struct xrep_dir *rd, struct xfs_buf *bp) { struct xfs_da_geometry *geo = rd->sc->mp->m_dir_geo; unsigned int offset; unsigned int end; int error; /* error return value */ /* * Loop over the data portion of the block. * Each object is a real entry (dep) or an unused one (dup). */ offset = geo->data_entry_offset; end = min_t(unsigned int, BBTOB(bp->b_length), xfs_dir3_data_end_offset(geo, bp->b_addr)); while (offset < end) { struct xfs_dir2_data_unused *dup = bp->b_addr + offset; struct xfs_dir2_data_entry *dep = bp->b_addr + offset; if (xchk_should_terminate(rd->sc, &error)) break; /* Skip unused entries. */ if (be16_to_cpu(dup->freetag) == XFS_DIR2_DATA_FREE_TAG) { offset += be16_to_cpu(dup->length); continue; } /* Don't walk off the end of the block. */ offset += xfs_dir2_data_entsize(rd->sc->mp, dep->namelen); if (offset > end) break; /* Ok, let's save this entry. */ error = xrep_dir_salvage_data_entry(rd, dep); if (error) return error; } return 0; } /* Try to recover shortform directory entries. */ STATIC int xrep_dir_recover_sf( struct xrep_dir *rd) { struct xfs_dir2_sf_hdr *sfp; struct xfs_dir2_sf_entry *sfep; struct xfs_dir2_sf_entry *next; struct xfs_ifork *ifp; unsigned char *end; int error; ifp = XFS_IFORK_PTR(rd->sc->ip, XFS_DATA_FORK); sfp = (struct xfs_dir2_sf_hdr *)rd->sc->ip->i_df.if_u1.if_data; end = (unsigned char *)ifp->if_u1.if_data + ifp->if_bytes; rd->parent_ino = xfs_dir2_sf_get_parent_ino(sfp); sfep = xfs_dir2_sf_firstentry(sfp); while ((unsigned char *)sfep < end) { if (xchk_should_terminate(rd->sc, &error)) break; next = xfs_dir2_sf_nextentry(rd->sc->mp, sfp, sfep); if ((unsigned char *)next > end) break; /* Ok, let's save this entry. */ error = xrep_dir_salvage_sf_entry(rd, sfp, sfep); if (error) return error; sfep = next; } return 0; } /* * Try to figure out the format of this directory from the data fork mappings * and the directory size. If we can be reasonably sure of format, we can be * more aggressive in salvaging directory entries. On return, @magic_guess * will be set to DIR3_BLOCK_MAGIC if we think this is a "block format" * directory; DIR3_DATA_MAGIC if we think this is a "data format" directory, * and 0 if we can't tell. */ STATIC void xrep_dir_guess_format( struct xrep_dir *rd, __be32 *magic_guess) { struct xfs_inode *ip = rd->sc->ip; struct xfs_da_geometry *geo = rd->sc->mp->m_dir_geo; xfs_fileoff_t last; int error; ASSERT(xfs_sb_version_hascrc(&ip->i_mount->m_sb)); *magic_guess = 0; /* * If there's a single directory block and the directory size is * exactly one block, this has to be a single block format directory. */ error = xfs_bmap_last_offset(ip, &last, XFS_DATA_FORK); if (!error && XFS_FSB_TO_B(ip->i_mount, last) == geo->blksize && ip->i_d.di_size == geo->blksize) { *magic_guess = cpu_to_be32(XFS_DIR3_BLOCK_MAGIC); return; } /* * If the last extent before the leaf offset matches the directory * size and the directory size is larger than 1 block, this is a * data format directory. */ last = geo->leafblk; error = xfs_bmap_last_before(rd->sc->tp, ip, &last, XFS_DATA_FORK); if (!error && XFS_FSB_TO_B(ip->i_mount, last) > geo->blksize && XFS_FSB_TO_B(ip->i_mount, last) == ip->i_d.di_size) { *magic_guess = cpu_to_be32(XFS_DIR3_DATA_MAGIC); return; } } /* Recover directory entries from a specific directory block. */ STATIC int xrep_dir_recover_dirblock( struct xrep_dir *rd, __be32 magic_guess, xfs_dablk_t dabno) { struct xfs_dir2_data_hdr *hdr; struct xfs_buf *bp; __be32 oldmagic; int error; /* * Try to read buffer. We invalidate them in the next step so we don't * bother to set a buffer type or ops. */ error = xfs_da_read_buf(rd->sc->tp, rd->sc->ip, dabno, XFS_DABUF_MAP_HOLE_OK, &bp, XFS_DATA_FORK, NULL); if (error || !bp) return error; hdr = bp->b_addr; oldmagic = hdr->magic; trace_xrep_dir_recover_dirblock(rd->sc->ip, dabno, be32_to_cpu(hdr->magic), be32_to_cpu(magic_guess)); /* * If we're sure of the block's format, proceed with the salvage * operation using the specified magic number. */ if (magic_guess) { hdr->magic = magic_guess; goto recover; } /* * If we couldn't guess what type of directory this is, then we will * only salvage entries from directory blocks that match the magic * number and pass verifiers. */ switch (hdr->magic) { case cpu_to_be32(XFS_DIR2_BLOCK_MAGIC): case cpu_to_be32(XFS_DIR3_BLOCK_MAGIC): if (!xrep_buf_verify_struct(bp, &xfs_dir3_block_buf_ops)) goto out; break; case cpu_to_be32(XFS_DIR2_DATA_MAGIC): case cpu_to_be32(XFS_DIR3_DATA_MAGIC): if (!xrep_buf_verify_struct(bp, &xfs_dir3_data_buf_ops)) goto out; break; default: goto out; } recover: error = xrep_dir_recover_data(rd, bp); out: hdr->magic = oldmagic; xfs_trans_brelse(rd->sc->tp, bp); return error; } /* Extract as many directory entries as we can. */ STATIC int xrep_dir_recover( struct xrep_dir *rd) { struct xfs_iext_cursor icur; struct xfs_bmbt_irec got; struct xfs_scrub *sc = rd->sc; struct xfs_ifork *ifp; struct xfs_da_geometry *geo = sc->mp->m_dir_geo; xfs_dablk_t dabno; __be32 magic_guess; int error = 0; if (rd->sc->ip->i_d.di_format == XFS_DINODE_FMT_LOCAL) return xrep_dir_recover_sf(rd); xrep_dir_guess_format(rd, &magic_guess); /* Iterate each directory data block in the data fork. */ ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK); for_each_xfs_iext(ifp, &icur, &got) { /* Leaf blocks come after all data blocks, so cut off there. */ xfs_trim_extent(&got, 0, geo->leafblk); if (got.br_blockcount == 0) continue; for (dabno = round_up(got.br_startoff, geo->fsbcount); dabno < got.br_startoff + got.br_blockcount; dabno += geo->fsbcount) { if (xchk_should_terminate(rd->sc, &error)) return error; error = xrep_dir_recover_dirblock(rd, magic_guess, dabno); if (error) break; } } return error; } /* Invalidate a directory's blocks and unmap them. */ STATIC int xrep_dir_reset_nonlocal( struct xfs_scrub *sc, struct xfs_inode *dp) { struct xfs_iext_cursor icur; struct xfs_bmbt_irec got; struct xfs_ifork *ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK); struct xfs_buf *bp; struct xfs_mount *mp = sc->mp; struct xfs_da_geometry *geo = mp->m_dir_geo; xfs_fileoff_t off; /* * Invalidate each directory block. All directory blocks are of * fsbcount length and alignment, so we only need to walk those same * offsets. * * We use TRYLOCK here (recall that we hold the ILOCK of the directory * inode) so that we skip any buffer that's locked on the assumption * that we don't own that block. */ for_each_xfs_iext(ifp, &icur, &got) { for (off = round_up(got.br_startoff, geo->fsbcount); off < got.br_startoff + got.br_blockcount; off += geo->fsbcount) { xfs_fsblock_t fsbno; fsbno = (off - got.br_startoff) + got.br_startblock; bp = xfs_buf_incore(mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, fsbno), XFS_FSB_TO_BB(mp, geo->fsbcount), XBF_TRYLOCK | XBF_SCAN_STALE); if (bp) { xfs_buf_stale(bp); xfs_buf_relse(bp); } } } /* Now free all the blocks. */ return xfs_bunmapi_range(&sc->tp, dp, XFS_DATA_FORK, 0, XFS_MAX_FILEOFF, XFS_BMAPI_NODISCARD); } /* * Free all the directory blocks and reset the data fork. The caller must * join the inode to the transaction. This function returns with the inode * joined to a clean scrub transaction. */ STATIC int xrep_dir_reset_fork( struct xfs_scrub *sc, struct xfs_inode *dp, xfs_ino_t parent_ino) { struct xfs_ifork *ifp; struct xfs_da_args *args = sc->buf; int error; ifp = XFS_IFORK_PTR(dp, XFS_DATA_FORK); /* Unmap all the directory buffers. */ if (xfs_ifork_has_extents(dp, XFS_DATA_FORK)) { error = xrep_dir_reset_nonlocal(sc, dp); if (error) return error; } trace_xrep_dir_reset_fork(dp, parent_ino); /* Reset the data fork to an empty data fork. */ xfs_ifork_reset(ifp); ifp->if_flags = XFS_IFINLINE; ifp->if_bytes = 0; dp->i_d.di_size = 0; /* Reinitialize the short form directory. */ args->geo = sc->mp->m_dir_geo; args->dp = dp; args->trans = sc->tp; error = xfs_dir2_sf_create(args, parent_ino); if (error) return error; return xfs_trans_roll_inode(&sc->tp, dp); } /* Compare two dir keys, sorting in hash order. */ static int xrep_dir_key_cmp( const void *a, const void *b) { const struct xrep_dir_key *ap = a; const struct xrep_dir_key *bp = b; if (ap->hash > bp->hash) return 1; else if (ap->hash < bp->hash) return -1; return 0; } /* * Find all the directory entries for this inode by scraping them out of the * directory leaf blocks by hand. The caller must clean up the lists if * anything goes wrong. */ STATIC int xrep_dir_find_entries( struct xrep_dir *rd) { struct xfs_inode *ip = rd->sc->ip; struct xfs_ifork *ifp; int error; error = xrep_ino_dqattach(rd->sc); if (error) return error; /* Extent map should be loaded. */ ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK); if (XFS_IFORK_FORMAT(ip, XFS_DATA_FORK) != XFS_DINODE_FMT_LOCAL && !(ifp->if_flags & XFS_IFEXTENTS)) { error = xfs_iread_extents(rd->sc->tp, ip, XFS_DATA_FORK); if (error) return error; } /* Read every directory entry and record them in memory. */ return xrep_dir_recover(rd); } /* Insert one dir entry. */ STATIC int xrep_dir_insert_rec( const void *item, void *priv) { struct xfs_name name; const struct xrep_dir_key *key = item; struct xrep_dir *rd = priv; struct xfs_trans *tp; char *namebuf = rd->sc->buf; struct xfs_mount *mp = rd->sc->mp; uint resblks; int error; if (XFS_FORCED_SHUTDOWN(mp)) return -EIO; /* The entry name is stored in the in-core buffer. */ name.name = namebuf; error = xblob_get(rd->dir_names, key->name_cookie, namebuf, key->namelen); if (error) return error; error = xblob_free(rd->dir_names, key->name_cookie); if (error) return error; trace_xrep_dir_insert_rec(rd->sc->tempip, namebuf, key->namelen, key->ino, key->ftype); error = xfs_qm_dqattach(rd->sc->tempip); if (error) return error; resblks = XFS_LINK_SPACE_RES(mp, key->namelen); error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, resblks, 0, 0, &tp); if (error == -ENOSPC) { resblks = 0; error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, 0, 0, 0, &tp); } if (error) return error; xfs_ilock(rd->sc->tempip, XFS_ILOCK_EXCL); xfs_trans_ijoin(tp, rd->sc->tempip, XFS_ILOCK_EXCL); name.len = key->namelen; name.type = key->ftype; error = xfs_dir_createname(tp, rd->sc->tempip, &name, key->ino, resblks); if (error) goto err; if (name.type == XFS_DIR3_FT_DIR) rd->new_nlink++; xfs_trans_log_inode(tp, rd->sc->tempip, XFS_ILOG_CORE); return xfs_trans_commit(tp); err: xfs_trans_cancel(tp); return error; } /* * Prepare both inodes' directory forks for extent swapping. Promote the * tempfile from short format to leaf format, and if the file being repaired * has a short format attr fork, turn it into an empty extent list. */ STATIC int xrep_dir_swap_prep( struct xfs_scrub *sc, bool temp_local, bool ip_local) { int error; /* * If the tempfile's attributes are in shortform format, convert that * to a single leaf extent so that we can use the atomic extent swap. */ if (temp_local) { struct xfs_da_args args = { .dp = sc->tempip, .geo = sc->mp->m_dir_geo, .whichfork = XFS_DATA_FORK, .trans = sc->tp, .total = 1, }; error = xfs_dir2_sf_to_block(&args); if (error) return error; error = xfs_defer_finish(&sc->tp); if (error) return error; } /* * If the file being repaired had a shortform attribute fork, convert * that to an empty extent list in preparation for the atomic extent * swap. */ if (ip_local) { struct xfs_ifork *ifp; sc->ip->i_d.di_format = XFS_DINODE_FMT_EXTENTS; sc->ip->i_d.di_nextents = 0; ifp = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK); xfs_ifork_reset(ifp); ifp->if_bytes = 0; ifp->if_u1.if_root = NULL; ifp->if_height = 0; ifp->if_flags |= XFS_IFEXTENTS; xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE | XFS_ILOG_DDATA); } return 0; } /* * Set the owner for this directory block to the directory being repaired. * Return the magic number that we found, or the usual negative error. */ STATIC int xrep_dir_reset_owner( struct xfs_scrub *sc, xfs_dablk_t dabno, struct xfs_buf *bp, unsigned int *magic) { struct xfs_da_geometry *geo = sc->mp->m_dir_geo; struct xfs_dir3_data_hdr *data3 = bp->b_addr; struct xfs_da3_blkinfo *info3 = bp->b_addr; struct xfs_dir3_free_hdr *free3 = bp->b_addr; struct xfs_dir2_data_entry *dep; /* Directory data blocks. */ if (dabno < geo->leafblk) { *magic = be32_to_cpu(data3->hdr.magic); if (*magic != XFS_DIR3_BLOCK_MAGIC && *magic != XFS_DIR3_DATA_MAGIC) return -EFSCORRUPTED; /* * If this is a block format directory, it's possible that the * block was created as part of converting the temp directory * from short format to block format in order to use the atomic * extent swap. In that case, the '.' entry will be set to * the temp dir, so find the dot entry and reset it. */ if (*magic == XFS_DIR3_BLOCK_MAGIC) { dep = bp->b_addr + geo->data_entry_offset; if (dep->namelen != 1 || dep->name[0] != '.') return -EFSCORRUPTED; dep->inumber = cpu_to_be64(sc->ip->i_ino); } data3->hdr.owner = be64_to_cpu(sc->ip->i_ino); return 0; } /* Directory leaf and da node blocks. */ if (dabno < geo->freeblk) { *magic = be16_to_cpu(info3->hdr.magic); switch (*magic) { case XFS_DA3_NODE_MAGIC: case XFS_DIR3_LEAF1_MAGIC: case XFS_DIR3_LEAFN_MAGIC: break; default: return -EFSCORRUPTED; } info3->owner = be64_to_cpu(sc->ip->i_ino); return 0; } /* Directory free blocks. */ *magic = be32_to_cpu(free3->hdr.magic); if (*magic != XFS_DIR3_FREE_MAGIC) return -EFSCORRUPTED; free3->hdr.owner = be64_to_cpu(sc->ip->i_ino); return 0; } /* * If the buffer didn't have buffer ops set, we need to set them now that we've * dirtied the directory block. */ STATIC void xrep_dir_set_verifier( unsigned int magic, struct xfs_buf *bp) { switch (magic) { case XFS_DIR3_BLOCK_MAGIC: bp->b_ops = &xfs_dir3_block_buf_ops; break; case XFS_DIR3_DATA_MAGIC: bp->b_ops = &xfs_dir3_data_buf_ops; break; case XFS_DA3_NODE_MAGIC: bp->b_ops = &xfs_da3_node_buf_ops; break; case XFS_DIR3_LEAF1_MAGIC: bp->b_ops = &xfs_dir3_leaf1_buf_ops; break; case XFS_DIR3_LEAFN_MAGIC: bp->b_ops = &xfs_dir3_leafn_buf_ops; break; case XFS_DIR3_FREE_MAGIC: bp->b_ops = &xfs_dir3_free_buf_ops; break; } xfs_buf_set_ref(bp, XFS_DIR_BTREE_REF); } /* * Change the owner field of every block in the data fork to match the * directory being repaired. */ STATIC int xrep_dir_swap_owner( struct xfs_scrub *sc) { struct xfs_bmbt_irec map; struct xfs_da_geometry *geo = sc->mp->m_dir_geo; struct xfs_buf *bp; xfs_fileoff_t offset = 0; xfs_fileoff_t end = XFS_MAX_FILEOFF; xfs_dablk_t dabno; int nmap; int error; for (offset = 0; offset < end; offset = map.br_startoff + map.br_blockcount) { nmap = 1; error = xfs_bmapi_read(sc->tempip, offset, end - offset, &map, &nmap, 0); if (error) return error; if (nmap != 1) return -EFSCORRUPTED; if (!xfs_bmap_is_real_extent(&map)) continue; for (dabno = round_up(map.br_startoff, geo->fsbcount); dabno < map.br_startoff + map.br_blockcount; dabno += geo->fsbcount) { unsigned int magic; error = xfs_da_read_buf(sc->tp, sc->tempip, dabno, 0, &bp, XFS_DATA_FORK, NULL); if (error) return error; if (!bp) return -EFSCORRUPTED; error = xrep_dir_reset_owner(sc, dabno, bp, &magic); if (error) { xfs_trans_brelse(sc->tp, bp); return error; } if (bp->b_ops == NULL) xrep_dir_set_verifier(magic, bp); xfs_trans_ordered_buf(sc->tp, bp); xfs_trans_brelse(sc->tp, bp); } } return 0; } /* * If both files' directory structure are in short format, we can copy * the short format data from the tempfile to the repaired file if it'll * fit. */ STATIC void xrep_dir_swap_local( struct xfs_scrub *sc, int newsize) { struct xfs_ifork *ifp1, *ifp2; ifp1 = XFS_IFORK_PTR(sc->tempip, XFS_DATA_FORK); ifp2 = XFS_IFORK_PTR(sc->ip, XFS_DATA_FORK); xfs_idata_realloc(sc->ip, ifp2->if_bytes - ifp1->if_bytes, XFS_DATA_FORK); memcpy(ifp2->if_u1.if_data, ifp1->if_u1.if_data, newsize); xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE | XFS_ILOG_DDATA); } struct xfs_name xfs_name_dot = { (unsigned char *)".", 1, XFS_DIR3_FT_DIR }; /* Swap the temporary directory's data fork with the one being repaired. */ STATIC int xrep_dir_swap( struct xrep_dir *rd) { struct xfs_scrub *sc = rd->sc; unsigned int resblks; bool ip_local, temp_local; int error; resblks = xfs_swap_range_calc_resblks(sc->tempip, sc->ip, XFS_DATA_FORK); error = xchk_trans_alloc(sc, max(1U, resblks)); if (error) return error; /* * Lock and join the inodes to the tansaction so that transaction commit * or cancel will unlock the inodes from this point onwards. */ xfs_lock_two_inodes(sc->ip, XFS_ILOCK_EXCL, sc->tempip, XFS_ILOCK_EXCL); sc->temp_ilock_flags |= XFS_ILOCK_EXCL; sc->ilock_flags |= XFS_ILOCK_EXCL; xfs_trans_ijoin(sc->tp, sc->ip, 0); xfs_trans_ijoin(sc->tp, sc->tempip, 0); /* * Reset the temporary directory's '.' entry to point to the directory * we're repairing. Note: shortform directories lack the dot entry. * * It's possible that this replacement could also expand a sf tempdir * into block format. */ if (XFS_IFORK_FORMAT(sc->tempip, XFS_DATA_FORK) != XFS_DINODE_FMT_LOCAL) { error = xfs_dir_replace(sc->tp, sc->tempip, &xfs_name_dot, sc->ip->i_ino, resblks); if (error) return error; } /* * Reset the temporary directory's '..' entry to point to the parent * that we found. The temporary directory was created with the root * directory as the parent, so we can skip this if repairing a * subdirectory of the root. * * It's also possible that this replacement could also expand a sf * tempdir into block format. */ if (rd->parent_ino != sc->mp->m_rootip->i_ino) { error = xfs_dir_replace(sc->tp, rd->sc->tempip, &xfs_name_dotdot, rd->parent_ino, resblks); if (error) return error; } /* XXX: do we need to roll the transaction here? */ /* * Changing the dot and dotdot entries could have changed the shape of * the directory, so we recompute these. */ ip_local = XFS_IFORK_FORMAT(sc->ip, XFS_DATA_FORK) == XFS_DINODE_FMT_LOCAL; temp_local = XFS_IFORK_FORMAT(sc->tempip, XFS_DATA_FORK) == XFS_DINODE_FMT_LOCAL; /* * If the both files have a local format data fork and the rebuilt * directory data would fit in the repaired file's data fork, copy * the contents from the tempfile and declare ourselves done. */ if (ip_local && temp_local) { if (sc->tempip->i_d.di_size <= XFS_IFORK_DSIZE(sc->ip)) { xrep_dir_swap_local(sc, sc->tempip->i_d.di_size); set_nlink(VFS_I(sc->ip), rd->new_nlink); return 0; } } /* Otherwise, make sure both data forks are in block-mapping mode. */ error = xrep_dir_swap_prep(sc, temp_local, ip_local); if (error) return error; /* Rewrite the owner field of all attr blocks in the temporary file. */ error = xrep_dir_swap_owner(sc); if (error) return error; /* * Set nlink of the directory under repair to the number of * subdirectories that will be in the new directory data. Do this in * the same transaction sequence that (atomically) commits the new * data. */ set_nlink(VFS_I(sc->ip), rd->new_nlink); return xfs_swapext_atomic(&sc->tp, sc->tempip, sc->ip, XFS_DATA_FORK, 0, 0, NULLFILEOFF, XFS_SWAPEXT_SET_SIZES | XFS_SWAPEXT_TO_SHORTFORM2); } /* * Insert all the attributes that we collected. * * Commit the repair transaction and drop the ilock because the attribute * setting code needs to be able to allocate special transactions and take the * ilock on its own. Some day we'll have deferred attribute setting, at which * point we'll be able to use that to replace the attributes atomically and * safely. */ STATIC int xrep_dir_rebuild_tree( struct xrep_dir *rd) { int error; /* * Commit the existing transaction and drop the ILOCK so that we can * use a series of small transactions to rebuild the directory. */ error = xfs_trans_commit(rd->sc->tp); rd->sc->tp = NULL; if (error) return error; /* * Drop the ILOCK so that we don't pin the tail of the log. We still * hold the IOLOCK (aka i_rwsem) which will prevent directory access. */ xfs_iunlock(rd->sc->ip, XFS_ILOCK_EXCL); rd->sc->ilock_flags &= ~XFS_ILOCK_EXCL; /* * Sort the entries hash to minimize dabtree splits when we rebuild the * directory tree information. */ error = xfbma_sort(rd->dir_entries, xrep_dir_key_cmp); if (error) return error; /* Re-add every entry to the temporary directory. */ error = xfbma_iter_del(rd->dir_entries, xrep_dir_insert_rec, rd); if (error) return error; /* Swap the tempdir's data fork with the file being repaired. */ error = xrep_dir_swap(rd); if (error) return error; /* * Now reset the data fork of the temp directory to an empty shortform * directory because inactivation does nothing for directories. We're * done with the inode that we want to repair, so roll the transaction * and drop its ILOCK before we tackle the temporary file. */ error = xfs_trans_roll_inode(&rd->sc->tp, rd->sc->tempip); if (error) return error; xfs_iunlock(rd->sc->ip, XFS_ILOCK_EXCL); rd->sc->ilock_flags &= ~XFS_ILOCK_EXCL; return xrep_dir_reset_fork(rd->sc, rd->sc->tempip, rd->sc->mp->m_rootip->i_ino); } /* * If this directory entry points to the directory we're rebuilding, then the * directory we're scanning is the parent. Remember the parent. */ STATIC int xrep_dir_absorb_parent( struct xfs_inode *dp, struct xfs_name *name, unsigned int dtype, void *data) { struct xrep_dir *rd = data; int error = 0; /* Uhoh, more than one parent for a dir? */ if (rd->parent_ino != NULLFSINO) return -EFSCORRUPTED; if (xchk_should_terminate(rd->sc, &error)) return error; /* We found a potential parent; remember this. */ rd->parent_ino = dp->i_ino; return 0; } /* * Make sure we return with a valid parent inode. * * If the directory salvaging step found a single '..' entry, check the * alleged parent for a dentry pointing to the directory. If this succeds, * we're done. Otherwise, scan the entire filesystem for a parent. */ STATIC int xrep_dir_validate_parent( struct xrep_dir *rd) { struct xfs_scrub *sc = rd->sc; struct xfs_inode *parent; xfs_nlink_t expected_nlink, nlink; int error; /* * If the directory salvage scan found no parent or found an obviously * incorrect parent, try asking the dcache for the parent. * * If the dcache doesn't know about a parent or the parent seems * obviously incorrect, jump to the filesystem scan. * * Otherwise, if the alleged parent seems plausible, scan the directory * to make sure it really points to us. */ if (!xrep_parent_acceptable(sc, rd->parent_ino)) rd->parent_ino = xrep_parent_check_dcache(sc->ip); if (!xrep_parent_acceptable(sc, rd->parent_ino)) goto scan; /* * Grab this parent inode. Since we release the inode before we cancel * the scrub transaction and don't know if releasing the inode will * trigger eofblocks cleanup (which allocates what would be a nested * transaction), we avoid DONTCACHE here. */ error = xfs_iget(sc->mp, sc->tp, rd->parent_ino, XFS_IGET_UNTRUSTED, 0, &parent); if (error) goto scan; if (!S_ISDIR(VFS_I(parent)->i_mode)) goto rele_scan; /* * We prefer to keep the inode locked while we lock and search its * alleged parent for a forward reference. If we can grab the iolock, * validate the pointers and we're done. We must use nowait here to * avoid an ABBA deadlock on the parent and the child inodes. */ if (!xfs_ilock_nowait(parent, XFS_IOLOCK_SHARED)) goto rele_scan; /* * If we're an unlinked directory, the parent /won't/ have a link * to us. Otherwise, it should have one link. */ expected_nlink = VFS_I(sc->ip)->i_nlink == 0 ? 0 : 1; error = xchk_parent_count_parent_dentries(sc, parent, &nlink); if (error) goto unlock_rele_scan; /* The parent is an exact match, we're done. */ if (nlink == expected_nlink) { xfs_iunlock(parent, XFS_IOLOCK_SHARED); xfs_irele(parent); return 0; } unlock_rele_scan: xfs_iunlock(parent, XFS_IOLOCK_SHARED); rele_scan: xfs_irele(parent); scan: /* * If we're an unlinked directory, the parent /won't/ have a link * to us. Set the parent directory to the root. */ if (VFS_I(rd->sc->ip)->i_nlink == 0) { rd->parent_ino = sc->mp->m_sb.sb_rootino; return 0; } /* Scan the entire directory tree for the directory's parent. */ error = xrep_scan_for_parents(sc, sc->ip->i_ino, xrep_dir_absorb_parent, rd); if (error) return error; return rd->parent_ino == NULLFSINO ? -EFSCORRUPTED : 0; } /* * Repair the directory metadata. * * XXX: Directory entry buffers can be multiple fsblocks in size. The buffer * cache in XFS can't handle aliased multiblock buffers, so this might * misbehave if the directory blocks are crosslinked with other filesystem * metadata. * * XXX: Is it necessary to check the dcache for this directory to make sure * that we always recreate every cached entry? */ int xrep_dir( struct xfs_scrub *sc) { struct xrep_dir rd = { .sc = sc, .parent_ino = NULLFSINO, .new_nlink = 2, }; int error; /* Set up some storage */ rd.dir_entries = xfbma_init(sizeof(struct xrep_dir_key)); if (IS_ERR(rd.dir_entries)) return PTR_ERR(rd.dir_entries); rd.dir_names = xblob_init(); if (IS_ERR(rd.dir_names)) { error = PTR_ERR(rd.dir_names); goto out_arr; } /* * The directory scrubber might have dropped the ILOCK, so pick it up * again. */ if (!(sc->ilock_flags & XFS_ILOCK_EXCL)) { xfs_ilock(sc->ip, XFS_ILOCK_EXCL); sc->ilock_flags |= XFS_ILOCK_EXCL; } /* Collect directory entries by parsing raw leaf blocks. */ error = xrep_dir_find_entries(&rd); if (error) goto out; /* * Validate the parent pointer that we observed while salvaging the * directory; or scan the filesystem to find one. We drop the ILOCK * on the directory being repaired to avoid ABBA deadlocks, though we * maintain the directory IOLOCK to prevent concurrent modifications. */ xfs_iunlock(sc->ip, XFS_ILOCK_EXCL); error = xrep_dir_validate_parent(&rd); xfs_ilock(sc->ip, XFS_ILOCK_EXCL); if (error) goto out; /* Now rebuild the directory information. */ error = xrep_dir_rebuild_tree(&rd); out: xblob_destroy(rd.dir_names); out_arr: xfbma_destroy(rd.dir_entries); return error; }