diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/xfs/Makefile | 1 | ||||
-rw-r--r-- | fs/xfs/scrub/scrub.c | 7 | ||||
-rw-r--r-- | fs/xfs/scrub/scrub.h | 8 | ||||
-rw-r--r-- | fs/xfs/scrub/tempfile.c | 213 | ||||
-rw-r--r-- | fs/xfs/scrub/tempfile.h | 20 | ||||
-rw-r--r-- | fs/xfs/scrub/trace.h | 33 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 1 |
8 files changed, 284 insertions, 2 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index a66550d89309..7455c1360548 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -184,6 +184,7 @@ xfs-y += $(addprefix scrub/, \ repair.o \ rmap_repair.o \ symlink_repair.o \ + tempfile.o \ ) xfs-$(CONFIG_XFS_QUOTA) += $(addprefix scrub/, \ diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 29d1ae5476e5..1ba85edbf694 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -19,12 +19,14 @@ #include "xfs_scrub.h" #include "xfs_btree.h" #include "xfs_btree_staging.h" +#include "xfs_xchgrange.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/trace.h" #include "scrub/repair.h" #include "scrub/health.h" #include "scrub/xfile.h" +#include "scrub/tempfile.h" /* * Online Scrub and Repair @@ -168,6 +170,10 @@ xchk_teardown( xfs_irele(sc->ip); sc->ip = NULL; } + if (sc->flags & XREP_ATOMIC_EXCHANGE) { + xfs_xchg_range_rele_log_assist(sc->mp); + sc->flags &= ~XREP_ATOMIC_EXCHANGE; + } if (sc->flags & XCHK_FS_FROZEN) { int err2 = xchk_fs_thaw(sc); @@ -189,6 +195,7 @@ xchk_teardown( sc->buf_cleanup = NULL; sc->buf = NULL; } + xrep_tempfile_rele(sc); return error; } diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index bde10b43260a..08c788db2c4f 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -94,6 +94,10 @@ struct xfs_scrub { /* Lock flags for @ip. */ uint ilock_flags; + /* A temporary file on this filesystem, for staging new metadata. */ + struct xfs_inode *tempip; + uint temp_ilock_flags; + /* See the XCHK/XREP state flags below. */ unsigned int flags; @@ -112,6 +116,10 @@ struct xfs_scrub { #define XCHK_TRY_HARDER (1 << 0) /* can't get resources, try again */ #define XCHK_REAPING_DISABLED (1 << 2) /* background block reaping paused */ #define XCHK_FS_FROZEN (1 << 3) /* we froze the fs to do things */ + +/* uses atomic metadata file content exchange */ +#define XREP_ATOMIC_EXCHANGE (1 << 29) + #define XREP_RESET_PERAG_RESV (1 << 30) /* must reset AG space reservation */ #define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */ diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c new file mode 100644 index 000000000000..5ff376d228db --- /dev/null +++ b/fs/xfs/scrub/tempfile.c @@ -0,0 +1,213 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2021 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_inode.h" +#include "xfs_ialloc.h" +#include "xfs_quota.h" +#include "xfs_bmap_btree.h" +#include "xfs_trans_space.h" +#include "xfs_dir2.h" +#include "xfs_xchgrange.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/tempfile.h" + +/* + * Create a temporary file for reconstructing metadata, with the intention of + * atomically swapping the temporary file's contents with the file that's + * being repaired. + */ +int +xrep_tempfile_create( + struct xfs_scrub *sc, + uint16_t mode) +{ + struct xfs_mount *mp = sc->mp; + struct xfs_trans *tp = NULL; + struct xfs_dquot *udqp = NULL; + struct xfs_dquot *gdqp = NULL; + struct xfs_dquot *pdqp = NULL; + struct xfs_trans_res *tres; + struct xfs_inode *dp = mp->m_rootip; + xfs_ino_t ino; + unsigned int resblks; + bool is_dir = S_ISDIR(mode); + bool use_log = false; + int error; + + if (xfs_is_shutdown(mp)) + return -EIO; + if (xfs_is_readonly(mp)) + return -EROFS; + + ASSERT(sc->tp == NULL); + ASSERT(sc->tempip == NULL); + + /* Enable atomic extent swapping. */ + error = xfs_xchg_range_grab_log_assist(mp, true, &use_log); + if (error) + return error; + ASSERT(use_log); + sc->flags |= XREP_ATOMIC_EXCHANGE; + + /* + * Make sure that we have allocated dquot(s) on disk. The temporary + * inode should be completely root owned so that we don't fail due to + * quota limits. + */ + error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0, + XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp); + if (error) + return error; + + if (is_dir) { + resblks = XFS_MKDIR_SPACE_RES(mp, 0); + tres = &M_RES(mp)->tr_mkdir; + } else { + resblks = XFS_IALLOC_SPACE_RES(mp); + tres = &M_RES(mp)->tr_create_tmpfile; + } + + error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks, + &tp); + if (error) + goto out_release_dquots; + + /* Allocate inode, set up directory. */ + error = xfs_dialloc(&tp, dp->i_ino, mode, &ino); + if (error) + goto out_trans_cancel; + error = xfs_init_new_inode(&init_user_ns, tp, dp, ino, mode, 0, 0, + 0, false, &sc->tempip); + if (error) + goto out_trans_cancel; + + /* Change the ownership of the inode to root. */ + VFS_I(sc->tempip)->i_uid = GLOBAL_ROOT_UID; + VFS_I(sc->tempip)->i_gid = GLOBAL_ROOT_GID; + xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE); + + /* + * Mark our temporary file as private so that LSMs and the ACL code + * don't try to add their own metadata or reason about these files. + * The file should never be exposed to userspace. + */ + VFS_I(sc->tempip)->i_flags |= S_PRIVATE; + VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR; + + if (is_dir) { + error = xfs_dir_init(tp, sc->tempip, dp); + if (error) + goto out_trans_cancel; + } + + /* + * Attach the dquot(s) to the inodes and modify them incore. + * These ids of the inode couldn't have changed since the new + * inode has been locked ever since it was created. + */ + xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp); + + /* + * Put our temp file on the unlinked list so it's purged automatically. + * Anything being reconstructed using this file must be atomically + * swapped with the original file because the contents here will be + * purged when the inode is dropped or log recovery cleans out the + * unlinked list. + */ + error = xfs_iunlink(tp, sc->tempip); + if (error) + goto out_trans_cancel; + + error = xfs_trans_commit(tp); + if (error) + goto out_release_inode; + + trace_xrep_tempfile_create(sc); + + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); + xfs_qm_dqrele(pdqp); + + /* Finish setting up the incore / vfs context. */ + xfs_setup_iops(sc->tempip); + xfs_finish_inode_setup(sc->tempip); + + sc->temp_ilock_flags = 0; + return error; + +out_trans_cancel: + xfs_trans_cancel(tp); +out_release_inode: + /* + * Wait until after the current transaction is aborted to finish the + * setup of the inode and release the inode. This prevents recursive + * transactions and deadlocks from xfs_inactive. + */ + if (sc->tempip) { + xfs_finish_inode_setup(sc->tempip); + xfs_irele(sc->tempip); + } +out_release_dquots: + xfs_qm_dqrele(udqp); + xfs_qm_dqrele(gdqp); + xfs_qm_dqrele(pdqp); + + return error; +} + +void +xrep_tempfile_ilock( + struct xfs_scrub *sc, + unsigned int ilock_flags) +{ + sc->temp_ilock_flags |= ilock_flags; + xfs_ilock(sc->tempip, ilock_flags); +} + +bool +xrep_tempfile_ilock_nowait( + struct xfs_scrub *sc, + unsigned int ilock_flags) +{ + if (xfs_ilock_nowait(sc->tempip, ilock_flags)) { + sc->temp_ilock_flags |= ilock_flags; + return true; + } + + return false; +} + +void +xrep_tempfile_iunlock( + struct xfs_scrub *sc, + unsigned int ilock_flags) +{ + xfs_iunlock(sc->tempip, ilock_flags); + sc->temp_ilock_flags &= ~ilock_flags; +} + +/* Release the temporary file. */ +void +xrep_tempfile_rele( + struct xfs_scrub *sc) +{ + if (!sc->tempip) + return; + + if (sc->temp_ilock_flags) + xrep_tempfile_iunlock(sc, sc->temp_ilock_flags); + xfs_irele(sc->tempip); + sc->tempip = NULL; +} diff --git a/fs/xfs/scrub/tempfile.h b/fs/xfs/scrub/tempfile.h new file mode 100644 index 000000000000..a6a4c8d6a373 --- /dev/null +++ b/fs/xfs/scrub/tempfile.h @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (C) 2021 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef __XFS_SCRUB_TEMPFILE_H__ +#define __XFS_SCRUB_TEMPFILE_H__ + +#ifdef CONFIG_XFS_ONLINE_REPAIR +int xrep_tempfile_create(struct xfs_scrub *sc, uint16_t mode); +void xrep_tempfile_rele(struct xfs_scrub *sc); + +void xrep_tempfile_ilock(struct xfs_scrub *sc, unsigned int ilock_flags); +bool xrep_tempfile_ilock_nowait(struct xfs_scrub *sc, unsigned int ilock_flags); +void xrep_tempfile_iunlock(struct xfs_scrub *sc, unsigned int ilock_flags); +#else +# define xrep_tempfile_rele(sc) +#endif /* CONFIG_XFS_ONLINE_REPAIR */ + +#endif /* __XFS_SCRUB_TEMPFILE_H__ */ diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index f188e17cac49..d147c4f072fe 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -1491,6 +1491,39 @@ DEFINE_XREP_DQUOT_EVENT(xrep_dquot_item); DEFINE_XREP_DQUOT_EVENT(xrep_disk_dquot); DEFINE_XREP_DQUOT_EVENT(xrep_quotacheck_dquot); +TRACE_EVENT(xrep_tempfile_create, + TP_PROTO(struct xfs_scrub *sc), + TP_ARGS(sc), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, ino) + __field(unsigned int, type) + __field(xfs_agnumber_t, agno) + __field(xfs_ino_t, inum) + __field(unsigned int, gen) + __field(unsigned int, flags) + __field(xfs_ino_t, temp_inum) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->ino = sc->file ? XFS_I(file_inode(sc->file))->i_ino : 0; + __entry->type = sc->sm->sm_type; + __entry->agno = sc->sm->sm_agno; + __entry->inum = sc->sm->sm_ino; + __entry->gen = sc->sm->sm_gen; + __entry->flags = sc->sm->sm_flags; + __entry->temp_inum = sc->tempip->i_ino; + ), + TP_printk("dev %d:%d ino 0x%llx type %s inum 0x%llx gen 0x%x flags 0x%x temp_inum 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, + __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS), + __entry->inum, + __entry->gen, + __entry->flags, + __entry->temp_inum) +); + #endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */ #endif /* _TRACE_XFS_SCRUB_TRACE_H */ diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 2d895e7ed0a1..c82d581ce468 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -40,7 +40,6 @@ kmem_zone_t *xfs_inode_zone; -STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *); STATIC int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag, struct xfs_inode *); @@ -2174,7 +2173,7 @@ out: * We place the on-disk inode on a list in the AGI. It will be pulled from this * list when the inode is freed. */ -STATIC int +int xfs_iunlink( struct xfs_trans *tp, struct xfs_inode *ip) diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 8c66bc8305a0..a8eaf18b5310 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -518,6 +518,7 @@ bool xfs_inode_needs_inactive(struct xfs_inode *ip); int xfs_iunlink_init(struct xfs_perag *pag); void xfs_iunlink_destroy(struct xfs_perag *pag); +int xfs_iunlink(struct xfs_trans *tp, struct xfs_inode *ip); void xfs_end_io(struct work_struct *work); |