summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2021-09-01 10:59:04 -0700
committerDarrick J. Wong <djwong@kernel.org>2021-10-22 16:40:45 -0700
commit9fc504f372b56a2d5c64ecfaa95c4ce086342113 (patch)
tree4862091060149b6b5e276edd903d8c3c01779d37
parentcb6b925a14ff326f54c22ab59d34eced3389eb77 (diff)
xfs: create temporary files and directories for online repair
Teach the online repair code how to create temporary files or directories. These temporary files can be used to stage reconstructed information until we're ready to perform an atomic extent swap to commit the new metadata. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/scrub/scrub.c7
-rw-r--r--fs/xfs/scrub/scrub.h8
-rw-r--r--fs/xfs/scrub/tempfile.c213
-rw-r--r--fs/xfs/scrub/tempfile.h20
-rw-r--r--fs/xfs/scrub/trace.h33
-rw-r--r--fs/xfs/xfs_inode.c3
-rw-r--r--fs/xfs/xfs_inode.h1
8 files changed, 284 insertions, 2 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index a66550d89309..7455c1360548 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -184,6 +184,7 @@ xfs-y += $(addprefix scrub/, \
repair.o \
rmap_repair.o \
symlink_repair.o \
+ tempfile.o \
)
xfs-$(CONFIG_XFS_QUOTA) += $(addprefix scrub/, \
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 29d1ae5476e5..1ba85edbf694 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -19,12 +19,14 @@
#include "xfs_scrub.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
+#include "xfs_xchgrange.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/health.h"
#include "scrub/xfile.h"
+#include "scrub/tempfile.h"
/*
* Online Scrub and Repair
@@ -168,6 +170,10 @@ xchk_teardown(
xfs_irele(sc->ip);
sc->ip = NULL;
}
+ if (sc->flags & XREP_ATOMIC_EXCHANGE) {
+ xfs_xchg_range_rele_log_assist(sc->mp);
+ sc->flags &= ~XREP_ATOMIC_EXCHANGE;
+ }
if (sc->flags & XCHK_FS_FROZEN) {
int err2 = xchk_fs_thaw(sc);
@@ -189,6 +195,7 @@ xchk_teardown(
sc->buf_cleanup = NULL;
sc->buf = NULL;
}
+ xrep_tempfile_rele(sc);
return error;
}
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index bde10b43260a..08c788db2c4f 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -94,6 +94,10 @@ struct xfs_scrub {
/* Lock flags for @ip. */
uint ilock_flags;
+ /* A temporary file on this filesystem, for staging new metadata. */
+ struct xfs_inode *tempip;
+ uint temp_ilock_flags;
+
/* See the XCHK/XREP state flags below. */
unsigned int flags;
@@ -112,6 +116,10 @@ struct xfs_scrub {
#define XCHK_TRY_HARDER (1 << 0) /* can't get resources, try again */
#define XCHK_REAPING_DISABLED (1 << 2) /* background block reaping paused */
#define XCHK_FS_FROZEN (1 << 3) /* we froze the fs to do things */
+
+/* uses atomic metadata file content exchange */
+#define XREP_ATOMIC_EXCHANGE (1 << 29)
+
#define XREP_RESET_PERAG_RESV (1 << 30) /* must reset AG space reservation */
#define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */
diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c
new file mode 100644
index 000000000000..5ff376d228db
--- /dev/null
+++ b/fs/xfs/scrub/tempfile.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_inode.h"
+#include "xfs_ialloc.h"
+#include "xfs_quota.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_dir2.h"
+#include "xfs_xchgrange.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/tempfile.h"
+
+/*
+ * Create a temporary file for reconstructing metadata, with the intention of
+ * atomically swapping the temporary file's contents with the file that's
+ * being repaired.
+ */
+int
+xrep_tempfile_create(
+ struct xfs_scrub *sc,
+ uint16_t mode)
+{
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_trans *tp = NULL;
+ struct xfs_dquot *udqp = NULL;
+ struct xfs_dquot *gdqp = NULL;
+ struct xfs_dquot *pdqp = NULL;
+ struct xfs_trans_res *tres;
+ struct xfs_inode *dp = mp->m_rootip;
+ xfs_ino_t ino;
+ unsigned int resblks;
+ bool is_dir = S_ISDIR(mode);
+ bool use_log = false;
+ int error;
+
+ if (xfs_is_shutdown(mp))
+ return -EIO;
+ if (xfs_is_readonly(mp))
+ return -EROFS;
+
+ ASSERT(sc->tp == NULL);
+ ASSERT(sc->tempip == NULL);
+
+ /* Enable atomic extent swapping. */
+ error = xfs_xchg_range_grab_log_assist(mp, true, &use_log);
+ if (error)
+ return error;
+ ASSERT(use_log);
+ sc->flags |= XREP_ATOMIC_EXCHANGE;
+
+ /*
+ * Make sure that we have allocated dquot(s) on disk. The temporary
+ * inode should be completely root owned so that we don't fail due to
+ * quota limits.
+ */
+ error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
+ XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
+ if (error)
+ return error;
+
+ if (is_dir) {
+ resblks = XFS_MKDIR_SPACE_RES(mp, 0);
+ tres = &M_RES(mp)->tr_mkdir;
+ } else {
+ resblks = XFS_IALLOC_SPACE_RES(mp);
+ tres = &M_RES(mp)->tr_create_tmpfile;
+ }
+
+ error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
+ &tp);
+ if (error)
+ goto out_release_dquots;
+
+ /* Allocate inode, set up directory. */
+ error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
+ if (error)
+ goto out_trans_cancel;
+ error = xfs_init_new_inode(&init_user_ns, tp, dp, ino, mode, 0, 0,
+ 0, false, &sc->tempip);
+ if (error)
+ goto out_trans_cancel;
+
+ /* Change the ownership of the inode to root. */
+ VFS_I(sc->tempip)->i_uid = GLOBAL_ROOT_UID;
+ VFS_I(sc->tempip)->i_gid = GLOBAL_ROOT_GID;
+ xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE);
+
+ /*
+ * Mark our temporary file as private so that LSMs and the ACL code
+ * don't try to add their own metadata or reason about these files.
+ * The file should never be exposed to userspace.
+ */
+ VFS_I(sc->tempip)->i_flags |= S_PRIVATE;
+ VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR;
+
+ if (is_dir) {
+ error = xfs_dir_init(tp, sc->tempip, dp);
+ if (error)
+ goto out_trans_cancel;
+ }
+
+ /*
+ * Attach the dquot(s) to the inodes and modify them incore.
+ * These ids of the inode couldn't have changed since the new
+ * inode has been locked ever since it was created.
+ */
+ xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp);
+
+ /*
+ * Put our temp file on the unlinked list so it's purged automatically.
+ * Anything being reconstructed using this file must be atomically
+ * swapped with the original file because the contents here will be
+ * purged when the inode is dropped or log recovery cleans out the
+ * unlinked list.
+ */
+ error = xfs_iunlink(tp, sc->tempip);
+ if (error)
+ goto out_trans_cancel;
+
+ error = xfs_trans_commit(tp);
+ if (error)
+ goto out_release_inode;
+
+ trace_xrep_tempfile_create(sc);
+
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
+ xfs_qm_dqrele(pdqp);
+
+ /* Finish setting up the incore / vfs context. */
+ xfs_setup_iops(sc->tempip);
+ xfs_finish_inode_setup(sc->tempip);
+
+ sc->temp_ilock_flags = 0;
+ return error;
+
+out_trans_cancel:
+ xfs_trans_cancel(tp);
+out_release_inode:
+ /*
+ * Wait until after the current transaction is aborted to finish the
+ * setup of the inode and release the inode. This prevents recursive
+ * transactions and deadlocks from xfs_inactive.
+ */
+ if (sc->tempip) {
+ xfs_finish_inode_setup(sc->tempip);
+ xfs_irele(sc->tempip);
+ }
+out_release_dquots:
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
+ xfs_qm_dqrele(pdqp);
+
+ return error;
+}
+
+void
+xrep_tempfile_ilock(
+ struct xfs_scrub *sc,
+ unsigned int ilock_flags)
+{
+ sc->temp_ilock_flags |= ilock_flags;
+ xfs_ilock(sc->tempip, ilock_flags);
+}
+
+bool
+xrep_tempfile_ilock_nowait(
+ struct xfs_scrub *sc,
+ unsigned int ilock_flags)
+{
+ if (xfs_ilock_nowait(sc->tempip, ilock_flags)) {
+ sc->temp_ilock_flags |= ilock_flags;
+ return true;
+ }
+
+ return false;
+}
+
+void
+xrep_tempfile_iunlock(
+ struct xfs_scrub *sc,
+ unsigned int ilock_flags)
+{
+ xfs_iunlock(sc->tempip, ilock_flags);
+ sc->temp_ilock_flags &= ~ilock_flags;
+}
+
+/* Release the temporary file. */
+void
+xrep_tempfile_rele(
+ struct xfs_scrub *sc)
+{
+ if (!sc->tempip)
+ return;
+
+ if (sc->temp_ilock_flags)
+ xrep_tempfile_iunlock(sc, sc->temp_ilock_flags);
+ xfs_irele(sc->tempip);
+ sc->tempip = NULL;
+}
diff --git a/fs/xfs/scrub/tempfile.h b/fs/xfs/scrub/tempfile.h
new file mode 100644
index 000000000000..a6a4c8d6a373
--- /dev/null
+++ b/fs/xfs/scrub/tempfile.h
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_SCRUB_TEMPFILE_H__
+#define __XFS_SCRUB_TEMPFILE_H__
+
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+int xrep_tempfile_create(struct xfs_scrub *sc, uint16_t mode);
+void xrep_tempfile_rele(struct xfs_scrub *sc);
+
+void xrep_tempfile_ilock(struct xfs_scrub *sc, unsigned int ilock_flags);
+bool xrep_tempfile_ilock_nowait(struct xfs_scrub *sc, unsigned int ilock_flags);
+void xrep_tempfile_iunlock(struct xfs_scrub *sc, unsigned int ilock_flags);
+#else
+# define xrep_tempfile_rele(sc)
+#endif /* CONFIG_XFS_ONLINE_REPAIR */
+
+#endif /* __XFS_SCRUB_TEMPFILE_H__ */
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index f188e17cac49..d147c4f072fe 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -1491,6 +1491,39 @@ DEFINE_XREP_DQUOT_EVENT(xrep_dquot_item);
DEFINE_XREP_DQUOT_EVENT(xrep_disk_dquot);
DEFINE_XREP_DQUOT_EVENT(xrep_quotacheck_dquot);
+TRACE_EVENT(xrep_tempfile_create,
+ TP_PROTO(struct xfs_scrub *sc),
+ TP_ARGS(sc),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(unsigned int, type)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_ino_t, inum)
+ __field(unsigned int, gen)
+ __field(unsigned int, flags)
+ __field(xfs_ino_t, temp_inum)
+ ),
+ TP_fast_assign(
+ __entry->dev = sc->mp->m_super->s_dev;
+ __entry->ino = sc->file ? XFS_I(file_inode(sc->file))->i_ino : 0;
+ __entry->type = sc->sm->sm_type;
+ __entry->agno = sc->sm->sm_agno;
+ __entry->inum = sc->sm->sm_ino;
+ __entry->gen = sc->sm->sm_gen;
+ __entry->flags = sc->sm->sm_flags;
+ __entry->temp_inum = sc->tempip->i_ino;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx type %s inum 0x%llx gen 0x%x flags 0x%x temp_inum 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
+ __entry->inum,
+ __entry->gen,
+ __entry->flags,
+ __entry->temp_inum)
+);
+
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 2d895e7ed0a1..c82d581ce468 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -40,7 +40,6 @@
kmem_zone_t *xfs_inode_zone;
-STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *);
STATIC int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag,
struct xfs_inode *);
@@ -2174,7 +2173,7 @@ out:
* We place the on-disk inode on a list in the AGI. It will be pulled from this
* list when the inode is freed.
*/
-STATIC int
+int
xfs_iunlink(
struct xfs_trans *tp,
struct xfs_inode *ip)
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 8c66bc8305a0..a8eaf18b5310 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -518,6 +518,7 @@ bool xfs_inode_needs_inactive(struct xfs_inode *ip);
int xfs_iunlink_init(struct xfs_perag *pag);
void xfs_iunlink_destroy(struct xfs_perag *pag);
+int xfs_iunlink(struct xfs_trans *tp, struct xfs_inode *ip);
void xfs_end_io(struct work_struct *work);