summaryrefslogtreecommitdiff
path: root/fs/xfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/Makefile1
-rw-r--r--fs/xfs/scrub/scrub.c7
-rw-r--r--fs/xfs/scrub/scrub.h8
-rw-r--r--fs/xfs/scrub/tempfile.c213
-rw-r--r--fs/xfs/scrub/tempfile.h20
-rw-r--r--fs/xfs/scrub/trace.h33
-rw-r--r--fs/xfs/xfs_inode.c3
-rw-r--r--fs/xfs/xfs_inode.h1
8 files changed, 284 insertions, 2 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index a66550d89309..7455c1360548 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -184,6 +184,7 @@ xfs-y += $(addprefix scrub/, \
repair.o \
rmap_repair.o \
symlink_repair.o \
+ tempfile.o \
)
xfs-$(CONFIG_XFS_QUOTA) += $(addprefix scrub/, \
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 29d1ae5476e5..1ba85edbf694 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -19,12 +19,14 @@
#include "xfs_scrub.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
+#include "xfs_xchgrange.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/health.h"
#include "scrub/xfile.h"
+#include "scrub/tempfile.h"
/*
* Online Scrub and Repair
@@ -168,6 +170,10 @@ xchk_teardown(
xfs_irele(sc->ip);
sc->ip = NULL;
}
+ if (sc->flags & XREP_ATOMIC_EXCHANGE) {
+ xfs_xchg_range_rele_log_assist(sc->mp);
+ sc->flags &= ~XREP_ATOMIC_EXCHANGE;
+ }
if (sc->flags & XCHK_FS_FROZEN) {
int err2 = xchk_fs_thaw(sc);
@@ -189,6 +195,7 @@ xchk_teardown(
sc->buf_cleanup = NULL;
sc->buf = NULL;
}
+ xrep_tempfile_rele(sc);
return error;
}
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index bde10b43260a..08c788db2c4f 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -94,6 +94,10 @@ struct xfs_scrub {
/* Lock flags for @ip. */
uint ilock_flags;
+ /* A temporary file on this filesystem, for staging new metadata. */
+ struct xfs_inode *tempip;
+ uint temp_ilock_flags;
+
/* See the XCHK/XREP state flags below. */
unsigned int flags;
@@ -112,6 +116,10 @@ struct xfs_scrub {
#define XCHK_TRY_HARDER (1 << 0) /* can't get resources, try again */
#define XCHK_REAPING_DISABLED (1 << 2) /* background block reaping paused */
#define XCHK_FS_FROZEN (1 << 3) /* we froze the fs to do things */
+
+/* uses atomic metadata file content exchange */
+#define XREP_ATOMIC_EXCHANGE (1 << 29)
+
#define XREP_RESET_PERAG_RESV (1 << 30) /* must reset AG space reservation */
#define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */
diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c
new file mode 100644
index 000000000000..5ff376d228db
--- /dev/null
+++ b/fs/xfs/scrub/tempfile.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_inode.h"
+#include "xfs_ialloc.h"
+#include "xfs_quota.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_dir2.h"
+#include "xfs_xchgrange.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/tempfile.h"
+
+/*
+ * Create a temporary file for reconstructing metadata, with the intention of
+ * atomically swapping the temporary file's contents with the file that's
+ * being repaired.
+ */
+int
+xrep_tempfile_create(
+ struct xfs_scrub *sc,
+ uint16_t mode)
+{
+ struct xfs_mount *mp = sc->mp;
+ struct xfs_trans *tp = NULL;
+ struct xfs_dquot *udqp = NULL;
+ struct xfs_dquot *gdqp = NULL;
+ struct xfs_dquot *pdqp = NULL;
+ struct xfs_trans_res *tres;
+ struct xfs_inode *dp = mp->m_rootip;
+ xfs_ino_t ino;
+ unsigned int resblks;
+ bool is_dir = S_ISDIR(mode);
+ bool use_log = false;
+ int error;
+
+ if (xfs_is_shutdown(mp))
+ return -EIO;
+ if (xfs_is_readonly(mp))
+ return -EROFS;
+
+ ASSERT(sc->tp == NULL);
+ ASSERT(sc->tempip == NULL);
+
+ /* Enable atomic extent swapping. */
+ error = xfs_xchg_range_grab_log_assist(mp, true, &use_log);
+ if (error)
+ return error;
+ ASSERT(use_log);
+ sc->flags |= XREP_ATOMIC_EXCHANGE;
+
+ /*
+ * Make sure that we have allocated dquot(s) on disk. The temporary
+ * inode should be completely root owned so that we don't fail due to
+ * quota limits.
+ */
+ error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
+ XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
+ if (error)
+ return error;
+
+ if (is_dir) {
+ resblks = XFS_MKDIR_SPACE_RES(mp, 0);
+ tres = &M_RES(mp)->tr_mkdir;
+ } else {
+ resblks = XFS_IALLOC_SPACE_RES(mp);
+ tres = &M_RES(mp)->tr_create_tmpfile;
+ }
+
+ error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
+ &tp);
+ if (error)
+ goto out_release_dquots;
+
+ /* Allocate inode, set up directory. */
+ error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
+ if (error)
+ goto out_trans_cancel;
+ error = xfs_init_new_inode(&init_user_ns, tp, dp, ino, mode, 0, 0,
+ 0, false, &sc->tempip);
+ if (error)
+ goto out_trans_cancel;
+
+ /* Change the ownership of the inode to root. */
+ VFS_I(sc->tempip)->i_uid = GLOBAL_ROOT_UID;
+ VFS_I(sc->tempip)->i_gid = GLOBAL_ROOT_GID;
+ xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE);
+
+ /*
+ * Mark our temporary file as private so that LSMs and the ACL code
+ * don't try to add their own metadata or reason about these files.
+ * The file should never be exposed to userspace.
+ */
+ VFS_I(sc->tempip)->i_flags |= S_PRIVATE;
+ VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR;
+
+ if (is_dir) {
+ error = xfs_dir_init(tp, sc->tempip, dp);
+ if (error)
+ goto out_trans_cancel;
+ }
+
+ /*
+ * Attach the dquot(s) to the inodes and modify them incore.
+ * These ids of the inode couldn't have changed since the new
+ * inode has been locked ever since it was created.
+ */
+ xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp);
+
+ /*
+ * Put our temp file on the unlinked list so it's purged automatically.
+ * Anything being reconstructed using this file must be atomically
+ * swapped with the original file because the contents here will be
+ * purged when the inode is dropped or log recovery cleans out the
+ * unlinked list.
+ */
+ error = xfs_iunlink(tp, sc->tempip);
+ if (error)
+ goto out_trans_cancel;
+
+ error = xfs_trans_commit(tp);
+ if (error)
+ goto out_release_inode;
+
+ trace_xrep_tempfile_create(sc);
+
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
+ xfs_qm_dqrele(pdqp);
+
+ /* Finish setting up the incore / vfs context. */
+ xfs_setup_iops(sc->tempip);
+ xfs_finish_inode_setup(sc->tempip);
+
+ sc->temp_ilock_flags = 0;
+ return error;
+
+out_trans_cancel:
+ xfs_trans_cancel(tp);
+out_release_inode:
+ /*
+ * Wait until after the current transaction is aborted to finish the
+ * setup of the inode and release the inode. This prevents recursive
+ * transactions and deadlocks from xfs_inactive.
+ */
+ if (sc->tempip) {
+ xfs_finish_inode_setup(sc->tempip);
+ xfs_irele(sc->tempip);
+ }
+out_release_dquots:
+ xfs_qm_dqrele(udqp);
+ xfs_qm_dqrele(gdqp);
+ xfs_qm_dqrele(pdqp);
+
+ return error;
+}
+
+void
+xrep_tempfile_ilock(
+ struct xfs_scrub *sc,
+ unsigned int ilock_flags)
+{
+ sc->temp_ilock_flags |= ilock_flags;
+ xfs_ilock(sc->tempip, ilock_flags);
+}
+
+bool
+xrep_tempfile_ilock_nowait(
+ struct xfs_scrub *sc,
+ unsigned int ilock_flags)
+{
+ if (xfs_ilock_nowait(sc->tempip, ilock_flags)) {
+ sc->temp_ilock_flags |= ilock_flags;
+ return true;
+ }
+
+ return false;
+}
+
+void
+xrep_tempfile_iunlock(
+ struct xfs_scrub *sc,
+ unsigned int ilock_flags)
+{
+ xfs_iunlock(sc->tempip, ilock_flags);
+ sc->temp_ilock_flags &= ~ilock_flags;
+}
+
+/* Release the temporary file. */
+void
+xrep_tempfile_rele(
+ struct xfs_scrub *sc)
+{
+ if (!sc->tempip)
+ return;
+
+ if (sc->temp_ilock_flags)
+ xrep_tempfile_iunlock(sc, sc->temp_ilock_flags);
+ xfs_irele(sc->tempip);
+ sc->tempip = NULL;
+}
diff --git a/fs/xfs/scrub/tempfile.h b/fs/xfs/scrub/tempfile.h
new file mode 100644
index 000000000000..a6a4c8d6a373
--- /dev/null
+++ b/fs/xfs/scrub/tempfile.h
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_SCRUB_TEMPFILE_H__
+#define __XFS_SCRUB_TEMPFILE_H__
+
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+int xrep_tempfile_create(struct xfs_scrub *sc, uint16_t mode);
+void xrep_tempfile_rele(struct xfs_scrub *sc);
+
+void xrep_tempfile_ilock(struct xfs_scrub *sc, unsigned int ilock_flags);
+bool xrep_tempfile_ilock_nowait(struct xfs_scrub *sc, unsigned int ilock_flags);
+void xrep_tempfile_iunlock(struct xfs_scrub *sc, unsigned int ilock_flags);
+#else
+# define xrep_tempfile_rele(sc)
+#endif /* CONFIG_XFS_ONLINE_REPAIR */
+
+#endif /* __XFS_SCRUB_TEMPFILE_H__ */
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index f188e17cac49..d147c4f072fe 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -1491,6 +1491,39 @@ DEFINE_XREP_DQUOT_EVENT(xrep_dquot_item);
DEFINE_XREP_DQUOT_EVENT(xrep_disk_dquot);
DEFINE_XREP_DQUOT_EVENT(xrep_quotacheck_dquot);
+TRACE_EVENT(xrep_tempfile_create,
+ TP_PROTO(struct xfs_scrub *sc),
+ TP_ARGS(sc),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(unsigned int, type)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_ino_t, inum)
+ __field(unsigned int, gen)
+ __field(unsigned int, flags)
+ __field(xfs_ino_t, temp_inum)
+ ),
+ TP_fast_assign(
+ __entry->dev = sc->mp->m_super->s_dev;
+ __entry->ino = sc->file ? XFS_I(file_inode(sc->file))->i_ino : 0;
+ __entry->type = sc->sm->sm_type;
+ __entry->agno = sc->sm->sm_agno;
+ __entry->inum = sc->sm->sm_ino;
+ __entry->gen = sc->sm->sm_gen;
+ __entry->flags = sc->sm->sm_flags;
+ __entry->temp_inum = sc->tempip->i_ino;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx type %s inum 0x%llx gen 0x%x flags 0x%x temp_inum 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
+ __entry->inum,
+ __entry->gen,
+ __entry->flags,
+ __entry->temp_inum)
+);
+
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 2d895e7ed0a1..c82d581ce468 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -40,7 +40,6 @@
kmem_zone_t *xfs_inode_zone;
-STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *);
STATIC int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag,
struct xfs_inode *);
@@ -2174,7 +2173,7 @@ out:
* We place the on-disk inode on a list in the AGI. It will be pulled from this
* list when the inode is freed.
*/
-STATIC int
+int
xfs_iunlink(
struct xfs_trans *tp,
struct xfs_inode *ip)
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 8c66bc8305a0..a8eaf18b5310 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -518,6 +518,7 @@ bool xfs_inode_needs_inactive(struct xfs_inode *ip);
int xfs_iunlink_init(struct xfs_perag *pag);
void xfs_iunlink_destroy(struct xfs_perag *pag);
+int xfs_iunlink(struct xfs_trans *tp, struct xfs_inode *ip);
void xfs_end_io(struct work_struct *work);