summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- fs/xfs/scrub/repair.c 148
-rw-r--r-- fs/xfs/scrub/repair.h 2
-rw-r--r-- fs/xfs/scrub/scrub.c 11
-rw-r--r-- fs/xfs/scrub/scrub.h 8
-rw-r--r-- fs/xfs/scrub/trace.h 33
-rw-r--r-- fs/xfs/xfs_inode.c 3
-rw-r--r-- fs/xfs/xfs_inode.h 1
7 files changed, 204 insertions, 2 deletions
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 85c17a76f9ce..a510e0fd5602 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -36,6 +36,10 @@
#include "xfs_extfree_item.h"
#include "xfs_reflink.h"
#include "xfs_health.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_dir2.h"
+#include "xfs_xchgrange.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -1705,3 +1709,147 @@ out:
sc->sm->sm_flags = smflags;
return error;
}
+
+/*
+ * Create a temporary file for reconstructing metadata, with the intention of
+ * atomically swapping the temporary file's contents with the file that's
+ * being repaired.
+ *
+ * @sc:   scrub context; must not hold a transaction or a temporary inode yet.
+ * @mode: file mode of the temporary inode.  A directory mode gets a mkdir
+ *        reservation and an initialized directory; anything else uses the
+ *        tmpfile creation reservation.
+ *
+ * Returns 0 without doing anything if the caller did not request a repair.
+ * On success, sc->tempip points at the new inode, which is owned by root, is
+ * on the unlinked list, and has sc->temp_ilock_flags cleared.  On failure a
+ * negative errno is returned and sc->tempip is reset to NULL after the inode
+ * is released, so that xchk_teardown() does not release it a second time.
+ *
+ * Once the log assist has been grabbed, XREP_ATOMIC_EXCHANGE stays set in
+ * sc->flags even on failure; xchk_teardown() drops the log assist based on
+ * that flag.
+ */
+int
+xrep_setup_tempfile(
+	struct xfs_scrub	*sc,
+	uint16_t		mode)
+{
+	struct xfs_mount	*mp = sc->mp;
+	struct xfs_trans	*tp = NULL;
+	struct xfs_dquot	*udqp = NULL;
+	struct xfs_dquot	*gdqp = NULL;
+	struct xfs_dquot	*pdqp = NULL;
+	struct xfs_trans_res	*tres;
+	struct xfs_inode	*dp = mp->m_rootip;
+	xfs_ino_t		ino;
+	unsigned int		resblks;
+	bool			is_dir = S_ISDIR(mode);
+	bool			use_log = false;
+	int			error;
+
+	/* Temporary files are only needed when we're actually repairing. */
+	if (!(sc->sm->sm_flags & XFS_SCRUB_IFLAG_REPAIR))
+		return 0;
+	if (xfs_is_shutdown(mp))
+		return -EIO;
+
+	ASSERT(sc->tp == NULL);
+	ASSERT(sc->tempip == NULL);
+
+	/* Enable atomic extent swapping. */
+	error = xfs_xchg_range_grab_log_assist(mp, true, &use_log);
+	if (error)
+		return error;
+	ASSERT(use_log);
+	sc->flags |= XREP_ATOMIC_EXCHANGE;
+
+	/*
+	 * Make sure that we have allocated dquot(s) on disk.  The temporary
+	 * inode should be completely root owned so that we don't fail due to
+	 * quota limits.
+	 */
+	error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
+			XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
+	if (error)
+		return error;
+
+	if (is_dir) {
+		resblks = XFS_MKDIR_SPACE_RES(mp, 0);
+		tres = &M_RES(mp)->tr_mkdir;
+	} else {
+		resblks = XFS_IALLOC_SPACE_RES(mp);
+		tres = &M_RES(mp)->tr_create_tmpfile;
+	}
+
+	error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
+			&tp);
+	if (error)
+		goto out_release_dquots;
+
+	/* Allocate inode, set up directory. */
+	error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
+	if (error)
+		goto out_trans_cancel;
+	error = xfs_init_new_inode(&init_user_ns, tp, dp, ino, mode, 0, 0,
+			0, false, &sc->tempip);
+	if (error)
+		goto out_trans_cancel;
+
+	/* Change the ownership of the inode to root. */
+	VFS_I(sc->tempip)->i_uid = GLOBAL_ROOT_UID;
+	VFS_I(sc->tempip)->i_gid = GLOBAL_ROOT_GID;
+	xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE);
+
+	/*
+	 * Mark our temporary file as private so that LSMs and the ACL code
+	 * don't try to add their own metadata or reason about these files.
+	 * The file should never be exposed to userspace.
+	 */
+	VFS_I(sc->tempip)->i_flags |= S_PRIVATE;
+	VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR;
+
+	if (is_dir) {
+		error = xfs_dir_init(tp, sc->tempip, dp);
+		if (error)
+			goto out_trans_cancel;
+	}
+
+	/*
+	 * Attach the dquot(s) to the inode and modify them incore.
+	 * The ids of the inode couldn't have changed since the new
+	 * inode has been locked ever since it was created.
+	 */
+	xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp);
+
+	/*
+	 * Put our temp file on the unlinked list so it's purged automatically.
+	 * Anything being reconstructed using this file must be atomically
+	 * swapped with the original file because the contents here will be
+	 * purged when the inode is dropped or log recovery cleans out the
+	 * unlinked list.
+	 */
+	error = xfs_iunlink(tp, sc->tempip);
+	if (error)
+		goto out_trans_cancel;
+
+	error = xfs_trans_commit(tp);
+	if (error)
+		goto out_release_inode;
+
+	trace_xrep_setup_tempfile(sc);
+
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
+	xfs_qm_dqrele(pdqp);
+
+	/* Finish setting up the incore / vfs context. */
+	xfs_setup_iops(sc->tempip);
+	xfs_finish_inode_setup(sc->tempip);
+
+	sc->temp_ilock_flags = 0;
+	return 0;
+
+out_trans_cancel:
+	xfs_trans_cancel(tp);
+out_release_inode:
+	/*
+	 * Wait until after the current transaction is aborted to finish the
+	 * setup of the inode and release the inode.  This prevents recursive
+	 * transactions and deadlocks from xfs_inactive.
+	 *
+	 * Clear sc->tempip once our reference is gone; xchk_teardown() still
+	 * runs after a failed setup and would otherwise release the inode
+	 * again through the stale pointer.
+	 */
+	if (sc->tempip) {
+		xfs_finish_inode_setup(sc->tempip);
+		xfs_irele(sc->tempip);
+		sc->tempip = NULL;
+	}
+out_release_dquots:
+	xfs_qm_dqrele(udqp);
+	xfs_qm_dqrele(gdqp);
+	xfs_qm_dqrele(pdqp);
+
+	return error;
+}
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 5fc2ae5c18f2..9f9ab14abd39 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -35,6 +35,7 @@ int xrep_alloc_ag_block(struct xfs_scrub *sc,
int xrep_init_btblock(struct xfs_scrub *sc, xfs_fsblock_t fsb,
struct xfs_buf **bpp, xfs_btnum_t btnum,
const struct xfs_buf_ops *ops);
+/* Create the temporary file (sc->tempip) used to stage repaired metadata. */
+int xrep_setup_tempfile(struct xfs_scrub *sc, uint16_t mode);
struct xbitmap;
@@ -192,6 +193,7 @@ xrep_rmapbt_setup(
return xchk_setup_ag_btree(sc, false);
}
+/* Stub that always succeeds; presumably the repair-disabled build variant -- TODO confirm. */
+#define xrep_setup_tempfile(sc, mode) (0)
#define xrep_revalidate_allocbt (NULL)
#define xrep_revalidate_iallocbt (NULL)
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 056da154448f..16bb8528bb4e 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -19,6 +19,7 @@
#include "xfs_scrub.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
+#include "xfs_xchgrange.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -168,6 +169,10 @@ xchk_teardown(
xfs_irele(sc->ip);
sc->ip = NULL;
}
+ if (sc->flags & XREP_ATOMIC_EXCHANGE) {
+ xfs_xchg_range_rele_log_assist(sc->mp);
+ sc->flags &= ~XREP_ATOMIC_EXCHANGE;
+ }
if (sc->flags & XCHK_FS_FROZEN) {
int err2 = xchk_fs_thaw(sc);
@@ -193,6 +198,12 @@ xchk_teardown(
sc->buf_cleanup = NULL;
sc->buf = NULL;
}
+ if (sc->tempip) {
+ if (sc->temp_ilock_flags)
+ xfs_iunlock(sc->tempip, sc->temp_ilock_flags);
+ xfs_irele(sc->tempip);
+ sc->tempip = NULL;
+ }
return error;
}
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 1bf0e2243902..95d9b2074e6c 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -94,6 +94,10 @@ struct xfs_scrub {
/* Lock flags for @ip. */
uint ilock_flags;
+ /* A temporary file on this filesystem, for staging new metadata. */
+ struct xfs_inode *tempip;
+ uint temp_ilock_flags;
+
/* See the XCHK/XREP state flags below. */
unsigned int flags;
@@ -113,6 +117,10 @@ struct xfs_scrub {
#define XCHK_HAS_QUOTAOFFLOCK (1 << 1) /* we hold the quotaoff lock */
#define XCHK_REAPING_DISABLED (1 << 2) /* background block reaping paused */
#define XCHK_FS_FROZEN (1 << 3) /* we froze the fs to do things */
+
+/*
+ * Uses atomic metadata file content exchange.  Set by xrep_setup_tempfile()
+ * after it grabs the exchange-range log assist; xchk_teardown() releases the
+ * log assist and clears the flag.
+ */
+#define XREP_ATOMIC_EXCHANGE (1 << 29)
+
#define XREP_RESET_PERAG_RESV (1 << 30) /* must reset AG space reservation */
#define XREP_ALREADY_FIXED (1 << 31) /* checking our repair work */
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 3a0f29b54938..e40d3686940b 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -1410,6 +1410,39 @@ DEFINE_XREP_DQUOT_EVENT(xrep_dquot_item);
DEFINE_XREP_DQUOT_EVENT(xrep_disk_dquot);
DEFINE_XREP_DQUOT_EVENT(xrep_quotacheck_dquot);
+/*
+ * Tracepoint fired by xrep_setup_tempfile() after the transaction creating
+ * the temporary repair file has committed.  Records the scrub request fields
+ * from sc->sm (type, agno, inum, gen, flags), the inode number behind
+ * sc->file (0 when there is no file), and the inode number of sc->tempip.
+ * sc->tempip is dereferenced unconditionally, so it must be non-NULL.
+ *
+ * NOTE(review): agno is captured in the entry but is not part of the
+ * TP_printk format string -- confirm whether that omission is intentional.
+ */
+TRACE_EVENT(xrep_setup_tempfile,
+ TP_PROTO(struct xfs_scrub *sc),
+ TP_ARGS(sc),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, ino)
+ __field(unsigned int, type)
+ __field(xfs_agnumber_t, agno)
+ __field(xfs_ino_t, inum)
+ __field(unsigned int, gen)
+ __field(unsigned int, flags)
+ __field(xfs_ino_t, temp_inum)
+ ),
+ TP_fast_assign(
+ __entry->dev = sc->mp->m_super->s_dev;
+ __entry->ino = sc->file ? XFS_I(file_inode(sc->file))->i_ino : 0;
+ __entry->type = sc->sm->sm_type;
+ __entry->agno = sc->sm->sm_agno;
+ __entry->inum = sc->sm->sm_ino;
+ __entry->gen = sc->sm->sm_gen;
+ __entry->flags = sc->sm->sm_flags;
+ __entry->temp_inum = sc->tempip->i_ino;
+ ),
+ TP_printk("dev %d:%d ino 0x%llx type %s inum 0x%llx gen 0x%x flags 0x%x temp_inum 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->ino,
+ __print_symbolic(__entry->type, XFS_SCRUB_TYPE_STRINGS),
+ __entry->inum,
+ __entry->gen,
+ __entry->flags,
+ __entry->temp_inum)
+);
+
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 2d895e7ed0a1..c82d581ce468 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -40,7 +40,6 @@
kmem_zone_t *xfs_inode_zone;
-STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *);
STATIC int xfs_iunlink_remove(struct xfs_trans *tp, struct xfs_perag *pag,
struct xfs_inode *);
@@ -2174,7 +2173,7 @@ out:
* We place the on-disk inode on a list in the AGI. It will be pulled from this
* list when the inode is freed.
*/
-STATIC int
+int
xfs_iunlink(
struct xfs_trans *tp,
struct xfs_inode *ip)
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 8c66bc8305a0..a8eaf18b5310 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -518,6 +518,7 @@ bool xfs_inode_needs_inactive(struct xfs_inode *ip);
int xfs_iunlink_init(struct xfs_perag *pag);
void xfs_iunlink_destroy(struct xfs_perag *pag);
+/* Put an inode on the AGI unlinked list; exported for the scrub repair code. */
+int xfs_iunlink(struct xfs_trans *tp, struct xfs_inode *ip);
void xfs_end_io(struct work_struct *work);