summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/xfs/Makefile5
-rw-r--r--fs/xfs/libxfs/xfs_attr.c5
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c5
-rw-r--r--fs/xfs/libxfs/xfs_format.h119
-rw-r--r--fs/xfs/libxfs/xfs_fs.h25
-rw-r--r--fs/xfs/libxfs/xfs_health.h6
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c58
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c90
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h3
-rw-r--r--fs/xfs/libxfs/xfs_inode_util.c2
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h2
-rw-r--r--fs/xfs/libxfs/xfs_metadir.c481
-rw-r--r--fs/xfs/libxfs/xfs_metadir.h47
-rw-r--r--fs/xfs/libxfs/xfs_metafile.c52
-rw-r--r--fs/xfs/libxfs/xfs_metafile.h31
-rw-r--r--fs/xfs/libxfs/xfs_ondisk.h2
-rw-r--r--fs/xfs/libxfs/xfs_sb.c12
-rw-r--r--fs/xfs/libxfs/xfs_types.c4
-rw-r--r--fs/xfs/libxfs/xfs_types.h2
-rw-r--r--fs/xfs/scrub/agheader.c5
-rw-r--r--fs/xfs/scrub/common.c65
-rw-r--r--fs/xfs/scrub/common.h5
-rw-r--r--fs/xfs/scrub/dir.c10
-rw-r--r--fs/xfs/scrub/dir_repair.c20
-rw-r--r--fs/xfs/scrub/dirtree.c32
-rw-r--r--fs/xfs/scrub/dirtree.h12
-rw-r--r--fs/xfs/scrub/findparent.c28
-rw-r--r--fs/xfs/scrub/health.c1
-rw-r--r--fs/xfs/scrub/inode.c35
-rw-r--r--fs/xfs/scrub/inode_repair.c34
-rw-r--r--fs/xfs/scrub/metapath.c521
-rw-r--r--fs/xfs/scrub/nlinks.c4
-rw-r--r--fs/xfs/scrub/nlinks_repair.c4
-rw-r--r--fs/xfs/scrub/orphanage.c4
-rw-r--r--fs/xfs/scrub/parent.c39
-rw-r--r--fs/xfs/scrub/parent_repair.c37
-rw-r--r--fs/xfs/scrub/quotacheck.c7
-rw-r--r--fs/xfs/scrub/refcount_repair.c2
-rw-r--r--fs/xfs/scrub/repair.c14
-rw-r--r--fs/xfs/scrub/repair.h3
-rw-r--r--fs/xfs/scrub/scrub.c12
-rw-r--r--fs/xfs/scrub/scrub.h2
-rw-r--r--fs/xfs/scrub/stats.c1
-rw-r--r--fs/xfs/scrub/tempfile.c105
-rw-r--r--fs/xfs/scrub/tempfile.h3
-rw-r--r--fs/xfs/scrub/trace.c1
-rw-r--r--fs/xfs/scrub/trace.h42
-rw-r--r--fs/xfs/xfs_dquot.c1
-rw-r--r--fs/xfs/xfs_fsops.c4
-rw-r--r--fs/xfs/xfs_health.c2
-rw-r--r--fs/xfs/xfs_icache.c74
-rw-r--r--fs/xfs/xfs_inode.c19
-rw-r--r--fs/xfs/xfs_inode.h36
-rw-r--r--fs/xfs/xfs_inode_item.c7
-rw-r--r--fs/xfs/xfs_inode_item_recover.c2
-rw-r--r--fs/xfs/xfs_ioctl.c7
-rw-r--r--fs/xfs/xfs_iops.c15
-rw-r--r--fs/xfs/xfs_itable.c33
-rw-r--r--fs/xfs/xfs_itable.h3
-rw-r--r--fs/xfs/xfs_message.c51
-rw-r--r--fs/xfs/xfs_message.h20
-rw-r--r--fs/xfs/xfs_mount.c31
-rw-r--r--fs/xfs/xfs_mount.h25
-rw-r--r--fs/xfs/xfs_pnfs.c3
-rw-r--r--fs/xfs/xfs_qm.c36
-rw-r--r--fs/xfs/xfs_quota.h5
-rw-r--r--fs/xfs/xfs_rtalloc.c38
-rw-r--r--fs/xfs/xfs_super.c13
-rw-r--r--fs/xfs/xfs_trace.c2
-rw-r--r--fs/xfs/xfs_trace.h102
-rw-r--r--fs/xfs/xfs_trans_dquot.c6
-rw-r--r--fs/xfs/xfs_xattr.c3
72 files changed, 2332 insertions, 205 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 94cb8ca9f9da..d80c2817eb48 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -16,6 +16,7 @@ xfs-y += xfs_trace.o
xfs-y += $(addprefix libxfs/, \
xfs_group.o \
xfs_ag.o \
+ xfs_ag_resv.o \
xfs_alloc.o \
xfs_alloc_btree.o \
xfs_attr.o \
@@ -43,7 +44,8 @@ xfs-y += $(addprefix libxfs/, \
xfs_inode_buf.o \
xfs_inode_util.o \
xfs_log_rlimit.o \
- xfs_ag_resv.o \
+ xfs_metadir.o \
+ xfs_metafile.o \
xfs_parent.o \
xfs_rmap.o \
xfs_rmap_btree.o \
@@ -172,6 +174,7 @@ xfs-y += $(addprefix scrub/, \
inode.o \
iscan.o \
listxattr.o \
+ metapath.o \
nlinks.o \
parent.o \
readdir.o \
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index c63da14eee04..17875ad865f5 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -1004,7 +1004,10 @@ xfs_attr_add_fork(
unsigned int blks; /* space reservation */
int error; /* error return value */
- ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
+ if (xfs_is_metadir_inode(ip))
+ ASSERT(XFS_IS_DQDETACHED(ip));
+ else
+ ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
blks = XFS_ADDAFORK_SPACE_RES(mp);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 5eda036cf9bf..7805a36e98c4 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -1042,7 +1042,10 @@ xfs_bmap_add_attrfork(
int error; /* error return value */
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
- ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
+ if (xfs_is_metadir_inode(ip))
+ ASSERT(XFS_IS_DQDETACHED(ip));
+ else
+ ASSERT(!XFS_NOT_DQATTACHED(mp, ip));
ASSERT(!xfs_inode_has_attr_fork(ip));
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index e1bfee0c3b1a..616f81045921 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -174,6 +174,8 @@ typedef struct xfs_sb {
xfs_lsn_t sb_lsn; /* last write sequence */
uuid_t sb_meta_uuid; /* metadata file system unique id */
+ xfs_ino_t sb_metadirino; /* metadata directory tree root */
+
/* must be padded to 64 bit alignment */
} xfs_sb_t;
@@ -259,6 +261,8 @@ struct xfs_dsb {
__be64 sb_lsn; /* last write sequence */
uuid_t sb_meta_uuid; /* metadata file system unique id */
+ __be64 sb_metadirino; /* metadata directory tree root */
+
/* must be padded to 64 bit alignment */
};
@@ -278,7 +282,7 @@ struct xfs_dsb {
#define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS)
-static inline bool xfs_sb_is_v5(struct xfs_sb *sbp)
+static inline bool xfs_sb_is_v5(const struct xfs_sb *sbp)
{
return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5;
}
@@ -287,12 +291,12 @@ static inline bool xfs_sb_is_v5(struct xfs_sb *sbp)
* Detect a mismatched features2 field. Older kernels read/wrote
* this into the wrong slot, so to be safe we keep them in sync.
*/
-static inline bool xfs_sb_has_mismatched_features2(struct xfs_sb *sbp)
+static inline bool xfs_sb_has_mismatched_features2(const struct xfs_sb *sbp)
{
return sbp->sb_bad_features2 != sbp->sb_features2;
}
-static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp)
+static inline bool xfs_sb_version_hasmorebits(const struct xfs_sb *sbp)
{
return xfs_sb_is_v5(sbp) ||
(sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT);
@@ -342,8 +346,8 @@ static inline void xfs_sb_version_addprojid32(struct xfs_sb *sbp)
#define XFS_SB_FEAT_COMPAT_UNKNOWN ~XFS_SB_FEAT_COMPAT_ALL
static inline bool
xfs_sb_has_compat_feature(
- struct xfs_sb *sbp,
- uint32_t feature)
+ const struct xfs_sb *sbp,
+ uint32_t feature)
{
return (sbp->sb_features_compat & feature) != 0;
}
@@ -360,8 +364,8 @@ xfs_sb_has_compat_feature(
#define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL
static inline bool
xfs_sb_has_ro_compat_feature(
- struct xfs_sb *sbp,
- uint32_t feature)
+ const struct xfs_sb *sbp,
+ uint32_t feature)
{
return (sbp->sb_features_ro_compat & feature) != 0;
}
@@ -374,6 +378,7 @@ xfs_sb_has_ro_compat_feature(
#define XFS_SB_FEAT_INCOMPAT_NREXT64 (1 << 5) /* large extent counters */
#define XFS_SB_FEAT_INCOMPAT_EXCHRANGE (1 << 6) /* exchangerange supported */
#define XFS_SB_FEAT_INCOMPAT_PARENT (1 << 7) /* parent pointers */
+#define XFS_SB_FEAT_INCOMPAT_METADIR (1 << 8) /* metadata dir tree */
#define XFS_SB_FEAT_INCOMPAT_ALL \
(XFS_SB_FEAT_INCOMPAT_FTYPE | \
XFS_SB_FEAT_INCOMPAT_SPINODES | \
@@ -387,8 +392,8 @@ xfs_sb_has_ro_compat_feature(
#define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL
static inline bool
xfs_sb_has_incompat_feature(
- struct xfs_sb *sbp,
- uint32_t feature)
+ const struct xfs_sb *sbp,
+ uint32_t feature)
{
return (sbp->sb_features_incompat & feature) != 0;
}
@@ -399,8 +404,8 @@ xfs_sb_has_incompat_feature(
#define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_LOG_ALL
static inline bool
xfs_sb_has_incompat_log_feature(
- struct xfs_sb *sbp,
- uint32_t feature)
+ const struct xfs_sb *sbp,
+ uint32_t feature)
{
return (sbp->sb_features_log_incompat & feature) != 0;
}
@@ -420,7 +425,7 @@ xfs_sb_add_incompat_log_features(
sbp->sb_features_log_incompat |= features;
}
-static inline bool xfs_sb_version_haslogxattrs(struct xfs_sb *sbp)
+static inline bool xfs_sb_version_haslogxattrs(const struct xfs_sb *sbp)
{
return xfs_sb_is_v5(sbp) && (sbp->sb_features_log_incompat &
XFS_SB_FEAT_INCOMPAT_LOG_XATTRS);
@@ -790,6 +795,27 @@ static inline time64_t xfs_bigtime_to_unix(uint64_t ondisk_seconds)
return (time64_t)ondisk_seconds - XFS_BIGTIME_EPOCH_OFFSET;
}
+enum xfs_metafile_type {
+ XFS_METAFILE_UNKNOWN, /* unknown */
+ XFS_METAFILE_DIR, /* metadir directory */
+ XFS_METAFILE_USRQUOTA, /* user quota */
+ XFS_METAFILE_GRPQUOTA, /* group quota */
+ XFS_METAFILE_PRJQUOTA, /* project quota */
+ XFS_METAFILE_RTBITMAP, /* rt bitmap */
+ XFS_METAFILE_RTSUMMARY, /* rt summary */
+
+ XFS_METAFILE_MAX
+} __packed;
+
+#define XFS_METAFILE_TYPE_STR \
+ { XFS_METAFILE_UNKNOWN, "unknown" }, \
+ { XFS_METAFILE_DIR, "dir" }, \
+ { XFS_METAFILE_USRQUOTA, "usrquota" }, \
+ { XFS_METAFILE_GRPQUOTA, "grpquota" }, \
+ { XFS_METAFILE_PRJQUOTA, "prjquota" }, \
+ { XFS_METAFILE_RTBITMAP, "rtbitmap" }, \
+ { XFS_METAFILE_RTSUMMARY, "rtsummary" }
+
/*
* On-disk inode structure.
*
@@ -812,7 +838,7 @@ struct xfs_dinode {
__be16 di_mode; /* mode and type of file */
__u8 di_version; /* inode version */
__u8 di_format; /* format of di_c data */
- __be16 di_onlink; /* old number of links to file */
+ __be16 di_metatype; /* XFS_METAFILE_*; was di_onlink */
__be32 di_uid; /* owner's user id */
__be32 di_gid; /* owner's group id */
__be32 di_nlink; /* number of links to file */
@@ -1088,21 +1114,60 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev)
* Values for di_flags2 These start by being exposed to userspace in the upper
* 16 bits of the XFS_XFLAG_s range.
*/
-#define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */
-#define XFS_DIFLAG2_REFLINK_BIT 1 /* file's blocks may be shared */
-#define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */
-#define XFS_DIFLAG2_BIGTIME_BIT 3 /* big timestamps */
-#define XFS_DIFLAG2_NREXT64_BIT 4 /* large extent counters */
+/* use DAX for this inode */
+#define XFS_DIFLAG2_DAX_BIT 0
+
+/* file's blocks may be shared */
+#define XFS_DIFLAG2_REFLINK_BIT 1
-#define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT)
-#define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT)
-#define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT)
-#define XFS_DIFLAG2_BIGTIME (1 << XFS_DIFLAG2_BIGTIME_BIT)
-#define XFS_DIFLAG2_NREXT64 (1 << XFS_DIFLAG2_NREXT64_BIT)
+/* copy on write extent size hint */
+#define XFS_DIFLAG2_COWEXTSIZE_BIT 2
+
+/* big timestamps */
+#define XFS_DIFLAG2_BIGTIME_BIT 3
+
+/* large extent counters */
+#define XFS_DIFLAG2_NREXT64_BIT 4
+
+/*
+ * The inode contains filesystem metadata and can be found through the metadata
+ * directory tree. Metadata inodes must satisfy the following constraints:
+ *
+ * - V5 filesystem (and ftype) are enabled;
+ * - The only valid modes are regular files and directories;
+ * - The access bits must be zero;
+ * - DMAPI event and state masks are zero;
+ * - The user and group IDs must be zero;
+ * - The project ID can be used as a u32 annotation;
+ * - The immutable, sync, noatime, nodump, nodefrag flags must be set.
+ * - The dax flag must not be set.
+ * - Directories must have nosymlinks set.
+ *
+ * These requirements are chosen defensively to minimize the ability of
+ * userspace to read or modify the contents, should a metadata file ever
+ * escape to userspace.
+ *
+ * There are further constraints on the directory tree itself:
+ *
+ * - Metadata inodes must never be resolvable through the root directory;
+ * - They must never be accessed by userspace;
+ * - Metadata directory entries must have correct ftype.
+ *
+ * Superblock-rooted metadata files must have the METADATA iflag set even
+ * though they do not have a parent directory.
+ */
+#define XFS_DIFLAG2_METADATA_BIT 5
+
+#define XFS_DIFLAG2_DAX (1ULL << XFS_DIFLAG2_DAX_BIT)
+#define XFS_DIFLAG2_REFLINK (1ULL << XFS_DIFLAG2_REFLINK_BIT)
+#define XFS_DIFLAG2_COWEXTSIZE (1ULL << XFS_DIFLAG2_COWEXTSIZE_BIT)
+#define XFS_DIFLAG2_BIGTIME (1ULL << XFS_DIFLAG2_BIGTIME_BIT)
+#define XFS_DIFLAG2_NREXT64 (1ULL << XFS_DIFLAG2_NREXT64_BIT)
+#define XFS_DIFLAG2_METADATA (1ULL << XFS_DIFLAG2_METADATA_BIT)
#define XFS_DIFLAG2_ANY \
(XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \
- XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64)
+ XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_METADATA)
static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip)
{
@@ -1117,6 +1182,12 @@ static inline bool xfs_dinode_has_large_extent_counts(
(dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_NREXT64));
}
+static inline bool xfs_dinode_is_metadir(const struct xfs_dinode *dip)
+{
+ return dip->di_version >= 3 &&
+ (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_METADATA));
+}
+
/*
* Inode number format:
* low inopblog bits - offset in block
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index 860284064c5a..faa38a7d1eb0 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -198,6 +198,8 @@ struct xfs_fsop_geom {
#define XFS_FSOP_GEOM_SICK_RT_SUMMARY (1 << 5) /* realtime summary */
#define XFS_FSOP_GEOM_SICK_QUOTACHECK (1 << 6) /* quota counts */
#define XFS_FSOP_GEOM_SICK_NLINKS (1 << 7) /* inode link counts */
+#define XFS_FSOP_GEOM_SICK_METADIR (1 << 8) /* metadata directory */
+#define XFS_FSOP_GEOM_SICK_METAPATH (1 << 9) /* metadir tree path */
/* Output for XFS_FS_COUNTS */
typedef struct xfs_fsop_counts {
@@ -242,6 +244,7 @@ typedef struct xfs_fsop_resblks {
#define XFS_FSOP_GEOM_FLAGS_NREXT64 (1 << 23) /* large extent counters */
#define XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE (1 << 24) /* exchange range */
#define XFS_FSOP_GEOM_FLAGS_PARENT (1 << 25) /* linux parent pointers */
+#define XFS_FSOP_GEOM_FLAGS_METADIR (1 << 26) /* metadata directories */
/*
* Minimum and maximum sizes need for growth checks.
@@ -489,9 +492,17 @@ struct xfs_bulk_ireq {
*/
#define XFS_BULK_IREQ_NREXT64 (1U << 2)
+/*
+ * Allow bulkstat to return information about metadata directories. This
+ * enables xfs_scrub to find them for scanning, as they are otherwise ordinary
+ * directories.
+ */
+#define XFS_BULK_IREQ_METADIR (1U << 3)
+
#define XFS_BULK_IREQ_FLAGS_ALL (XFS_BULK_IREQ_AGNO | \
XFS_BULK_IREQ_SPECIAL | \
- XFS_BULK_IREQ_NREXT64)
+ XFS_BULK_IREQ_NREXT64 | \
+ XFS_BULK_IREQ_METADIR)
/* Operate on the root directory inode. */
#define XFS_BULK_IREQ_SPECIAL_ROOT (1)
@@ -722,9 +733,10 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_NLINKS 26 /* inode link counts */
#define XFS_SCRUB_TYPE_HEALTHY 27 /* everything checked out ok */
#define XFS_SCRUB_TYPE_DIRTREE 28 /* directory tree structure */
+#define XFS_SCRUB_TYPE_METAPATH 29 /* metadata directory tree paths */
/* Number of scrub subcommands. */
-#define XFS_SCRUB_TYPE_NR 29
+#define XFS_SCRUB_TYPE_NR 30
/*
* This special type code only applies to the vectored scrub implementation.
@@ -803,6 +815,15 @@ struct xfs_scrub_vec_head {
#define XFS_SCRUB_VEC_FLAGS_ALL (0)
/*
+ * i: sm_ino values for XFS_SCRUB_TYPE_METAPATH to select a metadata file for
+ * path checking.
+ */
+#define XFS_SCRUB_METAPATH_PROBE (0) /* do we have a metapath scrubber? */
+
+/* Number of metapath sm_ino values */
+#define XFS_SCRUB_METAPATH_NR (1)
+
+/*
* ioctl limits
*/
#ifdef XATTR_LIST_MAX
diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h
index 13301420a2f6..a23df94319e5 100644
--- a/fs/xfs/libxfs/xfs_health.h
+++ b/fs/xfs/libxfs/xfs_health.h
@@ -62,6 +62,8 @@ struct xfs_da_args;
#define XFS_SICK_FS_PQUOTA (1 << 3) /* project quota */
#define XFS_SICK_FS_QUOTACHECK (1 << 4) /* quota counts */
#define XFS_SICK_FS_NLINKS (1 << 5) /* inode link counts */
+#define XFS_SICK_FS_METADIR (1 << 6) /* metadata directory tree */
+#define XFS_SICK_FS_METAPATH (1 << 7) /* metadata directory tree path */
/* Observable health issues for realtime volume metadata. */
#define XFS_SICK_RT_BITMAP (1 << 0) /* realtime bitmap */
@@ -105,7 +107,9 @@ struct xfs_da_args;
XFS_SICK_FS_GQUOTA | \
XFS_SICK_FS_PQUOTA | \
XFS_SICK_FS_QUOTACHECK | \
- XFS_SICK_FS_NLINKS)
+ XFS_SICK_FS_NLINKS | \
+ XFS_SICK_FS_METADIR | \
+ XFS_SICK_FS_METAPATH)
#define XFS_SICK_RT_PRIMARY (XFS_SICK_RT_BITMAP | \
XFS_SICK_RT_SUMMARY)
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index f0261c4d9106..8b84e2cf711b 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -1842,6 +1842,40 @@ out_release:
}
/*
+ * Pick an AG for the new inode.
+ *
+ * Directories, symlinks, and regular files frequently allocate at least one
+ * block, so factor that potential expansion when we examine whether an AG has
+ * enough space for file creation. Try to keep metadata files all in the same
+ * AG.
+ */
+static inline xfs_agnumber_t
+xfs_dialloc_pick_ag(
+ struct xfs_mount *mp,
+ struct xfs_inode *dp,
+ umode_t mode)
+{
+ xfs_agnumber_t start_agno;
+
+ if (!dp)
+ return 0;
+ if (xfs_is_metadir_inode(dp)) {
+ if (mp->m_sb.sb_logstart)
+ return XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart);
+ return 0;
+ }
+
+ if (S_ISDIR(mode))
+ return (atomic_inc_return(&mp->m_agirotor) - 1) % mp->m_maxagi;
+
+ start_agno = XFS_INO_TO_AGNO(mp, dp->i_ino);
+ if (start_agno >= mp->m_maxagi)
+ start_agno = 0;
+
+ return start_agno;
+}
+
+/*
* Allocate an on-disk inode.
*
* Mode is used to tell whether the new inode is a directory and hence where to
@@ -1856,31 +1890,19 @@ xfs_dialloc(
xfs_ino_t *new_ino)
{
struct xfs_mount *mp = (*tpp)->t_mountp;
+ struct xfs_perag *pag;
+ struct xfs_ino_geometry *igeo = M_IGEO(mp);
+ xfs_ino_t ino = NULLFSINO;
xfs_ino_t parent = args->pip ? args->pip->i_ino : 0;
- umode_t mode = args->mode & S_IFMT;
xfs_agnumber_t agno;
- int error = 0;
xfs_agnumber_t start_agno;
- struct xfs_perag *pag;
- struct xfs_ino_geometry *igeo = M_IGEO(mp);
+ umode_t mode = args->mode & S_IFMT;
bool ok_alloc = true;
bool low_space = false;
int flags;
- xfs_ino_t ino = NULLFSINO;
+ int error = 0;
- /*
- * Directories, symlinks, and regular files frequently allocate at least
- * one block, so factor that potential expansion when we examine whether
- * an AG has enough space for file creation.
- */
- if (S_ISDIR(mode))
- start_agno = (atomic_inc_return(&mp->m_agirotor) - 1) %
- mp->m_maxagi;
- else {
- start_agno = XFS_INO_TO_AGNO(mp, parent);
- if (start_agno >= mp->m_maxagi)
- start_agno = 0;
- }
+ start_agno = xfs_dialloc_pick_ag(mp, args->pip, mode);
/*
* If we have already hit the ceiling of inode blocks then clear
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 79babeac9d75..424861fbf1bd 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -19,6 +19,7 @@
#include "xfs_ialloc.h"
#include "xfs_dir2.h"
#include "xfs_health.h"
+#include "xfs_metafile.h"
#include <linux/iversion.h>
@@ -209,12 +210,15 @@ xfs_inode_from_disk(
* They will also be unconditionally written back to disk as v2 inodes.
*/
if (unlikely(from->di_version == 1)) {
- set_nlink(inode, be16_to_cpu(from->di_onlink));
+ /* di_metatype used to be di_onlink */
+ set_nlink(inode, be16_to_cpu(from->di_metatype));
ip->i_projid = 0;
} else {
set_nlink(inode, be32_to_cpu(from->di_nlink));
ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 |
be16_to_cpu(from->di_projid_lo);
+ if (xfs_dinode_is_metadir(from))
+ ip->i_metatype = be16_to_cpu(from->di_metatype);
}
i_uid_write(inode, be32_to_cpu(from->di_uid));
@@ -315,7 +319,10 @@ xfs_inode_to_disk(
struct inode *inode = VFS_I(ip);
to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
- to->di_onlink = 0;
+ if (xfs_is_metadir_inode(ip))
+ to->di_metatype = cpu_to_be16(ip->i_metatype);
+ else
+ to->di_metatype = 0;
to->di_format = xfs_ifork_format(&ip->i_df);
to->di_uid = cpu_to_be32(i_uid_read(inode));
@@ -483,6 +490,69 @@ xfs_dinode_verify_nrext64(
return NULL;
}
+/*
+ * Validate all the picky requirements we have for a file that claims to be
+ * filesystem metadata.
+ */
+xfs_failaddr_t
+xfs_dinode_verify_metadir(
+ struct xfs_mount *mp,
+ struct xfs_dinode *dip,
+ uint16_t mode,
+ uint16_t flags,
+ uint64_t flags2)
+{
+ if (!xfs_has_metadir(mp))
+ return __this_address;
+
+ /* V5 filesystem only */
+ if (dip->di_version < 3)
+ return __this_address;
+
+ if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX)
+ return __this_address;
+
+ /* V3 inode fields that are always zero */
+ if ((flags2 & XFS_DIFLAG2_NREXT64) && dip->di_nrext64_pad)
+ return __this_address;
+ if (!(flags2 & XFS_DIFLAG2_NREXT64) && dip->di_flushiter)
+ return __this_address;
+
+ /* Metadata files can only be directories or regular files */
+ if (!S_ISDIR(mode) && !S_ISREG(mode))
+ return __this_address;
+
+ /* They must have zero access permissions */
+ if (mode & 0777)
+ return __this_address;
+
+ /* DMAPI event and state masks are zero */
+ if (dip->di_dmevmask || dip->di_dmstate)
+ return __this_address;
+
+ /*
+ * User and group IDs must be zero. The project ID is used for
+ * grouping inodes. Metadata inodes are never accounted to quotas.
+ */
+ if (dip->di_uid || dip->di_gid)
+ return __this_address;
+
+ /* Mandatory inode flags must be set */
+ if (S_ISDIR(mode)) {
+ if ((flags & XFS_METADIR_DIFLAGS) != XFS_METADIR_DIFLAGS)
+ return __this_address;
+ } else {
+ if ((flags & XFS_METAFILE_DIFLAGS) != XFS_METAFILE_DIFLAGS)
+ return __this_address;
+ }
+
+ /* dax flags2 must not be set */
+ if (flags2 & XFS_DIFLAG2_DAX)
+ return __this_address;
+
+ return NULL;
+}
+
xfs_failaddr_t
xfs_dinode_verify(
struct xfs_mount *mp,
@@ -523,8 +593,11 @@ xfs_dinode_verify(
* di_nlink==0 on a V1 inode. V2/3 inodes would get written out with
* di_onlink==0, so we can check that.
*/
- if (dip->di_version >= 2) {
- if (dip->di_onlink)
+ if (dip->di_version == 2) {
+ if (dip->di_metatype)
+ return __this_address;
+ } else if (dip->di_version >= 3) {
+ if (!xfs_dinode_is_metadir(dip) && dip->di_metatype)
return __this_address;
}
@@ -546,7 +619,8 @@ xfs_dinode_verify(
if (dip->di_nlink)
return __this_address;
} else {
- if (dip->di_onlink)
+ /* di_metatype used to be di_onlink */
+ if (dip->di_metatype)
return __this_address;
}
}
@@ -663,6 +737,12 @@ xfs_dinode_verify(
!xfs_has_bigtime(mp))
return __this_address;
+ if (flags2 & XFS_DIFLAG2_METADATA) {
+ fa = xfs_dinode_verify_metadir(mp, dip, mode, flags, flags2);
+ if (fa)
+ return fa;
+ }
+
return NULL;
}
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 585ed5a110af..8d43d2641c73 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -28,6 +28,9 @@ int xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from);
xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino,
struct xfs_dinode *dip);
+xfs_failaddr_t xfs_dinode_verify_metadir(struct xfs_mount *mp,
+ struct xfs_dinode *dip, uint16_t mode, uint16_t flags,
+ uint64_t flags2);
xfs_failaddr_t xfs_inode_validate_extsize(struct xfs_mount *mp,
uint32_t extsize, uint16_t mode, uint16_t flags);
xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp,
diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c
index ec64eda3bbe2..deb0b7c00a1f 100644
--- a/fs/xfs/libxfs/xfs_inode_util.c
+++ b/fs/xfs/libxfs/xfs_inode_util.c
@@ -224,6 +224,8 @@ xfs_inode_inherit_flags2(
}
if (pip->i_diflags2 & XFS_DIFLAG2_DAX)
ip->i_diflags2 |= XFS_DIFLAG2_DAX;
+ if (xfs_is_metadir_inode(pip))
+ ip->i_diflags2 |= XFS_DIFLAG2_METADATA;
/* Don't let invalid cowextsize hints propagate. */
failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize,
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 3e6682ed656b..ace7384a275b 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -404,7 +404,7 @@ struct xfs_log_dinode {
uint16_t di_mode; /* mode and type of file */
int8_t di_version; /* inode version */
int8_t di_format; /* format of di_c data */
- uint8_t di_pad3[2]; /* unused in v2/3 inodes */
+ uint16_t di_metatype; /* metadata type, if DIFLAG2_METADATA */
uint32_t di_uid; /* owner's user id */
uint32_t di_gid; /* owner's group id */
uint32_t di_nlink; /* number of links to file */
diff --git a/fs/xfs/libxfs/xfs_metadir.c b/fs/xfs/libxfs/xfs_metadir.c
new file mode 100644
index 000000000000..bae7377c0f22
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_metadir.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_trans.h"
+#include "xfs_metafile.h"
+#include "xfs_metadir.h"
+#include "xfs_trace.h"
+#include "xfs_inode.h"
+#include "xfs_quota.h"
+#include "xfs_ialloc.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_da_format.h"
+#include "xfs_da_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_ag.h"
+#include "xfs_dir2.h"
+#include "xfs_dir2_priv.h"
+#include "xfs_parent.h"
+#include "xfs_health.h"
+
+/*
+ * Metadata Directory Tree
+ * =======================
+ *
+ * These functions provide an abstraction layer for looking up, creating, and
+ * deleting metadata inodes that live within a special metadata directory tree.
+ *
+ * This code does not manage the five existing metadata inodes: real time
+ * bitmap & summary; and the user, group, and quotas. All other metadata
+ * inodes must use only the xfs_meta{dir,file}_* functions.
+ *
+ * Callers wishing to create or hardlink a metadata inode must create an
+ * xfs_metadir_update structure, call the appropriate xfs_metadir* function,
+ * and then call xfs_metadir_commit or xfs_metadir_cancel to commit or cancel
+ * the update. Files in the metadata directory tree currently cannot be
+ * unlinked.
+ *
+ * When the metadir feature is enabled, all metadata inodes must have the
+ * "metadata" inode flag set to prevent them from being exposed to the outside
+ * world.
+ *
+ * Callers must take the ILOCK of any inode in the metadata directory tree to
+ * synchronize access to that inode. It is never necessary to take the IOLOCK
+ * or the MMAPLOCK since metadata inodes must not be exposed to user space.
+ */
+
+static inline void
+xfs_metadir_set_xname(
+ struct xfs_name *xname,
+ const char *path,
+ unsigned char ftype)
+{
+ xname->name = (const unsigned char *)path;
+ xname->len = strlen(path);
+ xname->type = ftype;
+}
+
+/*
+ * Given a parent directory @dp and a metadata inode path component @xname,
+ * Look up the inode number in the directory, returning it in @ino.
+ * @xname.type must match the directory entry's ftype.
+ *
+ * Caller must hold ILOCK_EXCL.
+ */
+static inline int
+xfs_metadir_lookup(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ struct xfs_name *xname,
+ xfs_ino_t *ino)
+{
+ struct xfs_mount *mp = dp->i_mount;
+ struct xfs_da_args args = {
+ .trans = tp,
+ .dp = dp,
+ .geo = mp->m_dir_geo,
+ .name = xname->name,
+ .namelen = xname->len,
+ .hashval = xfs_dir2_hashname(mp, xname),
+ .whichfork = XFS_DATA_FORK,
+ .op_flags = XFS_DA_OP_OKNOENT,
+ .owner = dp->i_ino,
+ };
+ int error;
+
+ if (!S_ISDIR(VFS_I(dp)->i_mode)) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+ if (xfs_is_shutdown(mp))
+ return -EIO;
+
+ error = xfs_dir_lookup_args(&args);
+ if (error)
+ return error;
+
+ if (!xfs_verify_ino(mp, args.inumber)) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+ if (xname->type != XFS_DIR3_FT_UNKNOWN && xname->type != args.filetype) {
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+ }
+
+ trace_xfs_metadir_lookup(dp, xname, args.inumber);
+ *ino = args.inumber;
+ return 0;
+}
+
+/*
+ * Look up and read a metadata inode from the metadata directory. If the path
+ * component doesn't exist, return -ENOENT.
+ */
+int
+xfs_metadir_load(
+ struct xfs_trans *tp,
+ struct xfs_inode *dp,
+ const char *path,
+ enum xfs_metafile_type metafile_type,
+ struct xfs_inode **ipp)
+{
+ struct xfs_name xname;
+ xfs_ino_t ino;
+ int error;
+
+ xfs_metadir_set_xname(&xname, path, XFS_DIR3_FT_UNKNOWN);
+
+ xfs_ilock(dp, XFS_ILOCK_EXCL);
+ error = xfs_metadir_lookup(tp, dp, &xname, &ino);
+ xfs_iunlock(dp, XFS_ILOCK_EXCL);
+ if (error)
+ return error;
+ return xfs_trans_metafile_iget(tp, ino, metafile_type, ipp);
+}
+
+/*
+ * Unlock and release resources after committing (or cancelling) a metadata
+ * directory tree operation. The caller retains its reference to @upd->ip
+ * and must release it explicitly.
+ */
+static inline void
+xfs_metadir_teardown(
+ struct xfs_metadir_update *upd,
+ int error)
+{
+ trace_xfs_metadir_teardown(upd, error);
+
+ if (upd->ppargs) {
+ xfs_parent_finish(upd->dp->i_mount, upd->ppargs);
+ upd->ppargs = NULL;
+ }
+
+ if (upd->ip) {
+ if (upd->ip_locked)
+ xfs_iunlock(upd->ip, XFS_ILOCK_EXCL);
+ upd->ip_locked = false;
+ }
+
+ if (upd->dp_locked)
+ xfs_iunlock(upd->dp, XFS_ILOCK_EXCL);
+ upd->dp_locked = false;
+}
+
+/*
+ * Begin the process of creating a metadata file by allocating transactions
+ * and taking whatever resources we're going to need.
+ */
+int
+xfs_metadir_start_create(
+ struct xfs_metadir_update *upd)
+{
+ struct xfs_mount *mp = upd->dp->i_mount;
+ int error;
+
+ ASSERT(upd->dp != NULL);
+ ASSERT(upd->ip == NULL);
+ ASSERT(xfs_has_metadir(mp));
+ ASSERT(upd->metafile_type != XFS_METAFILE_UNKNOWN);
+
+ error = xfs_parent_start(mp, &upd->ppargs);
+ if (error)
+ return error;
+
+ /*
+ * If we ever need the ability to create rt metadata files on a
+ * pre-metadir filesystem, we'll need to dqattach the parent here.
+ * Currently we assume that mkfs will create the files and quotacheck
+ * will account for them.
+ */
+
+ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_create,
+ xfs_create_space_res(mp, MAXNAMELEN), 0, 0, &upd->tp);
+ if (error)
+ goto out_teardown;
+
+ /*
+ * Lock the parent directory if there is one. We can't ijoin it to
+ * the transaction until after the child file has been created.
+ */
+ xfs_ilock(upd->dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT);
+ upd->dp_locked = true;
+
+ trace_xfs_metadir_start_create(upd);
+ return 0;
+out_teardown:
+ xfs_metadir_teardown(upd, error);
+ return error;
+}
+
+/*
+ * Create a metadata inode with the given @mode, and insert it into the
+ * metadata directory tree at the given @upd->path. The path up to the final
+ * component must already exist. The final path component must not exist.
+ *
+ * The new metadata inode will be attached to the update structure @upd->ip,
+ * with the ILOCK held until the caller releases it.
+ *
+ * NOTE: This function may return a new inode to the caller even if it returns
+ * a negative error code. If an inode is passed back, the caller must finish
+ * setting up the inode before releasing it.
+ */
+int
+xfs_metadir_create(
+ struct xfs_metadir_update *upd,
+ umode_t mode)
+{
+ struct xfs_icreate_args args = {
+ .pip = upd->dp,
+ .mode = mode,
+ };
+ struct xfs_name xname;
+ struct xfs_dir_update du = {
+ .dp = upd->dp,
+ .name = &xname,
+ .ppargs = upd->ppargs,
+ };
+ struct xfs_mount *mp = upd->dp->i_mount;
+ xfs_ino_t ino;
+ unsigned int resblks;
+ int error;
+
+ xfs_assert_ilocked(upd->dp, XFS_ILOCK_EXCL);
+
+ /* Check that the name does not already exist in the directory. */
+ xfs_metadir_set_xname(&xname, upd->path, XFS_DIR3_FT_UNKNOWN);
+ error = xfs_metadir_lookup(upd->tp, upd->dp, &xname, &ino);
+ switch (error) {
+ case -ENOENT:
+ break;
+ case 0:
+ error = -EEXIST;
+ fallthrough;
+ default:
+ return error;
+ }
+
+ /*
+ * A newly created regular or special file just has one directory
+ * entry pointing to them, but a directory also the "." entry
+ * pointing to itself.
+ */
+ error = xfs_dialloc(&upd->tp, &args, &ino);
+ if (error)
+ return error;
+ error = xfs_icreate(upd->tp, ino, &args, &upd->ip);
+ if (error)
+ return error;
+ du.ip = upd->ip;
+ xfs_metafile_set_iflag(upd->tp, upd->ip, upd->metafile_type);
+ upd->ip_locked = true;
+
+ /*
+ * Join the directory inode to the transaction. We do not do it
+ * earlier because xfs_dialloc rolls the transaction.
+ */
+ xfs_trans_ijoin(upd->tp, upd->dp, 0);
+
+ /* Create the entry. */
+ if (S_ISDIR(args.mode))
+ resblks = xfs_mkdir_space_res(mp, xname.len);
+ else
+ resblks = xfs_create_space_res(mp, xname.len);
+ xname.type = xfs_mode_to_ftype(args.mode);
+
+ trace_xfs_metadir_try_create(upd);
+
+ error = xfs_dir_create_child(upd->tp, resblks, &du);
+ if (error)
+ return error;
+
+ /* Metadir files are not accounted to quota. */
+
+ trace_xfs_metadir_create(upd);
+
+ return 0;
+}
+
+#ifndef __KERNEL__
+/*
+ * Begin the process of linking a metadata file by allocating transactions
+ * and locking whatever resources we're going to need.
+ */
+int
+xfs_metadir_start_link(
+ struct xfs_metadir_update *upd)
+{
+ struct xfs_mount *mp = upd->dp->i_mount;
+ unsigned int resblks;
+ int nospace_error = 0;
+ int error;
+
+ ASSERT(upd->dp != NULL);
+ ASSERT(upd->ip != NULL);
+ ASSERT(xfs_has_metadir(mp));
+
+ error = xfs_parent_start(mp, &upd->ppargs);
+ if (error)
+ return error;
+
+ resblks = xfs_link_space_res(mp, MAXNAMELEN);
+ error = xfs_trans_alloc_dir(upd->dp, &M_RES(mp)->tr_link, upd->ip,
+ &resblks, &upd->tp, &nospace_error);
+ if (error)
+ goto out_teardown;
+ if (!resblks) {
+ /* We don't allow reservationless updates. */
+ xfs_trans_cancel(upd->tp);
+ upd->tp = NULL;
+ xfs_iunlock(upd->dp, XFS_ILOCK_EXCL);
+ xfs_iunlock(upd->ip, XFS_ILOCK_EXCL);
+ error = nospace_error;
+ goto out_teardown;
+ }
+
+ upd->dp_locked = true;
+ upd->ip_locked = true;
+
+ trace_xfs_metadir_start_link(upd);
+ return 0;
+out_teardown:
+ xfs_metadir_teardown(upd, error);
+ return error;
+}
+
+/*
+ * Link the metadata directory given by @path to the inode @upd->ip.
+ * The path (up to the final component) must already exist, but the final
+ * component must not already exist.
+ */
+int
+xfs_metadir_link(
+ struct xfs_metadir_update *upd)
+{
+ struct xfs_name xname;
+ struct xfs_dir_update du = {
+ .dp = upd->dp,
+ .name = &xname,
+ .ip = upd->ip,
+ .ppargs = upd->ppargs,
+ };
+ struct xfs_mount *mp = upd->dp->i_mount;
+ xfs_ino_t ino;
+ unsigned int resblks;
+ int error;
+
+ xfs_assert_ilocked(upd->dp, XFS_ILOCK_EXCL);
+ xfs_assert_ilocked(upd->ip, XFS_ILOCK_EXCL);
+
+ /* Look up the name in the current directory. */
+ xfs_metadir_set_xname(&xname, upd->path,
+ xfs_mode_to_ftype(VFS_I(upd->ip)->i_mode));
+ error = xfs_metadir_lookup(upd->tp, upd->dp, &xname, &ino);
+ switch (error) {
+ case -ENOENT:
+ break;
+ case 0:
+ error = -EEXIST;
+ fallthrough;
+ default:
+ return error;
+ }
+
+ resblks = xfs_link_space_res(mp, xname.len);
+ error = xfs_dir_add_child(upd->tp, resblks, &du);
+ if (error)
+ return error;
+
+ trace_xfs_metadir_link(upd);
+
+ return 0;
+}
+#endif /* ! __KERNEL__ */
+
+/* Commit a metadir update and unlock/drop all resources. */
+int
+xfs_metadir_commit(
+ struct xfs_metadir_update *upd)
+{
+ int error;
+
+ trace_xfs_metadir_commit(upd);
+
+ error = xfs_trans_commit(upd->tp);
+ upd->tp = NULL;
+
+ xfs_metadir_teardown(upd, error);
+ return error;
+}
+
+/* Cancel a metadir update and unlock/drop all resources. */
+void
+xfs_metadir_cancel(
+ struct xfs_metadir_update *upd,
+ int error)
+{
+ trace_xfs_metadir_cancel(upd);
+
+ xfs_trans_cancel(upd->tp);
+ upd->tp = NULL;
+
+ xfs_metadir_teardown(upd, error);
+}
+
+/* Create a metadata for the last component of the path. */
+int
+xfs_metadir_mkdir(
+ struct xfs_inode *dp,
+ const char *path,
+ struct xfs_inode **ipp)
+{
+ struct xfs_metadir_update upd = {
+ .dp = dp,
+ .path = path,
+ .metafile_type = XFS_METAFILE_DIR,
+ };
+ int error;
+
+ if (xfs_is_shutdown(dp->i_mount))
+ return -EIO;
+
+ /* Allocate a transaction to create the last directory. */
+ error = xfs_metadir_start_create(&upd);
+ if (error)
+ return error;
+
+ /* Create the subdirectory and take our reference. */
+ error = xfs_metadir_create(&upd, S_IFDIR);
+ if (error)
+ goto out_cancel;
+
+ error = xfs_metadir_commit(&upd);
+ if (error)
+ goto out_irele;
+
+ xfs_finish_inode_setup(upd.ip);
+ *ipp = upd.ip;
+ return 0;
+
+out_cancel:
+ xfs_metadir_cancel(&upd, error);
+out_irele:
+ /* Have to finish setting up the inode to ensure it's deleted. */
+ if (upd.ip) {
+ xfs_finish_inode_setup(upd.ip);
+ xfs_irele(upd.ip);
+ }
+ return error;
+}
diff --git a/fs/xfs/libxfs/xfs_metadir.h b/fs/xfs/libxfs/xfs_metadir.h
new file mode 100644
index 000000000000..bfecac7d3d14
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_metadir.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_METADIR_H__
+#define __XFS_METADIR_H__
+
+/* Cleanup widget for metadata inode creation and deletion. */
+struct xfs_metadir_update {
+ /* Parent directory */
+ struct xfs_inode *dp;
+
+ /* Path to metadata file */
+ const char *path;
+
+ /* Parent pointer update context */
+ struct xfs_parent_args *ppargs;
+
+ /* Child metadata file */
+ struct xfs_inode *ip;
+
+ struct xfs_trans *tp;
+
+ enum xfs_metafile_type metafile_type;
+
+ unsigned int dp_locked:1;
+ unsigned int ip_locked:1;
+};
+
+int xfs_metadir_load(struct xfs_trans *tp, struct xfs_inode *dp,
+ const char *path, enum xfs_metafile_type metafile_type,
+ struct xfs_inode **ipp);
+
+int xfs_metadir_start_create(struct xfs_metadir_update *upd);
+int xfs_metadir_create(struct xfs_metadir_update *upd, umode_t mode);
+
+int xfs_metadir_start_link(struct xfs_metadir_update *upd);
+int xfs_metadir_link(struct xfs_metadir_update *upd);
+
+int xfs_metadir_commit(struct xfs_metadir_update *upd);
+void xfs_metadir_cancel(struct xfs_metadir_update *upd, int error);
+
+int xfs_metadir_mkdir(struct xfs_inode *dp, const char *path,
+ struct xfs_inode **ipp);
+
+#endif /* __XFS_METADIR_H__ */
diff --git a/fs/xfs/libxfs/xfs_metafile.c b/fs/xfs/libxfs/xfs_metafile.c
new file mode 100644
index 000000000000..adeb25d1a444
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_metafile.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_bit.h"
+#include "xfs_sb.h"
+#include "xfs_mount.h"
+#include "xfs_defer.h"
+#include "xfs_trans.h"
+#include "xfs_metafile.h"
+#include "xfs_trace.h"
+#include "xfs_inode.h"
+
+/* Set up an inode to be recognized as a metadata directory inode. */
+void
+xfs_metafile_set_iflag(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ enum xfs_metafile_type metafile_type)
+{
+ VFS_I(ip)->i_mode &= ~0777;
+ VFS_I(ip)->i_uid = GLOBAL_ROOT_UID;
+ VFS_I(ip)->i_gid = GLOBAL_ROOT_GID;
+ if (S_ISDIR(VFS_I(ip)->i_mode))
+ ip->i_diflags |= XFS_METADIR_DIFLAGS;
+ else
+ ip->i_diflags |= XFS_METAFILE_DIFLAGS;
+ ip->i_diflags2 &= ~XFS_DIFLAG2_DAX;
+ ip->i_diflags2 |= XFS_DIFLAG2_METADATA;
+ ip->i_metatype = metafile_type;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+}
+
+/* Clear the metadata directory inode flag. */
+void
+xfs_metafile_clear_iflag(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
+{
+ ASSERT(xfs_is_metadir_inode(ip));
+ ASSERT(VFS_I(ip)->i_nlink == 0);
+
+ ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA;
+ xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
+}
diff --git a/fs/xfs/libxfs/xfs_metafile.h b/fs/xfs/libxfs/xfs_metafile.h
new file mode 100644
index 000000000000..acec400123db
--- /dev/null
+++ b/fs/xfs/libxfs/xfs_metafile.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (c) 2018-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_METAFILE_H__
+#define __XFS_METAFILE_H__
+
+/* All metadata files must have these flags set. */
+#define XFS_METAFILE_DIFLAGS (XFS_DIFLAG_IMMUTABLE | \
+ XFS_DIFLAG_SYNC | \
+ XFS_DIFLAG_NOATIME | \
+ XFS_DIFLAG_NODUMP | \
+ XFS_DIFLAG_NODEFRAG)
+
+/* All metadata directories must have these flags set. */
+#define XFS_METADIR_DIFLAGS (XFS_METAFILE_DIFLAGS | \
+ XFS_DIFLAG_NOSYMLINKS)
+
+void xfs_metafile_set_iflag(struct xfs_trans *tp, struct xfs_inode *ip,
+ enum xfs_metafile_type metafile_type);
+void xfs_metafile_clear_iflag(struct xfs_trans *tp, struct xfs_inode *ip);
+
+/* Code specific to kernel/userspace; must be provided externally. */
+
+int xfs_trans_metafile_iget(struct xfs_trans *tp, xfs_ino_t ino,
+ enum xfs_metafile_type metafile_type, struct xfs_inode **ipp);
+int xfs_metafile_iget(struct xfs_mount *mp, xfs_ino_t ino,
+ enum xfs_metafile_type metafile_type, struct xfs_inode **ipp);
+
+#endif /* __XFS_METAFILE_H__ */
diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h
index 23c133fd36f5..8bca86e350fd 100644
--- a/fs/xfs/libxfs/xfs_ondisk.h
+++ b/fs/xfs/libxfs/xfs_ondisk.h
@@ -37,7 +37,7 @@ xfs_check_ondisk_structs(void)
XFS_CHECK_STRUCT_SIZE(struct xfs_dinode, 176);
XFS_CHECK_STRUCT_SIZE(struct xfs_disk_dquot, 104);
XFS_CHECK_STRUCT_SIZE(struct xfs_dqblk, 136);
- XFS_CHECK_STRUCT_SIZE(struct xfs_dsb, 264);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dsb, 272);
XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr, 56);
XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key, 4);
XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec, 16);
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index aae67c7afcb9..e6fad3512220 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -180,6 +180,8 @@ xfs_sb_version_to_features(
features |= XFS_FEAT_EXCHANGE_RANGE;
if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_PARENT)
features |= XFS_FEAT_PARENT;
+ if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)
+ features |= XFS_FEAT_METADIR;
return features;
}
@@ -703,6 +705,11 @@ __xfs_sb_from_disk(
/* Convert on-disk flags to in-memory flags? */
if (convert_xquota)
xfs_sb_quota_from_disk(to);
+
+ if (to->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)
+ to->sb_metadirino = be64_to_cpu(from->sb_metadirino);
+ else
+ to->sb_metadirino = NULLFSINO;
}
void
@@ -850,6 +857,9 @@ xfs_sb_to_disk(
to->sb_lsn = cpu_to_be64(from->sb_lsn);
if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID)
uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid);
+
+ if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR)
+ to->sb_metadirino = cpu_to_be64(from->sb_metadirino);
}
/*
@@ -1299,6 +1309,8 @@ xfs_fs_geometry(
geo->flags |= XFS_FSOP_GEOM_FLAGS_NREXT64;
if (xfs_has_exchange_range(mp))
geo->flags |= XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE;
+ if (xfs_has_metadir(mp))
+ geo->flags |= XFS_FSOP_GEOM_FLAGS_METADIR;
geo->rtsectsize = sbp->sb_blocksize;
geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp);
diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c
index c91db4f51407..1cbfe57e971d 100644
--- a/fs/xfs/libxfs/xfs_types.c
+++ b/fs/xfs/libxfs/xfs_types.c
@@ -111,7 +111,7 @@ xfs_verify_ino(
/* Is this an internal inode number? */
inline bool
-xfs_internal_inum(
+xfs_is_sb_inum(
struct xfs_mount *mp,
xfs_ino_t ino)
{
@@ -129,7 +129,7 @@ xfs_verify_dir_ino(
struct xfs_mount *mp,
xfs_ino_t ino)
{
- if (xfs_internal_inum(mp, ino))
+ if (xfs_is_sb_inum(mp, ino))
return false;
return xfs_verify_ino(mp, ino);
}
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index d3cb6ff3b913..25053a66c225 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -230,7 +230,7 @@ bool xfs_verify_fsbext(struct xfs_mount *mp, xfs_fsblock_t fsbno,
xfs_fsblock_t len);
bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino);
-bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino);
+bool xfs_is_sb_inum(struct xfs_mount *mp, xfs_ino_t ino);
bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino);
bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
bool xfs_verify_rtbext(struct xfs_mount *mp, xfs_rtblock_t rtbno,
diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c
index f8e5b67128d2..cad997f38a42 100644
--- a/fs/xfs/scrub/agheader.c
+++ b/fs/xfs/scrub/agheader.c
@@ -144,6 +144,11 @@ xchk_superblock(
if (sb->sb_rootino != cpu_to_be64(mp->m_sb.sb_rootino))
xchk_block_set_preen(sc, bp);
+ if (xfs_has_metadir(sc->mp)) {
+ if (sb->sb_metadirino != cpu_to_be64(mp->m_sb.sb_metadirino))
+ xchk_block_set_preen(sc, bp);
+ }
+
if (sb->sb_rbmino != cpu_to_be64(mp->m_sb.sb_rbmino))
xchk_block_set_preen(sc, bp);
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index e8b5e73bab60..c26a3314237a 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -39,6 +39,7 @@
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/health.h"
+#include "scrub/tempfile.h"
/* Common code for the metadata scrubbers. */
@@ -947,9 +948,15 @@ xchk_iget_for_scrubbing(
if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino)
return xchk_install_live_inode(sc, ip_in);
- /* Reject internal metadata files and obviously bad inode numbers. */
- if (xfs_internal_inum(mp, sc->sm->sm_ino))
+ /*
+ * On pre-metadir filesystems, reject internal metadata files. For
+ * metadir filesystems, limited scrubbing of any file in the metadata
+ * directory tree by handle is allowed, because that is the only way to
+ * validate the lack of parent pointers in the sb-root metadata inodes.
+ */
+ if (!xfs_has_metadir(mp) && xfs_is_sb_inum(mp, sc->sm->sm_ino))
return -ENOENT;
+ /* Reject obviously bad inode numbers. */
if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino))
return -ENOENT;
@@ -1084,6 +1091,10 @@ xchk_setup_inode_contents(
if (error)
return error;
+ error = xrep_tempfile_adjust_directory_tree(sc);
+ if (error)
+ return error;
+
/* Lock the inode so the VFS cannot touch this file. */
xchk_ilock(sc, XFS_IOLOCK_EXCL);
@@ -1239,12 +1250,6 @@ xchk_metadata_inode_forks(
return 0;
}
- /* They also should never have extended attributes. */
- if (xfs_inode_hasattr(sc->ip)) {
- xchk_ino_set_corrupt(sc, sc->ip->i_ino);
- return 0;
- }
-
/* Invoke the data fork scrubber. */
error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD);
if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
@@ -1261,6 +1266,21 @@ xchk_metadata_inode_forks(
xchk_ino_set_corrupt(sc, sc->ip->i_ino);
}
+ /*
+ * Metadata files can only have extended attributes on metadir
+ * filesystems, either for parent pointers or for actual xattr data.
+ */
+ if (xfs_inode_hasattr(sc->ip)) {
+ if (!xfs_has_metadir(sc->mp)) {
+ xchk_ino_set_corrupt(sc, sc->ip->i_ino);
+ return 0;
+ }
+
+ error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTA);
+ if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
+ return error;
+ }
+
return 0;
}
@@ -1446,3 +1466,32 @@ out_rcu:
rcu_read_unlock();
return error;
}
+
+/* Is this inode a root directory for either tree? */
+bool
+xchk_inode_is_dirtree_root(const struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+
+ return ip == mp->m_rootip ||
+ (xfs_has_metadir(mp) && ip == mp->m_metadirip);
+}
+
+/* Does the superblock point down to this inode? */
+bool
+xchk_inode_is_sb_rooted(const struct xfs_inode *ip)
+{
+ return xchk_inode_is_dirtree_root(ip) ||
+ xfs_is_sb_inum(ip->i_mount, ip->i_ino);
+}
+
+/* What is the root directory inumber for this inode? */
+xfs_ino_t
+xchk_inode_rootdir_inum(const struct xfs_inode *ip)
+{
+ struct xfs_mount *mp = ip->i_mount;
+
+ if (xfs_is_metadir_inode(ip))
+ return mp->m_metadirip->i_ino;
+ return mp->m_rootip->i_ino;
+}
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index f3db628b14e1..b2a81e85ded9 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -73,6 +73,7 @@ int xchk_setup_xattr(struct xfs_scrub *sc);
int xchk_setup_symlink(struct xfs_scrub *sc);
int xchk_setup_parent(struct xfs_scrub *sc);
int xchk_setup_dirtree(struct xfs_scrub *sc);
+int xchk_setup_metapath(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT
int xchk_setup_rtbitmap(struct xfs_scrub *sc);
int xchk_setup_rtsummary(struct xfs_scrub *sc);
@@ -242,4 +243,8 @@ void xchk_fsgates_enable(struct xfs_scrub *sc, unsigned int scrub_fshooks);
int xchk_inode_is_allocated(struct xfs_scrub *sc, xfs_agino_t agino,
bool *inuse);
+bool xchk_inode_is_dirtree_root(const struct xfs_inode *ip);
+bool xchk_inode_is_sb_rooted(const struct xfs_inode *ip);
+xfs_ino_t xchk_inode_rootdir_inum(const struct xfs_inode *ip);
+
#endif /* __XFS_SCRUB_COMMON_H__ */
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index bf9199e8df63..c877bde71e62 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -100,6 +100,14 @@ xchk_dir_check_ftype(
if (xfs_mode_to_ftype(VFS_I(ip)->i_mode) != ftype)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
+
+ /*
+ * Metadata and regular inodes cannot cross trees. This property
+ * cannot change without a full inode free and realloc cycle, so it's
+ * safe to check this without holding locks.
+ */
+ if (xfs_is_metadir_inode(ip) != xfs_is_metadir_inode(sc->ip))
+ xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
}
/*
@@ -253,7 +261,7 @@ xchk_dir_actor(
* If this is ".." in the root inode, check that the inum
* matches this dir.
*/
- if (dp->i_ino == mp->m_sb.sb_rootino && ino != dp->i_ino)
+ if (xchk_inode_is_dirtree_root(dp) && ino != dp->i_ino)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset);
}
diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c
index 64679fe08446..249313882108 100644
--- a/fs/xfs/scrub/dir_repair.c
+++ b/fs/xfs/scrub/dir_repair.c
@@ -415,6 +415,12 @@ xrep_dir_salvage_entry(
if (error)
return 0;
+ /* Don't mix metadata and regular directory trees. */
+ if (xfs_is_metadir_inode(ip) != xfs_is_metadir_inode(rd->sc->ip)) {
+ xchk_irele(sc, ip);
+ return 0;
+ }
+
xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode);
xchk_irele(sc, ip);
@@ -1270,7 +1276,7 @@ xrep_dir_scan_dirtree(
int error;
/* Roots of directory trees are their own parents. */
- if (sc->ip == sc->mp->m_rootip)
+ if (xchk_inode_is_dirtree_root(sc->ip))
xrep_findparent_scan_found(&rd->pscan, sc->ip->i_ino);
/*
@@ -1632,6 +1638,7 @@ xrep_dir_swap(
struct xrep_dir *rd)
{
struct xfs_scrub *sc = rd->sc;
+ xfs_ino_t ino;
bool ip_local, temp_local;
int error = 0;
@@ -1649,14 +1656,17 @@ xrep_dir_swap(
/*
* Reset the temporary directory's '..' entry to point to the parent
- * that we found. The temporary directory was created with the root
- * directory as the parent, so we can skip this if repairing a
- * subdirectory of the root.
+ * that we found. The dirent replace code asserts if the dirent
+ * already points at the new inumber, so we look it up here.
*
* It's also possible that this replacement could also expand a sf
* tempdir into block format.
*/
- if (rd->pscan.parent_ino != sc->mp->m_rootip->i_ino) {
+ error = xchk_dir_lookup(sc, rd->sc->tempip, &xfs_name_dotdot, &ino);
+ if (error)
+ return error;
+
+ if (rd->pscan.parent_ino != ino) {
error = xrep_dir_replace(rd, rd->sc->tempip, &xfs_name_dotdot,
rd->pscan.parent_ino, rd->tx.req.resblks);
if (error)
diff --git a/fs/xfs/scrub/dirtree.c b/fs/xfs/scrub/dirtree.c
index bde58fb561ea..3a9cdf8738b6 100644
--- a/fs/xfs/scrub/dirtree.c
+++ b/fs/xfs/scrub/dirtree.c
@@ -362,7 +362,8 @@ xchk_dirpath_set_outcome(
STATIC int
xchk_dirpath_step_up(
struct xchk_dirtree *dl,
- struct xchk_dirpath *path)
+ struct xchk_dirpath *path,
+ bool is_metadir)
{
struct xfs_scrub *sc = dl->sc;
struct xfs_inode *dp;
@@ -435,6 +436,14 @@ xchk_dirpath_step_up(
goto out_scanlock;
}
+ /* Parent must be in the same directory tree. */
+ if (is_metadir != xfs_is_metadir_inode(dp)) {
+ trace_xchk_dirpath_crosses_tree(dl->sc, dp, path->path_nr,
+ path->nr_steps, &dl->xname, &dl->pptr_rec);
+ error = -EFSCORRUPTED;
+ goto out_scanlock;
+ }
+
/*
* If the extended attributes look as though they has been zapped by
* the inode record repair code, we cannot scan for parent pointers.
@@ -508,6 +517,7 @@ xchk_dirpath_walk_upwards(
struct xchk_dirpath *path)
{
struct xfs_scrub *sc = dl->sc;
+ bool is_metadir;
int error;
ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL);
@@ -538,6 +548,7 @@ xchk_dirpath_walk_upwards(
* ILOCK state is no longer tracked in the scrub context. Hence we
* must drop @sc->ip's ILOCK during the walk.
*/
+ is_metadir = xfs_is_metadir_inode(sc->ip);
mutex_unlock(&dl->lock);
xchk_iunlock(sc, XFS_ILOCK_EXCL);
@@ -547,7 +558,7 @@ xchk_dirpath_walk_upwards(
* If we see any kind of error here (including corruptions), the parent
* pointer of @sc->ip is corrupt. Stop the whole scan.
*/
- error = xchk_dirpath_step_up(dl, path);
+ error = xchk_dirpath_step_up(dl, path, is_metadir);
if (error) {
xchk_ilock(sc, XFS_ILOCK_EXCL);
mutex_lock(&dl->lock);
@@ -560,7 +571,7 @@ xchk_dirpath_walk_upwards(
* *somewhere* in the path, but we don't need to stop scanning.
*/
while (!error && path->outcome == XCHK_DIRPATH_SCANNING)
- error = xchk_dirpath_step_up(dl, path);
+ error = xchk_dirpath_step_up(dl, path, is_metadir);
/* Retake the locks we had, mark paths, etc. */
xchk_ilock(sc, XFS_ILOCK_EXCL);
@@ -917,7 +928,7 @@ xchk_dirtree(
* scan, because the hook doesn't detach until after sc->ip gets
* released during teardown.
*/
- dl->root_ino = sc->mp->m_rootip->i_ino;
+ dl->root_ino = xchk_inode_rootdir_inum(sc->ip);
dl->scan_ino = sc->ip->i_ino;
trace_xchk_dirtree_start(sc->ip, sc->sm, 0);
@@ -983,3 +994,16 @@ out:
trace_xchk_dirtree_done(sc->ip, sc->sm, error);
return error;
}
+
+/* Does the directory targetted by this scrub have no parents? */
+bool
+xchk_dirtree_parentless(const struct xchk_dirtree *dl)
+{
+ struct xfs_scrub *sc = dl->sc;
+
+ if (xchk_inode_is_dirtree_root(sc->ip))
+ return true;
+ if (VFS_I(sc->ip)->i_nlink == 0)
+ return true;
+ return false;
+}
diff --git a/fs/xfs/scrub/dirtree.h b/fs/xfs/scrub/dirtree.h
index 1e1686365c61..9e5d95492717 100644
--- a/fs/xfs/scrub/dirtree.h
+++ b/fs/xfs/scrub/dirtree.h
@@ -156,17 +156,7 @@ struct xchk_dirtree {
#define xchk_dirtree_for_each_path(dl, path) \
list_for_each_entry((path), &(dl)->path_list, list)
-static inline bool
-xchk_dirtree_parentless(const struct xchk_dirtree *dl)
-{
- struct xfs_scrub *sc = dl->sc;
-
- if (sc->ip == sc->mp->m_rootip)
- return true;
- if (VFS_I(sc->ip)->i_nlink == 0)
- return true;
- return false;
-}
+bool xchk_dirtree_parentless(const struct xchk_dirtree *dl);
int xchk_dirtree_find_paths_to_root(struct xchk_dirtree *dl);
int xchk_dirpath_append(struct xchk_dirtree *dl, struct xfs_inode *ip,
diff --git a/fs/xfs/scrub/findparent.c b/fs/xfs/scrub/findparent.c
index 01766041ba2c..84487072b6dd 100644
--- a/fs/xfs/scrub/findparent.c
+++ b/fs/xfs/scrub/findparent.c
@@ -172,6 +172,10 @@ xrep_findparent_walk_directory(
*/
lock_mode = xfs_ilock_data_map_shared(dp);
+ /* Don't mix metadata and regular directory trees. */
+ if (xfs_is_metadir_inode(dp) != xfs_is_metadir_inode(sc->ip))
+ goto out_unlock;
+
/*
* If this directory is known to be sick, we cannot scan it reliably
* and must abort.
@@ -362,15 +366,24 @@ xrep_findparent_confirm(
};
int error;
- /*
- * The root directory always points to itself. Unlinked dirs can point
- * anywhere, so we point them at the root dir too.
- */
- if (sc->ip == sc->mp->m_rootip || VFS_I(sc->ip)->i_nlink == 0) {
+ /* The root directory always points to itself. */
+ if (sc->ip == sc->mp->m_rootip) {
*parent_ino = sc->mp->m_sb.sb_rootino;
return 0;
}
+ /* The metadata root directory always points to itself. */
+ if (sc->ip == sc->mp->m_metadirip) {
+ *parent_ino = sc->mp->m_sb.sb_metadirino;
+ return 0;
+ }
+
+ /* Unlinked dirs can point anywhere; point them up to the root dir. */
+ if (VFS_I(sc->ip)->i_nlink == 0) {
+ *parent_ino = xchk_inode_rootdir_inum(sc->ip);
+ return 0;
+ }
+
/* Reject garbage parent inode numbers and self-referential parents. */
if (*parent_ino == NULLFSINO)
return 0;
@@ -412,8 +425,11 @@ xrep_findparent_self_reference(
if (sc->ip->i_ino == sc->mp->m_sb.sb_rootino)
return sc->mp->m_sb.sb_rootino;
+ if (sc->ip->i_ino == sc->mp->m_sb.sb_metadirino)
+ return sc->mp->m_sb.sb_metadirino;
+
if (VFS_I(sc->ip)->i_nlink == 0)
- return sc->mp->m_sb.sb_rootino;
+ return xchk_inode_rootdir_inum(sc->ip);
return NULLFSINO;
}
diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c
index 6ceef3749e3b..b8b92ff3a573 100644
--- a/fs/xfs/scrub/health.c
+++ b/fs/xfs/scrub/health.c
@@ -109,6 +109,7 @@ static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = {
[XFS_SCRUB_TYPE_QUOTACHECK] = { XHG_FS, XFS_SICK_FS_QUOTACHECK },
[XFS_SCRUB_TYPE_NLINKS] = { XHG_FS, XFS_SICK_FS_NLINKS },
[XFS_SCRUB_TYPE_DIRTREE] = { XHG_INO, XFS_SICK_INO_DIRTREE },
+ [XFS_SCRUB_TYPE_METAPATH] = { XHG_FS, XFS_SICK_FS_METAPATH },
};
/* Return the health status mask for this scrub type. */
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index d32716fb2fec..25ee66e7649d 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -60,6 +60,22 @@ xchk_install_handle_iscrub(
if (error)
return error;
+ /*
+ * Don't allow scrubbing by handle of any non-directory inode records
+ * in the metadata directory tree. We don't know if any of the scans
+ * launched by this scrubber will end up indirectly trying to lock this
+ * file.
+ *
+ * Scrubbers of inode-rooted metadata files (e.g. quota files) will
+ * attach all the resources needed to scrub the inode and call
+ * xchk_inode directly. Userspace cannot call this directly.
+ */
+ if (xfs_is_metadir_inode(ip) && !S_ISDIR(VFS_I(ip)->i_mode)) {
+ xchk_irele(sc, ip);
+ sc->ip = NULL;
+ return -ENOENT;
+ }
+
return xchk_prepare_iscrub(sc);
}
@@ -94,9 +110,15 @@ xchk_setup_inode(
return xchk_prepare_iscrub(sc);
}
- /* Reject internal metadata files and obviously bad inode numbers. */
- if (xfs_internal_inum(mp, sc->sm->sm_ino))
+ /*
+ * On pre-metadir filesystems, reject internal metadata files. For
+ * metadir filesystems, limited scrubbing of any file in the metadata
+ * directory tree by handle is allowed, because that is the only way to
+ * validate the lack of parent pointers in the sb-root metadata inodes.
+ */
+ if (!xfs_has_metadir(mp) && xfs_is_sb_inum(mp, sc->sm->sm_ino))
return -ENOENT;
+ /* Reject obviously bad inode numbers. */
if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino))
return -ENOENT;
@@ -421,8 +443,13 @@ xchk_dinode(
break;
case 2:
case 3:
- if (dip->di_onlink != 0)
- xchk_ino_set_corrupt(sc, ino);
+ if (xfs_dinode_is_metadir(dip)) {
+ if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX)
+ xchk_ino_set_corrupt(sc, ino);
+ } else {
+ if (dip->di_metatype != 0)
+ xchk_ino_set_corrupt(sc, ino);
+ }
if (dip->di_mode == 0 && sc->ip)
xchk_ino_set_corrupt(sc, ino);
diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c
index 5da9e1a387a8..5a58ddd27bd2 100644
--- a/fs/xfs/scrub/inode_repair.c
+++ b/fs/xfs/scrub/inode_repair.c
@@ -521,10 +521,17 @@ STATIC void
xrep_dinode_nlinks(
struct xfs_dinode *dip)
{
- if (dip->di_version > 1)
- dip->di_onlink = 0;
- else
+ if (dip->di_version < 2) {
dip->di_nlink = 0;
+ return;
+ }
+
+ if (xfs_dinode_is_metadir(dip)) {
+ if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX)
+ dip->di_metatype = cpu_to_be16(XFS_METAFILE_UNKNOWN);
+ } else {
+ dip->di_metatype = 0;
+ }
}
/* Fix any conflicting flags that the verifiers complain about. */
@@ -565,6 +572,16 @@ xrep_dinode_flags(
dip->di_nrext64_pad = 0;
else if (dip->di_version >= 3)
dip->di_v3_pad = 0;
+
+ if (flags2 & XFS_DIFLAG2_METADATA) {
+ xfs_failaddr_t fa;
+
+ fa = xfs_dinode_verify_metadir(sc->mp, dip, mode, flags,
+ flags2);
+ if (fa)
+ flags2 &= ~XFS_DIFLAG2_METADATA;
+ }
+
dip->di_flags = cpu_to_be16(flags);
dip->di_flags2 = cpu_to_be64(flags2);
}
@@ -1754,15 +1771,8 @@ xrep_inode_pptr(
if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE))
return 0;
- /* The root directory doesn't have a parent pointer. */
- if (ip == mp->m_rootip)
- return 0;
-
- /*
- * Metadata inodes are rooted in the superblock and do not have any
- * parents.
- */
- if (xfs_is_metadata_inode(ip))
+ /* Children of the superblock do not have parent pointers. */
+ if (xchk_inode_is_sb_rooted(ip))
return 0;
/* Inode already has an attr fork; no further work possible here. */
diff --git a/fs/xfs/scrub/metapath.c b/fs/xfs/scrub/metapath.c
new file mode 100644
index 000000000000..edc1a395c401
--- /dev/null
+++ b/fs/xfs/scrub/metapath.c
@@ -0,0 +1,521 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2023-2024 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_inode.h"
+#include "xfs_metafile.h"
+#include "xfs_quota.h"
+#include "xfs_qm.h"
+#include "xfs_dir2.h"
+#include "xfs_parent.h"
+#include "xfs_bmap_btree.h"
+#include "xfs_trans_space.h"
+#include "xfs_attr.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/trace.h"
+#include "scrub/readdir.h"
+#include "scrub/repair.h"
+
+/*
+ * Metadata Directory Tree Paths
+ * =============================
+ *
+ * A filesystem with metadir enabled expects to find metadata structures
+ * attached to files that are accessible by walking a path down the metadata
+ * directory tree. Given the metadir path and the incore inode storing the
+ * metadata, this scrubber ensures that the ondisk metadir path points to the
+ * ondisk inode represented by the incore inode.
+ */
+
+struct xchk_metapath {
+ struct xfs_scrub *sc;
+
+ /* Name for lookup */
+ struct xfs_name xname;
+
+ /* Directory update for repairs */
+ struct xfs_dir_update du;
+
+ /* Path down to this metadata file from the parent directory */
+ const char *path;
+
+ /* Directory parent of the metadata file. */
+ struct xfs_inode *dp;
+
+ /* Locks held on dp */
+ unsigned int dp_ilock_flags;
+
+ /* Transaction block reservations */
+ unsigned int link_resblks;
+ unsigned int unlink_resblks;
+
+ /* Parent pointer updates */
+ struct xfs_parent_args link_ppargs;
+ struct xfs_parent_args unlink_ppargs;
+
+ /* Scratchpads for removing links */
+ struct xfs_da_args pptr_args;
+};
+
+/* Release resources tracked in the buffer. */
+static inline void
+xchk_metapath_cleanup(
+ void *buf)
+{
+ struct xchk_metapath *mpath = buf;
+
+ if (mpath->dp_ilock_flags)
+ xfs_iunlock(mpath->dp, mpath->dp_ilock_flags);
+ kfree(mpath->path);
+}
+
+int
+xchk_setup_metapath(
+ struct xfs_scrub *sc)
+{
+ if (!xfs_has_metadir(sc->mp))
+ return -ENOENT;
+ if (sc->sm->sm_gen)
+ return -EINVAL;
+
+ switch (sc->sm->sm_ino) {
+ case XFS_SCRUB_METAPATH_PROBE:
+ /* Just probing, nothing else to do. */
+ if (sc->sm->sm_agno)
+ return -EINVAL;
+ return 0;
+ default:
+ return -ENOENT;
+ }
+}
+
+/*
+ * Take the ILOCK on the metadata directory parent and child. We do not know
+ * that the metadata directory is not corrupt, so we lock the parent and try
+ * to lock the child. Returns 0 if successful, or -EINTR to abort the scrub.
+ */
+STATIC int
+xchk_metapath_ilock_both(
+ struct xchk_metapath *mpath)
+{
+ struct xfs_scrub *sc = mpath->sc;
+ int error = 0;
+
+ while (true) {
+ xfs_ilock(mpath->dp, XFS_ILOCK_EXCL);
+ if (xchk_ilock_nowait(sc, XFS_ILOCK_EXCL)) {
+ mpath->dp_ilock_flags |= XFS_ILOCK_EXCL;
+ return 0;
+ }
+ xfs_iunlock(mpath->dp, XFS_ILOCK_EXCL);
+
+ if (xchk_should_terminate(sc, &error))
+ return error;
+
+ delay(1);
+ }
+
+ ASSERT(0);
+ return -EINTR;
+}
+
+/* Unlock parent and child inodes. */
+static inline void
+xchk_metapath_iunlock(
+ struct xchk_metapath *mpath)
+{
+ struct xfs_scrub *sc = mpath->sc;
+
+ xchk_iunlock(sc, XFS_ILOCK_EXCL);
+
+ mpath->dp_ilock_flags &= ~XFS_ILOCK_EXCL;
+ xfs_iunlock(mpath->dp, XFS_ILOCK_EXCL);
+}
+
+int
+xchk_metapath(
+ struct xfs_scrub *sc)
+{
+ struct xchk_metapath *mpath = sc->buf;
+ xfs_ino_t ino = NULLFSINO;
+ int error;
+
+ /* Just probing, nothing else to do. */
+ if (sc->sm->sm_ino == XFS_SCRUB_METAPATH_PROBE)
+ return 0;
+
+ /* Parent required to do anything else. */
+ if (mpath->dp == NULL) {
+ xchk_ino_set_corrupt(sc, sc->ip->i_ino);
+ return 0;
+ }
+
+ error = xchk_trans_alloc_empty(sc);
+ if (error)
+ return error;
+
+ error = xchk_metapath_ilock_both(mpath);
+ if (error)
+ goto out_cancel;
+
+ /* Make sure the parent dir has a dirent pointing to this file. */
+ error = xchk_dir_lookup(sc, mpath->dp, &mpath->xname, &ino);
+ trace_xchk_metapath_lookup(sc, mpath->path, mpath->dp, ino);
+ if (error == -ENOENT) {
+ /* No directory entry at all */
+ xchk_ino_set_corrupt(sc, sc->ip->i_ino);
+ error = 0;
+ goto out_ilock;
+ }
+ if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
+ goto out_ilock;
+ if (ino != sc->ip->i_ino) {
+ /* Pointing to wrong inode */
+ xchk_ino_set_corrupt(sc, sc->ip->i_ino);
+ }
+
+out_ilock:
+ xchk_metapath_iunlock(mpath);
+out_cancel:
+ xchk_trans_cancel(sc);
+ return error;
+}
+
+#ifdef CONFIG_XFS_ONLINE_REPAIR
+/* Create the dirent represented by the final component of the path. */
+STATIC int
+xrep_metapath_link(
+ struct xchk_metapath *mpath)
+{
+ struct xfs_scrub *sc = mpath->sc;
+
+ mpath->du.dp = mpath->dp;
+ mpath->du.name = &mpath->xname;
+ mpath->du.ip = sc->ip;
+
+ if (xfs_has_parent(sc->mp))
+ mpath->du.ppargs = &mpath->link_ppargs;
+ else
+ mpath->du.ppargs = NULL;
+
+ trace_xrep_metapath_link(sc, mpath->path, mpath->dp, sc->ip->i_ino);
+
+ return xfs_dir_add_child(sc->tp, mpath->link_resblks, &mpath->du);
+}
+
+/* Remove the dirent at the final component of the path. */
+STATIC int
+xrep_metapath_unlink(
+ struct xchk_metapath *mpath,
+ xfs_ino_t ino,
+ struct xfs_inode *ip)
+{
+ struct xfs_parent_rec rec;
+ struct xfs_scrub *sc = mpath->sc;
+ struct xfs_mount *mp = sc->mp;
+ int error;
+
+ trace_xrep_metapath_unlink(sc, mpath->path, mpath->dp, ino);
+
+ if (!ip) {
+ /* The child inode isn't allocated. Junk the dirent. */
+ xfs_trans_log_inode(sc->tp, mpath->dp, XFS_ILOG_CORE);
+ return xfs_dir_removename(sc->tp, mpath->dp, &mpath->xname,
+ ino, mpath->unlink_resblks);
+ }
+
+ mpath->du.dp = mpath->dp;
+ mpath->du.name = &mpath->xname;
+ mpath->du.ip = ip;
+ mpath->du.ppargs = NULL;
+
+ /* Figure out if we're removing a parent pointer too. */
+ if (xfs_has_parent(mp)) {
+ xfs_inode_to_parent_rec(&rec, ip);
+ error = xfs_parent_lookup(sc->tp, ip, &mpath->xname, &rec,
+ &mpath->pptr_args);
+ switch (error) {
+ case -ENOATTR:
+ break;
+ case 0:
+ mpath->du.ppargs = &mpath->unlink_ppargs;
+ break;
+ default:
+ return error;
+ }
+ }
+
+ return xfs_dir_remove_child(sc->tp, mpath->unlink_resblks, &mpath->du);
+}
+
+/*
+ * Try to create a dirent in @mpath->dp with the name @mpath->xname that points
+ * to @sc->ip. Returns:
+ *
+ * -EEXIST and an @alleged_child if the dirent that points to the wrong inode;
+ * 0 if there is now a dirent pointing to @sc->ip; or
+ * A negative errno on error.
+ */
+STATIC int
+xrep_metapath_try_link(
+ struct xchk_metapath *mpath,
+ xfs_ino_t *alleged_child)
+{
+ struct xfs_scrub *sc = mpath->sc;
+ xfs_ino_t ino;
+ int error;
+
+ /* Allocate transaction, lock inodes, join to transaction. */
+ error = xchk_trans_alloc(sc, mpath->link_resblks);
+ if (error)
+ return error;
+
+ error = xchk_metapath_ilock_both(mpath);
+ if (error) {
+ xchk_trans_cancel(sc);
+ return error;
+ }
+ xfs_trans_ijoin(sc->tp, mpath->dp, 0);
+ xfs_trans_ijoin(sc->tp, sc->ip, 0);
+
+ error = xchk_dir_lookup(sc, mpath->dp, &mpath->xname, &ino);
+ trace_xrep_metapath_lookup(sc, mpath->path, mpath->dp, ino);
+ if (error == -ENOENT) {
+ /*
+ * There is no dirent in the directory. Create an entry
+ * pointing to @sc->ip.
+ */
+ error = xrep_metapath_link(mpath);
+ if (error)
+ goto out_cancel;
+
+ error = xrep_trans_commit(sc);
+ xchk_metapath_iunlock(mpath);
+ return error;
+ }
+ if (error)
+ goto out_cancel;
+
+ if (ino == sc->ip->i_ino) {
+ /* The dirent already points to @sc->ip; we're done. */
+ error = 0;
+ goto out_cancel;
+ }
+
+ /*
+ * The dirent points elsewhere; pass that back so that the caller
+ * can try to remove the dirent.
+ */
+ *alleged_child = ino;
+ error = -EEXIST;
+
+out_cancel:
+ xchk_trans_cancel(sc);
+ xchk_metapath_iunlock(mpath);
+ return error;
+}
+
+/*
+ * Take the ILOCK on the metadata directory parent and a bad child, if one is
+ * supplied. We do not know that the metadata directory is not corrupt, so we
+ * lock the parent and try to lock the child. Returns 0 if successful, or
+ * -EINTR to abort the repair. The lock state of @dp is not recorded in @mpath.
+ */
+STATIC int
+xchk_metapath_ilock_parent_and_child(
+ struct xchk_metapath *mpath,
+ struct xfs_inode *ip)
+{
+ struct xfs_scrub *sc = mpath->sc;
+ int error = 0;
+
+ while (true) {
+ xfs_ilock(mpath->dp, XFS_ILOCK_EXCL);
+ if (!ip || xfs_ilock_nowait(ip, XFS_ILOCK_EXCL))
+ return 0;
+ xfs_iunlock(mpath->dp, XFS_ILOCK_EXCL);
+
+ if (xchk_should_terminate(sc, &error))
+ return error;
+
+ delay(1);
+ }
+
+ ASSERT(0);
+ return -EINTR;
+}
+
+/*
+ * Try to remove a dirent in @mpath->dp with the name @mpath->xname that points
+ * to @alleged_child. Returns:
+ *
+ * 0 if there is no longer a dirent;
+ * -EEXIST if the dirent points to @sc->ip;
+ * -EAGAIN and an updated @alleged_child if the dirent points elsewhere; or
+ * A negative errno for any other error.
+ */
+STATIC int
+xrep_metapath_try_unlink(
+ struct xchk_metapath *mpath,
+ xfs_ino_t *alleged_child)
+{
+ struct xfs_scrub *sc = mpath->sc;
+ struct xfs_inode *ip = NULL;
+ xfs_ino_t ino;
+ int error;
+
+ ASSERT(*alleged_child != sc->ip->i_ino);
+
+ trace_xrep_metapath_try_unlink(sc, mpath->path, mpath->dp,
+ *alleged_child);
+
+ /*
+ * Allocate transaction, grab the alleged child inode, lock inodes,
+ * join to transaction.
+ */
+ error = xchk_trans_alloc(sc, mpath->unlink_resblks);
+ if (error)
+ return error;
+
+ error = xchk_iget(sc, *alleged_child, &ip);
+ if (error == -EINVAL || error == -ENOENT) {
+ /* inode number is bogus, junk the dirent */
+ error = 0;
+ }
+ if (error) {
+ xchk_trans_cancel(sc);
+ return error;
+ }
+
+ error = xchk_metapath_ilock_parent_and_child(mpath, ip);
+ if (error) {
+ xchk_trans_cancel(sc);
+ return error;
+ }
+ xfs_trans_ijoin(sc->tp, mpath->dp, 0);
+ if (ip)
+ xfs_trans_ijoin(sc->tp, ip, 0);
+
+ error = xchk_dir_lookup(sc, mpath->dp, &mpath->xname, &ino);
+ trace_xrep_metapath_lookup(sc, mpath->path, mpath->dp, ino);
+ if (error == -ENOENT) {
+ /*
+ * There is no dirent in the directory anymore. We're ready to
+ * try the link operation again.
+ */
+ error = 0;
+ goto out_cancel;
+ }
+ if (error)
+ goto out_cancel;
+
+ if (ino == sc->ip->i_ino) {
+ /* The dirent already points to @sc->ip; we're done. */
+ error = -EEXIST;
+ goto out_cancel;
+ }
+
+ /*
+ * The dirent does not point to the alleged child. Update the caller
+ * and signal that we want to be called again.
+ */
+ if (ino != *alleged_child) {
+ *alleged_child = ino;
+ error = -EAGAIN;
+ goto out_cancel;
+ }
+
+ /* Remove the link to the child. */
+ error = xrep_metapath_unlink(mpath, ino, ip);
+ if (error)
+ goto out_cancel;
+
+ error = xrep_trans_commit(sc);
+ goto out_unlock;
+
+out_cancel:
+ xchk_trans_cancel(sc);
+out_unlock:
+ xfs_iunlock(mpath->dp, XFS_ILOCK_EXCL);
+ if (ip) {
+ xfs_iunlock(ip, XFS_ILOCK_EXCL);
+ xchk_irele(sc, ip);
+ }
+ return error;
+}
+
+/*
+ * Make sure the metadata directory path points to the child being examined.
+ *
+ * Repair needs to be able to create a directory structure, create its own
+ * transactions, and take ILOCKs. This function /must/ be called after all
+ * other repairs have completed.
+ */
+int
+xrep_metapath(
+ struct xfs_scrub *sc)
+{
+ struct xchk_metapath *mpath = sc->buf;
+ struct xfs_mount *mp = sc->mp;
+ int error = 0;
+
+ /* Just probing, nothing to repair. */
+ if (sc->sm->sm_ino == XFS_SCRUB_METAPATH_PROBE)
+ return 0;
+
+ /* Parent required to do anything else. */
+ if (mpath->dp == NULL)
+ return -EFSCORRUPTED;
+
+ /*
+ * Make sure the child file actually has an attr fork to receive a new
+ * parent pointer if the fs has parent pointers.
+ */
+ if (xfs_has_parent(mp)) {
+ error = xfs_attr_add_fork(sc->ip,
+ sizeof(struct xfs_attr_sf_hdr), 1);
+ if (error)
+ return error;
+ }
+
+ /* Compute block reservation required to unlink and link a file. */
+ mpath->unlink_resblks = xfs_remove_space_res(mp, MAXNAMELEN);
+ mpath->link_resblks = xfs_link_space_res(mp, MAXNAMELEN);
+
+ do {
+ xfs_ino_t alleged_child;
+
+ /* Re-establish the link, or tell us which inode to remove. */
+ error = xrep_metapath_try_link(mpath, &alleged_child);
+ if (!error)
+ return 0;
+ if (error != -EEXIST)
+ return error;
+
+ /*
+ * Remove an incorrect link to an alleged child, or tell us
+ * which inode to remove.
+ */
+ do {
+ error = xrep_metapath_try_unlink(mpath, &alleged_child);
+ } while (error == -EAGAIN);
+ if (error == -EEXIST) {
+ /* Link established; we're done. */
+ error = 0;
+ break;
+ }
+ } while (!error);
+
+ return error;
+}
+#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c
index 80aee30886c4..4a47d0aabf73 100644
--- a/fs/xfs/scrub/nlinks.c
+++ b/fs/xfs/scrub/nlinks.c
@@ -279,7 +279,7 @@ xchk_nlinks_collect_dirent(
* determine the backref count.
*/
if (dotdot) {
- if (dp == sc->mp->m_rootip)
+ if (xchk_inode_is_dirtree_root(dp))
error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
else if (!xfs_has_parent(sc->mp))
error = xchk_nlinks_update_incore(xnc, ino, 0, 1, 0);
@@ -735,7 +735,7 @@ xchk_nlinks_compare_inode(
}
}
- if (ip == sc->mp->m_rootip) {
+ if (xchk_inode_is_dirtree_root(ip)) {
/*
* For the root of a directory tree, both the '.' and '..'
* entries should point to the root directory. The dotdot
diff --git a/fs/xfs/scrub/nlinks_repair.c b/fs/xfs/scrub/nlinks_repair.c
index b3e707f47b7b..4ebdee095428 100644
--- a/fs/xfs/scrub/nlinks_repair.c
+++ b/fs/xfs/scrub/nlinks_repair.c
@@ -60,11 +60,9 @@ xrep_nlinks_is_orphaned(
unsigned int actual_nlink,
const struct xchk_nlink *obs)
{
- struct xfs_mount *mp = ip->i_mount;
-
if (obs->parents != 0)
return false;
- if (ip == mp->m_rootip || ip == sc->orphanage)
+ if (xchk_inode_is_dirtree_root(ip) || ip == sc->orphanage)
return false;
return actual_nlink != 0;
}
diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c
index 7148d8362db8..c287c755f2c5 100644
--- a/fs/xfs/scrub/orphanage.c
+++ b/fs/xfs/scrub/orphanage.c
@@ -295,7 +295,9 @@ xrep_orphanage_can_adopt(
return false;
if (sc->ip == sc->orphanage)
return false;
- if (xfs_internal_inum(sc->mp, sc->ip->i_ino))
+ if (xchk_inode_is_sb_rooted(sc->ip))
+ return false;
+ if (xfs_is_internal_inode(sc->ip))
return false;
return true;
}
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index 91e7b51ce068..3b692c4acc1e 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -132,6 +132,14 @@ xchk_parent_validate(
return 0;
}
+ /* Is this the metadata root dir? Then '..' must point to itself. */
+ if (sc->ip == mp->m_metadirip) {
+ if (sc->ip->i_ino != mp->m_sb.sb_metadirino ||
+ sc->ip->i_ino != parent_ino)
+ xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+ return 0;
+ }
+
/* '..' must not point to ourselves. */
if (sc->ip->i_ino == parent_ino) {
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
@@ -185,6 +193,12 @@ xchk_parent_validate(
goto out_unlock;
}
+ /* Metadata and regular inodes cannot cross trees. */
+ if (xfs_is_metadir_inode(dp) != xfs_is_metadir_inode(sc->ip)) {
+ xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
+ goto out_unlock;
+ }
+
/* Look for a directory entry in the parent pointing to the child. */
error = xchk_dir_walk(sc, dp, xchk_parent_actor, &spc);
if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error))
@@ -300,7 +314,7 @@ xchk_parent_pptr_and_dotdot(
}
/* Is this the root dir? Then '..' must point to itself. */
- if (sc->ip == sc->mp->m_rootip) {
+ if (xchk_inode_is_dirtree_root(sc->ip)) {
if (sc->ip->i_ino != pp->parent_ino)
xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
return 0;
@@ -711,7 +725,7 @@ xchk_parent_count_pptrs(
}
if (S_ISDIR(VFS_I(sc->ip)->i_mode)) {
- if (sc->ip == sc->mp->m_rootip)
+ if (xchk_inode_is_dirtree_root(sc->ip))
pp->pptrs_found++;
if (VFS_I(sc->ip)->i_nlink == 0 && pp->pptrs_found > 0)
@@ -720,6 +734,14 @@ xchk_parent_count_pptrs(
pp->pptrs_found == 0)
xchk_ino_set_corrupt(sc, sc->ip->i_ino);
} else {
+ /*
+ * Starting with metadir, we allow checking of parent pointers
+ * of non-directory files that are children of the superblock.
+ * Pretend that we found a parent pointer attr.
+ */
+ if (xfs_has_metadir(sc->mp) && xchk_inode_is_sb_rooted(sc->ip))
+ pp->pptrs_found++;
+
if (VFS_I(sc->ip)->i_nlink != pp->pptrs_found)
xchk_ino_set_corrupt(sc, sc->ip->i_ino);
}
@@ -885,10 +907,9 @@ bool
xchk_pptr_looks_zapped(
struct xfs_inode *ip)
{
- struct xfs_mount *mp = ip->i_mount;
struct inode *inode = VFS_I(ip);
- ASSERT(xfs_has_parent(mp));
+ ASSERT(xfs_has_parent(ip->i_mount));
/*
* Temporary files that cannot be linked into the directory tree do not
@@ -902,15 +923,15 @@ xchk_pptr_looks_zapped(
* of a parent pointer scan is always the empty set. It's safe to scan
* them even if the attr fork was zapped.
*/
- if (ip == mp->m_rootip)
+ if (xchk_inode_is_dirtree_root(ip))
return false;
/*
- * Metadata inodes are all rooted in the superblock and do not have
- * any parents. Hence the attr fork will not be initialized, but
- * there are no parent pointers that might have been zapped.
+ * Metadata inodes that are rooted in the superblock do not have any
+ * parents. Hence the attr fork will not be initialized, but there are
+ * no parent pointers that might have been zapped.
*/
- if (xfs_is_metadata_inode(ip))
+ if (xchk_inode_is_sb_rooted(ip))
return false;
/*
diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c
index 7b42b7f65a0b..31bfe10be22a 100644
--- a/fs/xfs/scrub/parent_repair.c
+++ b/fs/xfs/scrub/parent_repair.c
@@ -1334,7 +1334,7 @@ xrep_parent_rebuild_pptrs(
* so that we can decide if we're moving this file to the orphanage.
* For this purpose, root directories are their own parents.
*/
- if (sc->ip == sc->mp->m_rootip) {
+ if (xchk_inode_is_dirtree_root(sc->ip)) {
xrep_findparent_scan_found(&rp->pscan, sc->ip->i_ino);
} else {
error = xrep_parent_lookup_pptrs(sc, &parent_ino);
@@ -1354,21 +1354,40 @@ STATIC int
xrep_parent_rebuild_tree(
struct xrep_parent *rp)
{
+ struct xfs_scrub *sc = rp->sc;
+ bool try_adoption;
int error;
- if (xfs_has_parent(rp->sc->mp)) {
+ if (xfs_has_parent(sc->mp)) {
error = xrep_parent_rebuild_pptrs(rp);
if (error)
return error;
}
- if (rp->pscan.parent_ino == NULLFSINO) {
- if (xrep_orphanage_can_adopt(rp->sc))
+ /*
+ * Any file with no parent could be adopted. This check happens after
+ * rebuilding the parent pointer structure because we might have cycled
+ * the ILOCK during that process.
+ */
+ try_adoption = rp->pscan.parent_ino == NULLFSINO;
+
+ /*
+ * Starting with metadir, we allow checking of parent pointers
+ * of non-directory files that are children of the superblock.
+ * Lack of parent is ok here.
+ */
+ if (try_adoption && xfs_has_metadir(sc->mp) &&
+ xchk_inode_is_sb_rooted(sc->ip))
+ try_adoption = false;
+
+ if (try_adoption) {
+ if (xrep_orphanage_can_adopt(sc))
return xrep_parent_move_to_orphanage(rp);
return -EFSCORRUPTED;
+
}
- if (S_ISDIR(VFS_I(rp->sc->ip)->i_mode))
+ if (S_ISDIR(VFS_I(sc->ip)->i_mode))
return xrep_parent_reset_dotdot(rp);
return 0;
@@ -1422,6 +1441,14 @@ xrep_parent_set_nondir_nlink(
if (error)
return error;
+ /*
+ * Starting with metadir, we allow checking of parent pointers of
+ * non-directory files that are children of the superblock. Pretend
+ * that we found a parent pointer attr.
+ */
+ if (xfs_has_metadir(sc->mp) && xchk_inode_is_sb_rooted(sc->ip))
+ rp->parents++;
+
if (rp->parents > 0 && xfs_inode_on_unlinked_list(ip)) {
xfs_trans_ijoin(sc->tp, sc->ip, 0);
joined = true;
diff --git a/fs/xfs/scrub/quotacheck.c b/fs/xfs/scrub/quotacheck.c
index c77eb2de8df7..dc4033b91e44 100644
--- a/fs/xfs/scrub/quotacheck.c
+++ b/fs/xfs/scrub/quotacheck.c
@@ -398,10 +398,13 @@ xqcheck_collect_inode(
bool isreg = S_ISREG(VFS_I(ip)->i_mode);
int error = 0;
- if (xfs_is_quota_inode(&tp->t_mountp->m_sb, ip->i_ino)) {
+ if (xfs_is_metadir_inode(ip) ||
+ xfs_is_quota_inode(&tp->t_mountp->m_sb, ip->i_ino)) {
/*
* Quota files are never counted towards quota, so we do not
- * need to take the lock.
+ * need to take the lock. Files do not switch between the
+ * metadata and regular directory trees without a reallocation,
+ * so we do not need to ILOCK them either.
*/
xchk_iscan_mark_visited(&xqc->iscan, ip);
return 0;
diff --git a/fs/xfs/scrub/refcount_repair.c b/fs/xfs/scrub/refcount_repair.c
index 4240fff459cb..4e572b81c986 100644
--- a/fs/xfs/scrub/refcount_repair.c
+++ b/fs/xfs/scrub/refcount_repair.c
@@ -215,7 +215,7 @@ xrep_refc_rmap_shareable(
return false;
/* Metadata in files are never shareable */
- if (xfs_internal_inum(mp, rmap->rm_owner))
+ if (xfs_is_sb_inum(mp, rmap->rm_owner))
return false;
/* Metadata and unwritten file blocks are not shareable. */
diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c
index 646ac8ade88d..f80000d77552 100644
--- a/fs/xfs/scrub/repair.c
+++ b/fs/xfs/scrub/repair.c
@@ -1082,7 +1082,12 @@ xrep_metadata_inode_forks(
if (error)
return error;
- /* Make sure the attr fork looks ok before we delete it. */
+ /*
+ * Metadata files can only have extended attributes on metadir
+ * filesystems, either for parent pointers or for actual xattr data.
+ * For a non-metadir filesystem, make sure the attr fork looks ok
+ * before we delete it.
+ */
if (xfs_inode_hasattr(sc->ip)) {
error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTA);
if (error)
@@ -1098,8 +1103,11 @@ xrep_metadata_inode_forks(
return error;
}
- /* Clear the attr forks since metadata shouldn't have that. */
- if (xfs_inode_hasattr(sc->ip)) {
+ /*
+ * Metadata files on non-metadir filesystems cannot have attr forks,
+ * so clear them now.
+ */
+ if (xfs_inode_hasattr(sc->ip) && !xfs_has_metadir(sc->mp)) {
if (!dirty) {
dirty = true;
xfs_trans_ijoin(sc->tp, sc->ip, 0);
diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h
index 0e0dc2bf985c..90f9cb3b5ad8 100644
--- a/fs/xfs/scrub/repair.h
+++ b/fs/xfs/scrub/repair.h
@@ -134,6 +134,7 @@ int xrep_directory(struct xfs_scrub *sc);
int xrep_parent(struct xfs_scrub *sc);
int xrep_symlink(struct xfs_scrub *sc);
int xrep_dirtree(struct xfs_scrub *sc);
+int xrep_metapath(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT
int xrep_rtbitmap(struct xfs_scrub *sc);
@@ -208,6 +209,7 @@ xrep_setup_nothing(
#define xrep_setup_parent xrep_setup_nothing
#define xrep_setup_nlinks xrep_setup_nothing
#define xrep_setup_dirtree xrep_setup_nothing
+#define xrep_setup_metapath xrep_setup_nothing
#define xrep_setup_inode(sc, imap) ((void)0)
@@ -243,6 +245,7 @@ static inline int xrep_setup_symlink(struct xfs_scrub *sc, unsigned int *x)
#define xrep_parent xrep_notsupported
#define xrep_symlink xrep_notsupported
#define xrep_dirtree xrep_notsupported
+#define xrep_metapath xrep_notsupported
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 4cbcf7a86dbe..1ac33bea6f0a 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -442,6 +442,13 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.has = xfs_has_parent,
.repair = xrep_dirtree,
},
+ [XFS_SCRUB_TYPE_METAPATH] = { /* metadata directory tree path */
+ .type = ST_GENERIC,
+ .setup = xchk_setup_metapath,
+ .scrub = xchk_metapath,
+ .has = xfs_has_metadir,
+ .repair = xrep_metapath,
+ },
};
static int
@@ -489,6 +496,8 @@ xchk_validate_inputs(
if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino))
goto out;
break;
+ case ST_GENERIC:
+ break;
default:
goto out;
}
@@ -605,8 +614,7 @@ xfs_scrub_metadata(
if (error)
goto out;
- xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SCRUB,
- "EXPERIMENTAL online scrub feature in use. Use at your own risk!");
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_SCRUB);
sc = kzalloc(sizeof(struct xfs_scrub), XCHK_GFP_FLAGS);
if (!sc) {
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 5993fcaffb2c..c688ff4fc7fc 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -73,6 +73,7 @@ enum xchk_type {
ST_PERAG, /* per-AG metadata */
ST_FS, /* per-FS metadata */
ST_INODE, /* per-inode metadata */
+ ST_GENERIC, /* determined by the scrubber */
};
struct xchk_meta_ops {
@@ -255,6 +256,7 @@ int xchk_xattr(struct xfs_scrub *sc);
int xchk_symlink(struct xfs_scrub *sc);
int xchk_parent(struct xfs_scrub *sc);
int xchk_dirtree(struct xfs_scrub *sc);
+int xchk_metapath(struct xfs_scrub *sc);
#ifdef CONFIG_XFS_RT
int xchk_rtbitmap(struct xfs_scrub *sc);
int xchk_rtsummary(struct xfs_scrub *sc);
diff --git a/fs/xfs/scrub/stats.c b/fs/xfs/scrub/stats.c
index 7996c2335476..edcd02dc2e62 100644
--- a/fs/xfs/scrub/stats.c
+++ b/fs/xfs/scrub/stats.c
@@ -80,6 +80,7 @@ static const char *name_map[XFS_SCRUB_TYPE_NR] = {
[XFS_SCRUB_TYPE_QUOTACHECK] = "quotacheck",
[XFS_SCRUB_TYPE_NLINKS] = "nlinks",
[XFS_SCRUB_TYPE_DIRTREE] = "dirtree",
+ [XFS_SCRUB_TYPE_METAPATH] = "metapath",
};
/* Format the scrub stats into a text buffer, similar to pcp style. */
diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c
index 177f922acfaf..4b7f7860e37e 100644
--- a/fs/xfs/scrub/tempfile.c
+++ b/fs/xfs/scrub/tempfile.c
@@ -22,6 +22,7 @@
#include "xfs_exchmaps.h"
#include "xfs_defer.h"
#include "xfs_symlink_remote.h"
+#include "xfs_metafile.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
@@ -182,6 +183,101 @@ out_release_dquots:
return error;
}
+/*
+ * Temporary files have to be created before we even know which inode we're
+ * going to scrub, so we assume that they will be part of the regular directory
+ * tree. If it turns out that we're actually scrubbing a file from the
+ * metadata directory tree, we have to subtract the temp file from the root
+ * dquots and detach the dquots.
+ */
+int
+xrep_tempfile_adjust_directory_tree(
+ struct xfs_scrub *sc)
+{
+ int error;
+
+ if (!sc->tempip)
+ return 0;
+
+ ASSERT(sc->tp == NULL);
+ ASSERT(!xfs_is_metadir_inode(sc->tempip));
+
+ if (!sc->ip || !xfs_is_metadir_inode(sc->ip))
+ return 0;
+
+ xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL);
+ sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
+
+ error = xchk_trans_alloc(sc, 0);
+ if (error)
+ goto out_iolock;
+
+ xrep_tempfile_ilock(sc);
+ xfs_trans_ijoin(sc->tp, sc->tempip, 0);
+
+ /* Metadir files are not accounted in quota, so drop icount */
+ xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, -1L);
+ xfs_metafile_set_iflag(sc->tp, sc->tempip, XFS_METAFILE_UNKNOWN);
+
+ error = xrep_trans_commit(sc);
+ if (error)
+ goto out_ilock;
+
+ xfs_qm_dqdetach(sc->tempip);
+out_ilock:
+ xrep_tempfile_iunlock(sc);
+out_iolock:
+ xrep_tempfile_iounlock(sc);
+ return error;
+}
+
+/*
+ * Remove this temporary file from the metadata directory tree so that it can
+ * be inactivated the normal way.
+ */
+STATIC int
+xrep_tempfile_remove_metadir(
+ struct xfs_scrub *sc)
+{
+ int error;
+
+ if (!sc->tempip || !xfs_is_metadir_inode(sc->tempip))
+ return 0;
+
+ ASSERT(sc->tp == NULL);
+
+ xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL);
+ sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
+
+ error = xchk_trans_alloc(sc, 0);
+ if (error)
+ goto out_iolock;
+
+ xrep_tempfile_ilock(sc);
+ xfs_trans_ijoin(sc->tp, sc->tempip, 0);
+
+ xfs_metafile_clear_iflag(sc->tp, sc->tempip);
+
+ /* Non-metadir files are accounted in quota, so bump bcount/icount */
+ error = xfs_qm_dqattach_locked(sc->tempip, false);
+ if (error)
+ goto out_cancel;
+
+ xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, 1L);
+ xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_BCOUNT,
+ sc->tempip->i_nblocks);
+ error = xrep_trans_commit(sc);
+ goto out_ilock;
+
+out_cancel:
+ xchk_trans_cancel(sc);
+out_ilock:
+ xrep_tempfile_iunlock(sc);
+out_iolock:
+ xrep_tempfile_iounlock(sc);
+ return error;
+}
+
/* Take IOLOCK_EXCL on the temporary file, maybe. */
bool
xrep_tempfile_iolock_nowait(
@@ -290,6 +386,7 @@ xrep_tempfile_rele(
sc->temp_ilock_flags = 0;
}
+ xrep_tempfile_remove_metadir(sc);
xchk_irele(sc, sc->tempip);
sc->tempip = NULL;
}
@@ -844,6 +941,14 @@ xrep_is_tempfile(
const struct xfs_inode *ip)
{
const struct inode *inode = &ip->i_vnode;
+ struct xfs_mount *mp = ip->i_mount;
+
+ /*
+ * Files in the metadata directory tree also have S_PRIVATE set and
+ * IOP_XATTR unset, so we must distinguish them separately.
+ */
+ if (xfs_has_metadir(mp) && (ip->i_diflags2 & XFS_DIFLAG2_METADATA))
+ return false;
if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR))
return true;
diff --git a/fs/xfs/scrub/tempfile.h b/fs/xfs/scrub/tempfile.h
index e51399f595fe..71c1b54599c3 100644
--- a/fs/xfs/scrub/tempfile.h
+++ b/fs/xfs/scrub/tempfile.h
@@ -10,6 +10,8 @@
int xrep_tempfile_create(struct xfs_scrub *sc, uint16_t mode);
void xrep_tempfile_rele(struct xfs_scrub *sc);
+int xrep_tempfile_adjust_directory_tree(struct xfs_scrub *sc);
+
bool xrep_tempfile_iolock_nowait(struct xfs_scrub *sc);
int xrep_tempfile_iolock_polled(struct xfs_scrub *sc);
void xrep_tempfile_iounlock(struct xfs_scrub *sc);
@@ -42,6 +44,7 @@ static inline void xrep_tempfile_iolock_both(struct xfs_scrub *sc)
xchk_ilock(sc, XFS_IOLOCK_EXCL);
}
# define xrep_is_tempfile(ip) (false)
+# define xrep_tempfile_adjust_directory_tree(sc) (0)
# define xrep_tempfile_rele(sc)
#endif /* CONFIG_XFS_ONLINE_REPAIR */
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 4470ad0533b8..98f923ae664d 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -20,6 +20,7 @@
#include "xfs_dir2.h"
#include "xfs_rmap.h"
#include "xfs_parent.h"
+#include "xfs_metafile.h"
#include "scrub/scrub.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 58cc61f2ed53..b6c8d0944fa4 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -70,6 +70,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_NLINKS);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_HEALTHY);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_DIRTREE);
TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BARRIER);
+TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_METAPATH);
#define XFS_SCRUB_TYPE_STRINGS \
{ XFS_SCRUB_TYPE_PROBE, "probe" }, \
@@ -101,7 +102,8 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BARRIER);
{ XFS_SCRUB_TYPE_NLINKS, "nlinks" }, \
{ XFS_SCRUB_TYPE_HEALTHY, "healthy" }, \
{ XFS_SCRUB_TYPE_DIRTREE, "dirtree" }, \
- { XFS_SCRUB_TYPE_BARRIER, "barrier" }
+ { XFS_SCRUB_TYPE_BARRIER, "barrier" }, \
+ { XFS_SCRUB_TYPE_METAPATH, "metapath" }
#define XFS_SCRUB_FLAG_STRINGS \
{ XFS_SCRUB_IFLAG_REPAIR, "repair" }, \
@@ -1753,6 +1755,7 @@ DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_badgen);
DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_nondir_parent);
DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_unlinked_parent);
DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_found_next_step);
+DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_crosses_tree);
TRACE_DEFINE_ENUM(XCHK_DIRPATH_SCANNING);
TRACE_DEFINE_ENUM(XCHK_DIRPATH_DELETE);
@@ -1915,6 +1918,38 @@ TRACE_EVENT(xchk_dirtree_live_update,
__get_str(name))
);
+DECLARE_EVENT_CLASS(xchk_metapath_class,
+ TP_PROTO(struct xfs_scrub *sc, const char *path,
+ struct xfs_inode *dp, xfs_ino_t ino),
+ TP_ARGS(sc, path, dp, ino),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, scrub_ino)
+ __field(xfs_ino_t, parent_ino)
+ __field(xfs_ino_t, ino)
+ __string(name, path)
+ ),
+ TP_fast_assign(
+ __entry->dev = sc->mp->m_super->s_dev;
+ __entry->scrub_ino = sc->ip ? sc->ip->i_ino : NULLFSINO;
+ __entry->parent_ino = dp ? dp->i_ino : NULLFSINO;
+ __entry->ino = ino;
+ __assign_str(name);
+ ),
+ TP_printk("dev %d:%d ino 0x%llx parent_ino 0x%llx name '%s' ino 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->scrub_ino,
+ __entry->parent_ino,
+ __get_str(name),
+ __entry->ino)
+);
+#define DEFINE_XCHK_METAPATH_EVENT(name) \
+DEFINE_EVENT(xchk_metapath_class, name, \
+ TP_PROTO(struct xfs_scrub *sc, const char *path, \
+ struct xfs_inode *dp, xfs_ino_t ino), \
+ TP_ARGS(sc, path, dp, ino))
+DEFINE_XCHK_METAPATH_EVENT(xchk_metapath_lookup);
+
/* repair tracepoints */
#if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR)
@@ -3563,6 +3598,11 @@ DEFINE_XCHK_DIRTREE_EVENT(xrep_dirtree_delete_path);
DEFINE_XCHK_DIRTREE_EVENT(xrep_dirtree_create_adoption);
DEFINE_XCHK_DIRTREE_EVALUATE_EVENT(xrep_dirtree_decided_fate);
+DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_lookup);
+DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_try_unlink);
+DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_unlink);
+DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_link);
+
#endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */
#endif /* _TRACE_XFS_SCRUB_TRACE_H */
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index c1b211c260a9..3bf47458c517 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -983,6 +983,7 @@ xfs_qm_dqget_inode(
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
ASSERT(xfs_inode_dquot(ip, type) == NULL);
+ ASSERT(!xfs_is_metadir_inode(ip));
id = xfs_qm_id_for_quotatype(ip, type);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 82812a458cf1..28dde215c899 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -162,9 +162,7 @@ xfs_growfs_data_private(
error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount,
delta, last_pag, &lastag_extended);
} else {
- xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SHRINK,
- "EXPERIMENTAL online shrink feature in use. Use at your own risk!");
-
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_SHRINK);
error = xfs_ag_shrink_space(last_pag, &tp, -delta);
}
xfs_perag_put(last_pag);
diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c
index f45f125a669d..e5663e2ac9b8 100644
--- a/fs/xfs/xfs_health.c
+++ b/fs/xfs/xfs_health.c
@@ -380,6 +380,8 @@ static const struct ioctl_sick_map fs_map[] = {
{ XFS_SICK_FS_PQUOTA, XFS_FSOP_GEOM_SICK_PQUOTA },
{ XFS_SICK_FS_QUOTACHECK, XFS_FSOP_GEOM_SICK_QUOTACHECK },
{ XFS_SICK_FS_NLINKS, XFS_FSOP_GEOM_SICK_NLINKS },
+ { XFS_SICK_FS_METADIR, XFS_FSOP_GEOM_SICK_METADIR },
+ { XFS_SICK_FS_METAPATH, XFS_FSOP_GEOM_SICK_METAPATH },
{ 0, 0 },
};
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 383c24548202..7b6c026d01a1 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -25,6 +25,9 @@
#include "xfs_ag.h"
#include "xfs_log_priv.h"
#include "xfs_health.h"
+#include "xfs_da_format.h"
+#include "xfs_dir2.h"
+#include "xfs_metafile.h"
#include <linux/iversion.h>
@@ -829,6 +832,77 @@ out_error_or_again:
}
/*
+ * Get a metadata inode.
+ *
+ * The metafile type must match the file mode exactly, and for files in the
+ * metadata directory tree, it must match the inode's metatype exactly.
+ */
+int
+xfs_trans_metafile_iget(
+ struct xfs_trans *tp,
+ xfs_ino_t ino,
+ enum xfs_metafile_type metafile_type,
+ struct xfs_inode **ipp)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_inode *ip;
+ umode_t mode;
+ int error;
+
+ error = xfs_iget(mp, tp, ino, 0, 0, &ip);
+ if (error == -EFSCORRUPTED || error == -EINVAL)
+ goto whine;
+ if (error)
+ return error;
+
+ if (VFS_I(ip)->i_nlink == 0)
+ goto bad_rele;
+
+ if (metafile_type == XFS_METAFILE_DIR)
+ mode = S_IFDIR;
+ else
+ mode = S_IFREG;
+ if (inode_wrong_type(VFS_I(ip), mode))
+ goto bad_rele;
+ if (xfs_has_metadir(mp)) {
+ if (!xfs_is_metadir_inode(ip))
+ goto bad_rele;
+ if (metafile_type != ip->i_metatype)
+ goto bad_rele;
+ }
+
+ *ipp = ip;
+ return 0;
+bad_rele:
+ xfs_irele(ip);
+whine:
+ xfs_err(mp, "metadata inode 0x%llx type %u is corrupt", ino,
+ metafile_type);
+ xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR);
+ return -EFSCORRUPTED;
+}
+
+/* Grab a metadata file if the caller doesn't already have a transaction. */
+int
+xfs_metafile_iget(
+ struct xfs_mount *mp,
+ xfs_ino_t ino,
+ enum xfs_metafile_type metafile_type,
+ struct xfs_inode **ipp)
+{
+ struct xfs_trans *tp;
+ int error;
+
+ error = xfs_trans_alloc_empty(mp, &tp);
+ if (error)
+ return error;
+
+ error = xfs_trans_metafile_iget(tp, ino, metafile_type, ipp);
+ xfs_trans_cancel(tp);
+ return error;
+}
+
+/*
* Grab the inode for reclaim exclusively.
*
* We have found this inode via a lookup under RCU, so the inode may have
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 693770f9bb09..103cf8b2af24 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -43,6 +43,7 @@
#include "xfs_parent.h"
#include "xfs_xattr.h"
#include "xfs_inode_util.h"
+#include "xfs_metafile.h"
struct kmem_cache *xfs_inode_cache;
@@ -554,8 +555,20 @@ xfs_lookup(
if (error)
goto out_free_name;
+ /*
+ * Fail if a directory entry in the regular directory tree points to
+ * a metadata file.
+ */
+ if (XFS_IS_CORRUPT(dp->i_mount, xfs_is_metadir_inode(*ipp))) {
+ xfs_fs_mark_sick(dp->i_mount, XFS_SICK_FS_METADIR);
+ error = -EFSCORRUPTED;
+ goto out_irele;
+ }
+
return 0;
+out_irele:
+ xfs_irele(*ipp);
out_free_name:
if (ci_name)
kfree(ci_name->name);
@@ -1295,7 +1308,7 @@ xfs_inode_needs_inactive(
return false;
/* Metadata inodes require explicit resource cleanup. */
- if (xfs_is_metadata_inode(ip))
+ if (xfs_is_internal_inode(ip))
return false;
/* Want to clean out the cow blocks if there are any. */
@@ -1388,7 +1401,7 @@ xfs_inactive(
goto out;
/* Metadata inodes require explicit resource cleanup. */
- if (xfs_is_metadata_inode(ip))
+ if (xfs_is_internal_inode(ip))
goto out;
/* Try to clean out the cow blocks if there are any. */
@@ -3040,7 +3053,7 @@ xfs_inode_alloc_unitsize(
/* Should we always be using copy on write for file writes? */
bool
xfs_is_always_cow_inode(
- struct xfs_inode *ip)
+ const struct xfs_inode *ip)
{
return ip->i_mount->m_always_cow && xfs_has_reflink(ip->i_mount);
}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 03944b6c5fba..b6e959563547 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -65,6 +65,7 @@ typedef struct xfs_inode {
uint16_t i_flushiter; /* incremented on flush */
};
uint8_t i_forkoff; /* attr fork offset >> 3 */
+ enum xfs_metafile_type i_metatype; /* XFS_METAFILE_* */
uint16_t i_diflags; /* XFS_DIFLAG_... */
uint64_t i_diflags2; /* XFS_DIFLAG2_... */
struct timespec64 i_crtime; /* time created */
@@ -100,7 +101,7 @@ static inline bool xfs_inode_on_unlinked_list(const struct xfs_inode *ip)
return ip->i_prev_unlinked != 0;
}
-static inline bool xfs_inode_has_attr_fork(struct xfs_inode *ip)
+static inline bool xfs_inode_has_attr_fork(const struct xfs_inode *ip)
{
return ip->i_forkoff > 0;
}
@@ -271,23 +272,36 @@ xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned long flags)
return ret;
}
-static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
+static inline bool xfs_is_reflink_inode(const struct xfs_inode *ip)
{
return ip->i_diflags2 & XFS_DIFLAG2_REFLINK;
}
-static inline bool xfs_is_metadata_inode(const struct xfs_inode *ip)
+static inline bool xfs_is_metadir_inode(const struct xfs_inode *ip)
+{
+ return ip->i_diflags2 & XFS_DIFLAG2_METADATA;
+}
+
+static inline bool xfs_is_internal_inode(const struct xfs_inode *ip)
{
struct xfs_mount *mp = ip->i_mount;
+ /* Any file in the metadata directory tree is a metadata inode. */
+ if (xfs_has_metadir(mp))
+ return xfs_is_metadir_inode(ip);
+
+ /*
+ * Before metadata directories, the only metadata inodes were the
+ * three quota files, the realtime bitmap, and the realtime summary.
+ */
return ip->i_ino == mp->m_sb.sb_rbmino ||
ip->i_ino == mp->m_sb.sb_rsumino ||
xfs_is_quota_inode(&mp->m_sb, ip->i_ino);
}
-bool xfs_is_always_cow_inode(struct xfs_inode *ip);
+bool xfs_is_always_cow_inode(const struct xfs_inode *ip);
-static inline bool xfs_is_cow_inode(struct xfs_inode *ip)
+static inline bool xfs_is_cow_inode(const struct xfs_inode *ip)
{
return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip);
}
@@ -301,17 +315,17 @@ static inline bool xfs_inode_has_filedata(const struct xfs_inode *ip)
* Check if an inode has any data in the COW fork. This might be often false
* even for inodes with the reflink flag when there is no pending COW operation.
*/
-static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip)
+static inline bool xfs_inode_has_cow_data(const struct xfs_inode *ip)
{
return ip->i_cowfp && ip->i_cowfp->if_bytes;
}
-static inline bool xfs_inode_has_bigtime(struct xfs_inode *ip)
+static inline bool xfs_inode_has_bigtime(const struct xfs_inode *ip)
{
return ip->i_diflags2 & XFS_DIFLAG2_BIGTIME;
}
-static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
+static inline bool xfs_inode_has_large_extent_counts(const struct xfs_inode *ip)
{
return ip->i_diflags2 & XFS_DIFLAG2_NREXT64;
}
@@ -320,7 +334,7 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip)
* Decide if this file is a realtime file whose data allocation unit is larger
* than a single filesystem block.
*/
-static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip)
+static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip)
{
return XFS_IS_REALTIME_INODE(ip) && ip->i_mount->m_sb.sb_rextsize > 1;
}
@@ -625,9 +639,9 @@ void xfs_sort_inodes(struct xfs_inode **i_tab, unsigned int num_inodes);
static inline bool
xfs_inode_unlinked_incomplete(
- struct xfs_inode *ip)
+ const struct xfs_inode *ip)
{
- return VFS_I(ip)->i_nlink == 0 && !xfs_inode_on_unlinked_list(ip);
+ return VFS_IC(ip)->i_nlink == 0 && !xfs_inode_on_unlinked_list(ip);
}
int xfs_inode_reload_unlinked_bucket(struct xfs_trans *tp, struct xfs_inode *ip);
int xfs_inode_reload_unlinked(struct xfs_inode *ip);
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index b509cbd191f4..912f0b1bc3cb 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -556,7 +556,6 @@ xfs_inode_to_log_dinode(
to->di_projid_lo = ip->i_projid & 0xffff;
to->di_projid_hi = ip->i_projid >> 16;
- memset(to->di_pad3, 0, sizeof(to->di_pad3));
to->di_atime = xfs_inode_to_log_dinode_ts(ip, inode_get_atime(inode));
to->di_mtime = xfs_inode_to_log_dinode_ts(ip, inode_get_mtime(inode));
to->di_ctime = xfs_inode_to_log_dinode_ts(ip, inode_get_ctime(inode));
@@ -590,10 +589,16 @@ xfs_inode_to_log_dinode(
/* dummy value for initialisation */
to->di_crc = 0;
+
+ if (xfs_is_metadir_inode(ip))
+ to->di_metatype = ip->i_metatype;
+ else
+ to->di_metatype = 0;
} else {
to->di_version = 2;
to->di_flushiter = ip->i_flushiter;
memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad));
+ to->di_metatype = 0;
}
xfs_inode_to_log_dinode_iext_counters(ip, to);
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
index dbdab4ce7c44..e70d2611456b 100644
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -175,7 +175,7 @@ xfs_log_dinode_to_disk(
to->di_mode = cpu_to_be16(from->di_mode);
to->di_version = from->di_version;
to->di_format = from->di_format;
- to->di_onlink = 0;
+ to->di_metatype = cpu_to_be16(from->di_metatype);
to->di_uid = cpu_to_be32(from->di_uid);
to->di_gid = cpu_to_be32(from->di_gid);
to->di_nlink = cpu_to_be32(from->di_nlink);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 2567fd2a0994..f36fd8db388c 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -233,6 +233,10 @@ xfs_bulk_ireq_setup(
if (hdr->flags & XFS_BULK_IREQ_NREXT64)
breq->flags |= XFS_IBULK_NREXT64;
+ /* Caller wants to see metadata directories in bulkstat output. */
+ if (hdr->flags & XFS_BULK_IREQ_METADIR)
+ breq->flags |= XFS_IBULK_METADIR;
+
return 0;
}
@@ -323,6 +327,9 @@ xfs_ioc_inumbers(
if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr)))
return -EFAULT;
+ if (hdr.flags & XFS_BULK_IREQ_METADIR)
+ return -EINVAL;
+
error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->inumbers);
if (error == -ECANCELED)
goto out_teardown;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index ee79cf161312..66a726a5fbbb 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -42,7 +42,9 @@
* held. For regular files, the lock order is the other way around - the
* mmap_lock is taken during the page fault, and then we lock the ilock to do
* block mapping. Hence we need a different class for the directory ilock so
- * that lockdep can tell them apart.
+ * that lockdep can tell them apart. Directories in the metadata directory
+ * tree get a separate class so that lockdep reports will warn us if someone
+ * ever tries to lock regular directories after locking metadata directories.
*/
static struct lock_class_key xfs_nondir_ilock_class;
static struct lock_class_key xfs_dir_ilock_class;
@@ -1289,6 +1291,7 @@ xfs_setup_inode(
{
struct inode *inode = &ip->i_vnode;
gfp_t gfp_mask;
+ bool is_meta = xfs_is_internal_inode(ip);
inode->i_ino = ip->i_ino;
inode->i_state |= I_NEW;
@@ -1300,6 +1303,16 @@ xfs_setup_inode(
i_size_write(inode, ip->i_disk_size);
xfs_diflags_to_iflags(ip, true);
+ /*
+ * Mark our metadata files as private so that LSMs and the ACL code
+ * don't try to add their own metadata or reason about these files,
+ * and users cannot ever obtain file handles to them.
+ */
+ if (is_meta) {
+ inode->i_flags |= S_PRIVATE;
+ inode->i_opflags &= ~IOP_XATTR;
+ }
+
if (S_ISDIR(inode->i_mode)) {
/*
* We set the i_rwsem class here to avoid potential races with
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index c0757ab99495..1fa1c0564b0c 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -36,6 +36,14 @@ struct xfs_bstat_chunk {
struct xfs_bulkstat *buf;
};
+static inline bool
+want_metadir_file(
+ struct xfs_inode *ip,
+ struct xfs_ibulk *breq)
+{
+ return xfs_is_metadir_inode(ip) && (breq->flags & XFS_IBULK_METADIR);
+}
+
/*
* Fill out the bulkstat info for a single inode and report it somewhere.
*
@@ -69,9 +77,6 @@ xfs_bulkstat_one_int(
vfsuid_t vfsuid;
vfsgid_t vfsgid;
- if (xfs_internal_inum(mp, ino))
- goto out_advance;
-
error = xfs_iget(mp, tp, ino,
(XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED),
XFS_ILOCK_SHARED, &ip);
@@ -97,8 +102,28 @@ xfs_bulkstat_one_int(
vfsuid = i_uid_into_vfsuid(idmap, inode);
vfsgid = i_gid_into_vfsgid(idmap, inode);
+ /*
+ * If caller wants files from the metadata directories, push out the
+ * bare minimum information for enabling scrub.
+ */
+ if (want_metadir_file(ip, bc->breq)) {
+ memset(buf, 0, sizeof(*buf));
+ buf->bs_ino = ino;
+ buf->bs_gen = inode->i_generation;
+ buf->bs_mode = inode->i_mode & S_IFMT;
+ xfs_bulkstat_health(ip, buf);
+ buf->bs_version = XFS_BULKSTAT_VERSION_V5;
+ xfs_iunlock(ip, XFS_ILOCK_SHARED);
+ xfs_irele(ip);
+
+ error = bc->formatter(bc->breq, buf);
+ if (!error || error == -ECANCELED)
+ goto out_advance;
+ goto out;
+ }
+
/* If this is a private inode, don't leak its details to userspace. */
- if (IS_PRIVATE(inode)) {
+ if (IS_PRIVATE(inode) || xfs_is_sb_inum(mp, ino)) {
xfs_iunlock(ip, XFS_ILOCK_SHARED);
xfs_irele(ip);
error = -EINVAL;
diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h
index 1659f13f17a8..f10e8f8f2335 100644
--- a/fs/xfs/xfs_itable.h
+++ b/fs/xfs/xfs_itable.h
@@ -22,6 +22,9 @@ struct xfs_ibulk {
/* Fill out the bs_extents64 field if set. */
#define XFS_IBULK_NREXT64 (1U << 1)
+/* Signal that we can return metadata directories. */
+#define XFS_IBULK_METADIR (1U << 2)
+
/*
* Advance the user buffer pointer by one record of the given size. If the
* buffer is now full, return the appropriate error code.
diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c
index 8f495cc23903..6ed485ff2756 100644
--- a/fs/xfs/xfs_message.c
+++ b/fs/xfs/xfs_message.c
@@ -131,3 +131,54 @@ xfs_buf_alert_ratelimited(
__xfs_printk(KERN_ALERT, mp, &vaf);
va_end(args);
}
+
+void
+xfs_warn_experimental(
+ struct xfs_mount *mp,
+ enum xfs_experimental_feat feat)
+{
+ static const struct {
+ const char *name;
+ long opstate;
+ } features[] = {
+ [XFS_EXPERIMENTAL_PNFS] = {
+ .opstate = XFS_OPSTATE_WARNED_PNFS,
+ .name = "pNFS",
+ },
+ [XFS_EXPERIMENTAL_SCRUB] = {
+ .opstate = XFS_OPSTATE_WARNED_SCRUB,
+ .name = "online scrub",
+ },
+ [XFS_EXPERIMENTAL_SHRINK] = {
+ .opstate = XFS_OPSTATE_WARNED_SHRINK,
+ .name = "online shrink",
+ },
+ [XFS_EXPERIMENTAL_LARP] = {
+ .opstate = XFS_OPSTATE_WARNED_LARP,
+ .name = "logged extended attributes",
+ },
+ [XFS_EXPERIMENTAL_LBS] = {
+ .opstate = XFS_OPSTATE_WARNED_LBS,
+ .name = "large block size",
+ },
+ [XFS_EXPERIMENTAL_EXCHRANGE] = {
+ .opstate = XFS_OPSTATE_WARNED_EXCHRANGE,
+ .name = "exchange range",
+ },
+ [XFS_EXPERIMENTAL_PPTR] = {
+ .opstate = XFS_OPSTATE_WARNED_PPTR,
+ .name = "parent pointer",
+ },
+ [XFS_EXPERIMENTAL_METADIR] = {
+ .opstate = XFS_OPSTATE_WARNED_METADIR,
+ .name = "metadata directory tree",
+ },
+ };
+ ASSERT(feat >= 0 && feat < XFS_EXPERIMENTAL_MAX);
+ BUILD_BUG_ON(ARRAY_SIZE(features) != XFS_EXPERIMENTAL_MAX);
+
+ if (xfs_should_warn(mp, features[feat].opstate))
+ xfs_warn(mp,
+ "EXPERIMENTAL %s feature enabled. Use at your own risk!",
+ features[feat].name);
+}
diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h
index cc323775a12c..7fb36ced9df7 100644
--- a/fs/xfs/xfs_message.h
+++ b/fs/xfs/xfs_message.h
@@ -75,12 +75,6 @@ do { \
#define xfs_debug_ratelimited(dev, fmt, ...) \
xfs_printk_ratelimited(xfs_debug, dev, fmt, ##__VA_ARGS__)
-#define xfs_warn_mount(mp, warntag, fmt, ...) \
-do { \
- if (xfs_should_warn((mp), (warntag))) \
- xfs_warn((mp), (fmt), ##__VA_ARGS__); \
-} while (0)
-
#define xfs_warn_once(dev, fmt, ...) \
xfs_printk_once(xfs_warn, dev, fmt, ##__VA_ARGS__)
#define xfs_notice_once(dev, fmt, ...) \
@@ -96,4 +90,18 @@ extern void xfs_hex_dump(const void *p, int length);
void xfs_buf_alert_ratelimited(struct xfs_buf *bp, const char *rlmsg,
const char *fmt, ...);
+enum xfs_experimental_feat {
+ XFS_EXPERIMENTAL_PNFS,
+ XFS_EXPERIMENTAL_SCRUB,
+ XFS_EXPERIMENTAL_SHRINK,
+ XFS_EXPERIMENTAL_LARP,
+ XFS_EXPERIMENTAL_LBS,
+ XFS_EXPERIMENTAL_EXCHRANGE,
+ XFS_EXPERIMENTAL_PPTR,
+ XFS_EXPERIMENTAL_METADIR,
+
+ XFS_EXPERIMENTAL_MAX,
+};
+void xfs_warn_experimental(struct xfs_mount *mp, enum xfs_experimental_feat f);
+
#endif /* __XFS_MESSAGE_H */
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index 25bbcc3f4ee0..2dd2606fc7e3 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -35,6 +35,7 @@
#include "xfs_trace.h"
#include "xfs_ag.h"
#include "xfs_rtbitmap.h"
+#include "xfs_metafile.h"
#include "scrub/stats.h"
static DEFINE_MUTEX(xfs_uuid_table_mutex);
@@ -620,6 +621,22 @@ xfs_mount_setup_inode_geom(
xfs_ialloc_setup_geometry(mp);
}
+/* Mount the metadata directory tree root. */
+STATIC int
+xfs_mount_setup_metadir(
+ struct xfs_mount *mp)
+{
+ int error;
+
+ /* Load the metadata directory root inode into memory. */
+ error = xfs_metafile_iget(mp, mp->m_sb.sb_metadirino, XFS_METAFILE_DIR,
+ &mp->m_metadirip);
+ if (error)
+ xfs_warn(mp, "Failed to load metadir root directory, error %d",
+ error);
+ return error;
+}
+
/* Compute maximum possible height for per-AG btree types for this fs. */
static inline void
xfs_agbtree_compute_maxlevels(
@@ -866,6 +883,12 @@ xfs_mountfs(
mp->m_features |= XFS_FEAT_ATTR2;
}
+ if (xfs_has_metadir(mp)) {
+ error = xfs_mount_setup_metadir(mp);
+ if (error)
+ goto out_free_metadir;
+ }
+
/*
* Get and sanity-check the root inode.
* Save the pointer to it in the mount structure.
@@ -876,7 +899,7 @@ xfs_mountfs(
xfs_warn(mp,
"Failed to read root inode 0x%llx, error %d",
sbp->sb_rootino, -error);
- goto out_log_dealloc;
+ goto out_free_metadir;
}
ASSERT(rip != NULL);
@@ -1018,6 +1041,9 @@ xfs_mountfs(
xfs_irele(rip);
/* Clean out dquots that might be in memory after quotacheck. */
xfs_qm_unmount(mp);
+ out_free_metadir:
+ if (mp->m_metadirip)
+ xfs_irele(mp->m_metadirip);
/*
* Inactivate all inodes that might still be in memory after a log
@@ -1039,7 +1065,6 @@ xfs_mountfs(
* quota inodes.
*/
xfs_unmount_flush_inodes(mp);
- out_log_dealloc:
xfs_log_mount_cancel(mp);
out_inodegc_shrinker:
shrinker_free(mp->m_inodegc_shrinker);
@@ -1091,6 +1116,8 @@ xfs_unmountfs(
xfs_qm_unmount_quotas(mp);
xfs_rtunmount_inodes(mp);
xfs_irele(mp->m_rootip);
+ if (mp->m_metadirip)
+ xfs_irele(mp->m_metadirip);
xfs_unmount_flush_inodes(mp);
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 1b698878f40c..71ed04ddd737 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -127,6 +127,7 @@ typedef struct xfs_mount {
struct xfs_inode *m_rbmip; /* pointer to bitmap inode */
struct xfs_inode *m_rsumip; /* pointer to summary inode */
struct xfs_inode *m_rootip; /* pointer to root directory */
+ struct xfs_inode *m_metadirip; /* ptr to metadata directory */
struct xfs_quotainfo *m_quotainfo; /* disk quota information */
struct xfs_buftarg *m_ddev_targp; /* data device */
struct xfs_buftarg *m_logdev_targp;/* log device */
@@ -332,6 +333,7 @@ typedef struct xfs_mount {
#define XFS_FEAT_NEEDSREPAIR (1ULL << 25) /* needs xfs_repair */
#define XFS_FEAT_NREXT64 (1ULL << 26) /* large extent counters */
#define XFS_FEAT_EXCHANGE_RANGE (1ULL << 27) /* exchange range */
+#define XFS_FEAT_METADIR (1ULL << 28) /* metadata directory tree */
/* Mount features */
#define XFS_FEAT_NOATTR2 (1ULL << 48) /* disable attr2 creation */
@@ -387,6 +389,7 @@ __XFS_HAS_FEAT(bigtime, BIGTIME)
__XFS_HAS_FEAT(needsrepair, NEEDSREPAIR)
__XFS_HAS_FEAT(large_extent_counts, NREXT64)
__XFS_HAS_FEAT(exchange_range, EXCHANGE_RANGE)
+__XFS_HAS_FEAT(metadir, METADIR)
/*
* Some features are always on for v5 file systems, allow the compiler to
@@ -467,18 +470,28 @@ __XFS_HAS_FEAT(nouuid, NOUUID)
*/
#define XFS_OPSTATE_BLOCKGC_ENABLED 6
+/* Kernel has logged a warning about pNFS being used on this fs. */
+#define XFS_OPSTATE_WARNED_PNFS 7
/* Kernel has logged a warning about online fsck being used on this fs. */
-#define XFS_OPSTATE_WARNED_SCRUB 7
+#define XFS_OPSTATE_WARNED_SCRUB 8
/* Kernel has logged a warning about shrink being used on this fs. */
-#define XFS_OPSTATE_WARNED_SHRINK 8
+#define XFS_OPSTATE_WARNED_SHRINK 9
/* Kernel has logged a warning about logged xattr updates being used. */
-#define XFS_OPSTATE_WARNED_LARP 9
+#define XFS_OPSTATE_WARNED_LARP 10
/* Mount time quotacheck is running */
-#define XFS_OPSTATE_QUOTACHECK_RUNNING 10
+#define XFS_OPSTATE_QUOTACHECK_RUNNING 11
/* Do we want to clear log incompat flags? */
-#define XFS_OPSTATE_UNSET_LOG_INCOMPAT 11
+#define XFS_OPSTATE_UNSET_LOG_INCOMPAT 12
/* Filesystem can use logged extended attributes */
-#define XFS_OPSTATE_USE_LARP 12
+#define XFS_OPSTATE_USE_LARP 13
+/* Kernel has logged a warning about blocksize > pagesize on this fs. */
+#define XFS_OPSTATE_WARNED_LBS 14
+/* Kernel has logged a warning about exchange-range being used on this fs. */
+#define XFS_OPSTATE_WARNED_EXCHRANGE 15
+/* Kernel has logged a warning about parent pointers being used on this fs. */
+#define XFS_OPSTATE_WARNED_PPTR 16
+/* Kernel has logged a warning about metadata dirs being used on this fs. */
+#define XFS_OPSTATE_WARNED_METADIR 17
#define __XFS_IS_OPSTATE(name, NAME) \
static inline bool xfs_is_ ## name (struct xfs_mount *mp) \
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index 23d16186e1a3..6f4479deac6d 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -58,8 +58,7 @@ xfs_fs_get_uuid(
{
struct xfs_mount *mp = XFS_M(sb);
- xfs_notice_once(mp,
-"Using experimental pNFS feature, use at your own risk!");
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_PNFS);
if (*len < sizeof(uuid_t))
return -EINVAL;
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 7e2307921deb..b94d6f192e72 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -27,6 +27,8 @@
#include "xfs_ialloc.h"
#include "xfs_log_priv.h"
#include "xfs_health.h"
+#include "xfs_da_format.h"
+#include "xfs_metafile.h"
/*
* The global quota manager. There is only one of these for the entire
@@ -302,6 +304,8 @@ xfs_qm_need_dqattach(
return false;
if (xfs_is_quota_inode(&mp->m_sb, ip->i_ino))
return false;
+ if (xfs_is_metadir_inode(ip))
+ return false;
return true;
}
@@ -324,6 +328,7 @@ xfs_qm_dqattach_locked(
return 0;
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
+ ASSERT(!xfs_is_metadir_inode(ip));
if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) {
error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_USER,
@@ -733,6 +738,17 @@ xfs_qm_destroy_quotainfo(
mp->m_quotainfo = NULL;
}
+static inline enum xfs_metafile_type
+xfs_qm_metafile_type(
+ unsigned int flags)
+{
+ if (flags & XFS_QMOPT_UQUOTA)
+ return XFS_METAFILE_USRQUOTA;
+ else if (flags & XFS_QMOPT_GQUOTA)
+ return XFS_METAFILE_GRPQUOTA;
+ return XFS_METAFILE_PRJQUOTA;
+}
+
/*
* Create an inode and return with a reference already taken, but unlocked
* This is how we create quota inodes
@@ -744,6 +760,7 @@ xfs_qm_qino_alloc(
unsigned int flags)
{
struct xfs_trans *tp;
+ enum xfs_metafile_type metafile_type = xfs_qm_metafile_type(flags);
int error;
bool need_alloc = true;
@@ -777,9 +794,10 @@ xfs_qm_qino_alloc(
}
}
if (ino != NULLFSINO) {
- error = xfs_iget(mp, NULL, ino, 0, 0, ipp);
+ error = xfs_metafile_iget(mp, ino, metafile_type, ipp);
if (error)
return error;
+
mp->m_sb.sb_gquotino = NULLFSINO;
mp->m_sb.sb_pquotino = NULLFSINO;
need_alloc = false;
@@ -806,6 +824,8 @@ xfs_qm_qino_alloc(
xfs_trans_cancel(tp);
return error;
}
+ if (xfs_has_metadir(mp))
+ xfs_metafile_set_iflag(tp, *ipp, metafile_type);
}
/*
@@ -1189,6 +1209,10 @@ xfs_qm_dqusage_adjust(
}
}
+ /* Metadata directory files are not accounted to user-visible quotas. */
+ if (xfs_is_metadir_inode(ip))
+ goto error0;
+
ASSERT(ip->i_delayed_blks == 0);
if (XFS_IS_REALTIME_INODE(ip)) {
@@ -1553,16 +1577,20 @@ xfs_qm_qino_load(
struct xfs_inode **ipp)
{
xfs_ino_t ino = NULLFSINO;
+ enum xfs_metafile_type metafile_type = XFS_METAFILE_UNKNOWN;
switch (type) {
case XFS_DQTYPE_USER:
ino = mp->m_sb.sb_uquotino;
+ metafile_type = XFS_METAFILE_USRQUOTA;
break;
case XFS_DQTYPE_GROUP:
ino = mp->m_sb.sb_gquotino;
+ metafile_type = XFS_METAFILE_GRPQUOTA;
break;
case XFS_DQTYPE_PROJ:
ino = mp->m_sb.sb_pquotino;
+ metafile_type = XFS_METAFILE_PRJQUOTA;
break;
default:
ASSERT(0);
@@ -1572,7 +1600,7 @@ xfs_qm_qino_load(
if (ino == NULLFSINO)
return -ENOENT;
- return xfs_iget(mp, NULL, ino, 0, 0, ipp);
+ return xfs_metafile_iget(mp, ino, metafile_type, ipp);
}
/*
@@ -1735,6 +1763,8 @@ xfs_qm_vop_dqalloc(
if (!XFS_IS_QUOTA_ON(mp))
return 0;
+ ASSERT(!xfs_is_metadir_inode(ip));
+
lockflags = XFS_ILOCK_EXCL;
xfs_ilock(ip, lockflags);
@@ -1864,6 +1894,7 @@ xfs_qm_vop_chown(
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
ASSERT(XFS_IS_QUOTA_ON(ip->i_mount));
+ ASSERT(!xfs_is_metadir_inode(ip));
/* old dquot */
prevdq = *IO_olddq;
@@ -1951,6 +1982,7 @@ xfs_qm_vop_create_dqattach(
return;
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
+ ASSERT(!xfs_is_metadir_inode(ip));
if (udqp && XFS_IS_UQUOTA_ON(mp)) {
ASSERT(ip->i_udquot == NULL);
diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h
index 23d71a55bbc0..645761997bf2 100644
--- a/fs/xfs/xfs_quota.h
+++ b/fs/xfs/xfs_quota.h
@@ -29,6 +29,11 @@ struct xfs_buf;
(XFS_IS_GQUOTA_ON(mp) && (ip)->i_gdquot == NULL) || \
(XFS_IS_PQUOTA_ON(mp) && (ip)->i_pdquot == NULL))
+#define XFS_IS_DQDETACHED(ip) \
+ ((ip)->i_udquot == NULL && \
+ (ip)->i_gdquot == NULL && \
+ (ip)->i_pdquot == NULL)
+
#define XFS_QM_NEED_QUOTACHECK(mp) \
((XFS_IS_UQUOTA_ON(mp) && \
(mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 3a2005a1e673..46a920b192d1 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -25,6 +25,8 @@
#include "xfs_quota.h"
#include "xfs_log_priv.h"
#include "xfs_health.h"
+#include "xfs_da_format.h"
+#include "xfs_metafile.h"
/*
* Return whether there are any free extents in the size range given
@@ -1101,16 +1103,12 @@ xfs_rtalloc_reinit_frextents(
*/
static inline int
xfs_rtmount_iread_extents(
+ struct xfs_trans *tp,
struct xfs_inode *ip,
unsigned int lock_class)
{
- struct xfs_trans *tp;
int error;
- error = xfs_trans_alloc_empty(ip->i_mount, &tp);
- if (error)
- return error;
-
xfs_ilock(ip, XFS_ILOCK_EXCL | lock_class);
error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
@@ -1125,7 +1123,6 @@ xfs_rtmount_iread_extents(
out_unlock:
xfs_iunlock(ip, XFS_ILOCK_EXCL | lock_class);
- xfs_trans_cancel(tp);
return error;
}
@@ -1133,45 +1130,54 @@ out_unlock:
* Get the bitmap and summary inodes and the summary cache into the mount
* structure at mount time.
*/
-int /* error */
+int
xfs_rtmount_inodes(
- xfs_mount_t *mp) /* file system mount structure */
+ struct xfs_mount *mp)
{
- int error; /* error return value */
- xfs_sb_t *sbp;
+ struct xfs_trans *tp;
+ struct xfs_sb *sbp = &mp->m_sb;
+ int error;
- sbp = &mp->m_sb;
- error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip);
+ error = xfs_trans_alloc_empty(mp, &tp);
+ if (error)
+ return error;
+
+ error = xfs_trans_metafile_iget(tp, mp->m_sb.sb_rbmino,
+ XFS_METAFILE_RTBITMAP, &mp->m_rbmip);
if (xfs_metadata_is_sick(error))
xfs_rt_mark_sick(mp, XFS_SICK_RT_BITMAP);
if (error)
- return error;
+ goto out_trans;
ASSERT(mp->m_rbmip != NULL);
- error = xfs_rtmount_iread_extents(mp->m_rbmip, XFS_ILOCK_RTBITMAP);
+ error = xfs_rtmount_iread_extents(tp, mp->m_rbmip, XFS_ILOCK_RTBITMAP);
if (error)
goto out_rele_bitmap;
- error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip);
+ error = xfs_trans_metafile_iget(tp, mp->m_sb.sb_rsumino,
+ XFS_METAFILE_RTSUMMARY, &mp->m_rsumip);
if (xfs_metadata_is_sick(error))
xfs_rt_mark_sick(mp, XFS_SICK_RT_SUMMARY);
if (error)
goto out_rele_bitmap;
ASSERT(mp->m_rsumip != NULL);
- error = xfs_rtmount_iread_extents(mp->m_rsumip, XFS_ILOCK_RTSUM);
+ error = xfs_rtmount_iread_extents(tp, mp->m_rsumip, XFS_ILOCK_RTSUM);
if (error)
goto out_rele_summary;
error = xfs_alloc_rsum_cache(mp, sbp->sb_rbmblocks);
if (error)
goto out_rele_summary;
+ xfs_trans_cancel(tp);
return 0;
out_rele_summary:
xfs_irele(mp->m_rsumip);
out_rele_bitmap:
xfs_irele(mp->m_rbmip);
+out_trans:
+ xfs_trans_cancel(tp);
return error;
}
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 457c2d70968d..be493d392960 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1657,9 +1657,7 @@ xfs_fs_fill_super(
goto out_free_sb;
}
- xfs_warn(mp,
-"EXPERIMENTAL: V5 Filesystem with Large Block Size (%d bytes) enabled.",
- mp->m_sb.sb_blocksize);
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_LBS);
}
/* Ensure this filesystem fits in the page cache limits */
@@ -1733,6 +1731,9 @@ xfs_fs_fill_super(
mp->m_features &= ~XFS_FEAT_DISCARD;
}
+ if (xfs_has_metadir(mp))
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_METADIR);
+
if (xfs_has_reflink(mp)) {
if (mp->m_sb.sb_rblocks) {
xfs_alert(mp,
@@ -1755,12 +1756,10 @@ xfs_fs_fill_super(
}
if (xfs_has_exchange_range(mp))
- xfs_warn(mp,
- "EXPERIMENTAL exchange-range feature enabled. Use at your own risk!");
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_EXCHRANGE);
if (xfs_has_parent(mp))
- xfs_warn(mp,
- "EXPERIMENTAL parent pointer feature enabled. Use at your own risk!");
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_PPTR);
error = xfs_mountfs(mp);
if (error)
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index 17164b2d0472..1b9d75a54c5e 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -46,6 +46,8 @@
#include "xfs_parent.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
+#include "xfs_metafile.h"
+#include "xfs_metadir.h"
/*
* We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 0d5caba76a82..9a6a09e2e529 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -95,6 +95,7 @@ struct xfs_attrlist_cursor_kern;
struct xfs_extent_free_item;
struct xfs_rmap_intent;
struct xfs_refcount_intent;
+struct xfs_metadir_update;
#define XFS_ATTR_FILTER_FLAGS \
{ XFS_ATTR_ROOT, "ROOT" }, \
@@ -5356,6 +5357,107 @@ DEFINE_EVENT(xfs_getparents_class, name, \
DEFINE_XFS_GETPARENTS_EVENT(xfs_getparents_begin);
DEFINE_XFS_GETPARENTS_EVENT(xfs_getparents_end);
+DECLARE_EVENT_CLASS(xfs_metadir_update_class,
+ TP_PROTO(const struct xfs_metadir_update *upd),
+ TP_ARGS(upd),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, dp_ino)
+ __field(xfs_ino_t, ino)
+ __string(fname, upd->path)
+ ),
+ TP_fast_assign(
+ __entry->dev = upd->dp->i_mount->m_super->s_dev;
+ __entry->dp_ino = upd->dp->i_ino;
+ __entry->ino = upd->ip ? upd->ip->i_ino : NULLFSINO;
+ __assign_str(fname);
+ ),
+ TP_printk("dev %d:%d dp 0x%llx fname '%s' ino 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->dp_ino,
+ __get_str(fname),
+ __entry->ino)
+)
+
+#define DEFINE_METADIR_UPDATE_EVENT(name) \
+DEFINE_EVENT(xfs_metadir_update_class, name, \
+ TP_PROTO(const struct xfs_metadir_update *upd), \
+ TP_ARGS(upd))
+DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_start_create);
+DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_start_link);
+DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_commit);
+DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_cancel);
+DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_try_create);
+DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_create);
+DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_link);
+
+DECLARE_EVENT_CLASS(xfs_metadir_update_error_class,
+ TP_PROTO(const struct xfs_metadir_update *upd, int error),
+ TP_ARGS(upd, error),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, dp_ino)
+ __field(xfs_ino_t, ino)
+ __field(int, error)
+ __string(fname, upd->path)
+ ),
+ TP_fast_assign(
+ __entry->dev = upd->dp->i_mount->m_super->s_dev;
+ __entry->dp_ino = upd->dp->i_ino;
+ __entry->ino = upd->ip ? upd->ip->i_ino : NULLFSINO;
+ __entry->error = error;
+ __assign_str(fname);
+ ),
+ TP_printk("dev %d:%d dp 0x%llx fname '%s' ino 0x%llx error %d",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->dp_ino,
+ __get_str(fname),
+ __entry->ino,
+ __entry->error)
+)
+
+#define DEFINE_METADIR_UPDATE_ERROR_EVENT(name) \
+DEFINE_EVENT(xfs_metadir_update_error_class, name, \
+ TP_PROTO(const struct xfs_metadir_update *upd, int error), \
+ TP_ARGS(upd, error))
+DEFINE_METADIR_UPDATE_ERROR_EVENT(xfs_metadir_teardown);
+
+DECLARE_EVENT_CLASS(xfs_metadir_class,
+ TP_PROTO(struct xfs_inode *dp, struct xfs_name *name,
+ xfs_ino_t ino),
+ TP_ARGS(dp, name, ino),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_ino_t, dp_ino)
+ __field(xfs_ino_t, ino)
+ __field(int, ftype)
+ __field(int, namelen)
+ __dynamic_array(char, name, name->len)
+ ),
+ TP_fast_assign(
+ __entry->dev = VFS_I(dp)->i_sb->s_dev;
+ __entry->dp_ino = dp->i_ino;
+ __entry->ino = ino,
+ __entry->ftype = name->type;
+ __entry->namelen = name->len;
+ memcpy(__get_str(name), name->name, name->len);
+ ),
+ TP_printk("dev %d:%d dir 0x%llx type %s name '%.*s' ino 0x%llx",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __entry->dp_ino,
+ __print_symbolic(__entry->ftype, XFS_DIR3_FTYPE_STR),
+ __entry->namelen,
+ __get_str(name),
+ __entry->ino)
+)
+
+#define DEFINE_METADIR_EVENT(name) \
+DEFINE_EVENT(xfs_metadir_class, name, \
+ TP_PROTO(struct xfs_inode *dp, struct xfs_name *name, \
+ xfs_ino_t ino), \
+ TP_ARGS(dp, name, ino))
+DEFINE_METADIR_EVENT(xfs_metadir_lookup);
+
#endif /* _TRACE_XFS_H */
#undef TRACE_INCLUDE_PATH
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index b368e13424c4..ca7df018290e 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -156,6 +156,8 @@ xfs_trans_mod_ino_dquot(
unsigned int field,
int64_t delta)
{
+ ASSERT(!xfs_is_metadir_inode(ip) || XFS_IS_DQDETACHED(ip));
+
xfs_trans_mod_dquot(tp, dqp, field, delta);
if (xfs_hooks_switched_on(&xfs_dqtrx_hooks_switch)) {
@@ -247,6 +249,8 @@ xfs_trans_mod_dquot_byino(
xfs_is_quota_inode(&mp->m_sb, ip->i_ino))
return;
+ ASSERT(!xfs_is_metadir_inode(ip) || XFS_IS_DQDETACHED(ip));
+
if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot)
xfs_trans_mod_ino_dquot(tp, ip, ip->i_udquot, field, delta);
if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot)
@@ -962,6 +966,8 @@ xfs_trans_reserve_quota_nblks(
if (!XFS_IS_QUOTA_ON(mp))
return 0;
+ if (xfs_is_metadir_inode(ip))
+ return 0;
ASSERT(!xfs_is_quota_inode(&mp->m_sb, ip->i_ino));
xfs_assert_ilocked(ip, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c
index eaf849260bd6..0f641a9091ec 100644
--- a/fs/xfs/xfs_xattr.c
+++ b/fs/xfs/xfs_xattr.c
@@ -51,8 +51,7 @@ xfs_attr_grab_log_assist(
return error;
xfs_set_using_logged_xattrs(mp);
- xfs_warn_mount(mp, XFS_OPSTATE_WARNED_LARP,
- "EXPERIMENTAL logged extended attributes feature in use. Use at your own risk!");
+ xfs_warn_experimental(mp, XFS_EXPERIMENTAL_LARP);
return 0;
}