diff options
72 files changed, 2332 insertions, 205 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 94cb8ca9f9da..d80c2817eb48 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -16,6 +16,7 @@ xfs-y += xfs_trace.o xfs-y += $(addprefix libxfs/, \ xfs_group.o \ xfs_ag.o \ + xfs_ag_resv.o \ xfs_alloc.o \ xfs_alloc_btree.o \ xfs_attr.o \ @@ -43,7 +44,8 @@ xfs-y += $(addprefix libxfs/, \ xfs_inode_buf.o \ xfs_inode_util.o \ xfs_log_rlimit.o \ - xfs_ag_resv.o \ + xfs_metadir.o \ + xfs_metafile.o \ xfs_parent.o \ xfs_rmap.o \ xfs_rmap_btree.o \ @@ -172,6 +174,7 @@ xfs-y += $(addprefix scrub/, \ inode.o \ iscan.o \ listxattr.o \ + metapath.o \ nlinks.o \ parent.o \ readdir.o \ diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c index c63da14eee04..17875ad865f5 100644 --- a/fs/xfs/libxfs/xfs_attr.c +++ b/fs/xfs/libxfs/xfs_attr.c @@ -1004,7 +1004,10 @@ xfs_attr_add_fork( unsigned int blks; /* space reservation */ int error; /* error return value */ - ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); + if (xfs_is_metadir_inode(ip)) + ASSERT(XFS_IS_DQDETACHED(ip)); + else + ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); blks = XFS_ADDAFORK_SPACE_RES(mp); diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 5eda036cf9bf..7805a36e98c4 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -1042,7 +1042,10 @@ xfs_bmap_add_attrfork( int error; /* error return value */ xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); - ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); + if (xfs_is_metadir_inode(ip)) + ASSERT(XFS_IS_DQDETACHED(ip)); + else + ASSERT(!XFS_NOT_DQATTACHED(mp, ip)); ASSERT(!xfs_inode_has_attr_fork(ip)); xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index e1bfee0c3b1a..616f81045921 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -174,6 +174,8 @@ typedef struct xfs_sb { xfs_lsn_t sb_lsn; /* last write sequence */ uuid_t sb_meta_uuid; /* metadata file system unique id */ + xfs_ino_t sb_metadirino; /* metadata directory tree root */ + /* must be padded to 64 bit alignment */ } xfs_sb_t; @@ -259,6 +261,8 @@ struct xfs_dsb { __be64 sb_lsn; /* last write sequence */ uuid_t sb_meta_uuid; /* metadata file system unique id */ + __be64 sb_metadirino; /* metadata directory tree root */ + /* must be padded to 64 bit alignment */ }; @@ -278,7 +282,7 @@ struct xfs_dsb { #define XFS_SB_VERSION_NUM(sbp) ((sbp)->sb_versionnum & XFS_SB_VERSION_NUMBITS) -static inline bool xfs_sb_is_v5(struct xfs_sb *sbp) +static inline bool xfs_sb_is_v5(const struct xfs_sb *sbp) { return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5; } @@ -287,12 +291,12 @@ static inline bool xfs_sb_is_v5(struct xfs_sb *sbp) * Detect a mismatched features2 field. Older kernels read/wrote * this into the wrong slot, so to be safe we keep them in sync. */ -static inline bool xfs_sb_has_mismatched_features2(struct xfs_sb *sbp) +static inline bool xfs_sb_has_mismatched_features2(const struct xfs_sb *sbp) { return sbp->sb_bad_features2 != sbp->sb_features2; } -static inline bool xfs_sb_version_hasmorebits(struct xfs_sb *sbp) +static inline bool xfs_sb_version_hasmorebits(const struct xfs_sb *sbp) { return xfs_sb_is_v5(sbp) || (sbp->sb_versionnum & XFS_SB_VERSION_MOREBITSBIT); @@ -342,8 +346,8 @@ static inline void xfs_sb_version_addprojid32(struct xfs_sb *sbp) #define XFS_SB_FEAT_COMPAT_UNKNOWN ~XFS_SB_FEAT_COMPAT_ALL static inline bool xfs_sb_has_compat_feature( - struct xfs_sb *sbp, - uint32_t feature) + const struct xfs_sb *sbp, + uint32_t feature) { return (sbp->sb_features_compat & feature) != 0; } @@ -360,8 +364,8 @@ xfs_sb_has_compat_feature( #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL static inline bool xfs_sb_has_ro_compat_feature( - struct xfs_sb *sbp, - uint32_t feature) + const struct xfs_sb *sbp, + uint32_t feature) { return (sbp->sb_features_ro_compat & feature) != 0; } @@ -374,6 +378,7 @@ xfs_sb_has_ro_compat_feature( #define XFS_SB_FEAT_INCOMPAT_NREXT64 (1 << 5) /* large extent counters */ #define XFS_SB_FEAT_INCOMPAT_EXCHRANGE (1 << 6) /* exchangerange supported */ #define XFS_SB_FEAT_INCOMPAT_PARENT (1 << 7) /* parent pointers */ +#define XFS_SB_FEAT_INCOMPAT_METADIR (1 << 8) /* metadata dir tree */ #define XFS_SB_FEAT_INCOMPAT_ALL \ (XFS_SB_FEAT_INCOMPAT_FTYPE | \ XFS_SB_FEAT_INCOMPAT_SPINODES | \ @@ -387,8 +392,8 @@ xfs_sb_has_ro_compat_feature( #define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL static inline bool xfs_sb_has_incompat_feature( - struct xfs_sb *sbp, - uint32_t feature) + const struct xfs_sb *sbp, + uint32_t feature) { return (sbp->sb_features_incompat & feature) != 0; } @@ -399,8 +404,8 @@ xfs_sb_has_incompat_feature( #define XFS_SB_FEAT_INCOMPAT_LOG_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_LOG_ALL static inline bool xfs_sb_has_incompat_log_feature( - struct xfs_sb *sbp, - uint32_t feature) + const struct xfs_sb *sbp, + uint32_t feature) { return (sbp->sb_features_log_incompat & feature) != 0; } @@ -420,7 +425,7 @@ xfs_sb_add_incompat_log_features( sbp->sb_features_log_incompat |= features; } -static inline bool xfs_sb_version_haslogxattrs(struct xfs_sb *sbp) +static inline bool xfs_sb_version_haslogxattrs(const struct xfs_sb *sbp) { return xfs_sb_is_v5(sbp) && (sbp->sb_features_log_incompat & XFS_SB_FEAT_INCOMPAT_LOG_XATTRS); @@ -790,6 +795,27 @@ static inline time64_t xfs_bigtime_to_unix(uint64_t ondisk_seconds) return (time64_t)ondisk_seconds - XFS_BIGTIME_EPOCH_OFFSET; } +enum xfs_metafile_type { + XFS_METAFILE_UNKNOWN, /* unknown */ + XFS_METAFILE_DIR, /* metadir directory */ + XFS_METAFILE_USRQUOTA, /* user quota */ + XFS_METAFILE_GRPQUOTA, /* group quota */ + XFS_METAFILE_PRJQUOTA, /* project quota */ + XFS_METAFILE_RTBITMAP, /* rt bitmap */ + XFS_METAFILE_RTSUMMARY, /* rt summary */ + + XFS_METAFILE_MAX +} __packed; + +#define XFS_METAFILE_TYPE_STR \ + { XFS_METAFILE_UNKNOWN, "unknown" }, \ + { XFS_METAFILE_DIR, "dir" }, \ + { XFS_METAFILE_USRQUOTA, "usrquota" }, \ + { XFS_METAFILE_GRPQUOTA, "grpquota" }, \ + { XFS_METAFILE_PRJQUOTA, "prjquota" }, \ + { XFS_METAFILE_RTBITMAP, "rtbitmap" }, \ + { XFS_METAFILE_RTSUMMARY, "rtsummary" } + /* * On-disk inode structure. * @@ -812,7 +838,7 @@ struct xfs_dinode { __be16 di_mode; /* mode and type of file */ __u8 di_version; /* inode version */ __u8 di_format; /* format of di_c data */ - __be16 di_onlink; /* old number of links to file */ + __be16 di_metatype; /* XFS_METAFILE_*; was di_onlink */ __be32 di_uid; /* owner's user id */ __be32 di_gid; /* owner's group id */ __be32 di_nlink; /* number of links to file */ @@ -1088,21 +1114,60 @@ static inline void xfs_dinode_put_rdev(struct xfs_dinode *dip, xfs_dev_t rdev) * Values for di_flags2 These start by being exposed to userspace in the upper * 16 bits of the XFS_XFLAG_s range. */ -#define XFS_DIFLAG2_DAX_BIT 0 /* use DAX for this inode */ -#define XFS_DIFLAG2_REFLINK_BIT 1 /* file's blocks may be shared */ -#define XFS_DIFLAG2_COWEXTSIZE_BIT 2 /* copy on write extent size hint */ -#define XFS_DIFLAG2_BIGTIME_BIT 3 /* big timestamps */ -#define XFS_DIFLAG2_NREXT64_BIT 4 /* large extent counters */ +/* use DAX for this inode */ +#define XFS_DIFLAG2_DAX_BIT 0 + +/* file's blocks may be shared */ +#define XFS_DIFLAG2_REFLINK_BIT 1 -#define XFS_DIFLAG2_DAX (1 << XFS_DIFLAG2_DAX_BIT) -#define XFS_DIFLAG2_REFLINK (1 << XFS_DIFLAG2_REFLINK_BIT) -#define XFS_DIFLAG2_COWEXTSIZE (1 << XFS_DIFLAG2_COWEXTSIZE_BIT) -#define XFS_DIFLAG2_BIGTIME (1 << XFS_DIFLAG2_BIGTIME_BIT) -#define XFS_DIFLAG2_NREXT64 (1 << XFS_DIFLAG2_NREXT64_BIT) +/* copy on write extent size hint */ +#define XFS_DIFLAG2_COWEXTSIZE_BIT 2 + +/* big timestamps */ +#define XFS_DIFLAG2_BIGTIME_BIT 3 + +/* large extent counters */ +#define XFS_DIFLAG2_NREXT64_BIT 4 + +/* + * The inode contains filesystem metadata and can be found through the metadata + * directory tree. Metadata inodes must satisfy the following constraints: + * + * - V5 filesystem (and ftype) are enabled; + * - The only valid modes are regular files and directories; + * - The access bits must be zero; + * - DMAPI event and state masks are zero; + * - The user and group IDs must be zero; + * - The project ID can be used as a u32 annotation; + * - The immutable, sync, noatime, nodump, nodefrag flags must be set. + * - The dax flag must not be set. + * - Directories must have nosymlinks set. + * + * These requirements are chosen defensively to minimize the ability of + * userspace to read or modify the contents, should a metadata file ever + * escape to userspace. + * + * There are further constraints on the directory tree itself: + * + * - Metadata inodes must never be resolvable through the root directory; + * - They must never be accessed by userspace; + * - Metadata directory entries must have correct ftype. + * + * Superblock-rooted metadata files must have the METADATA iflag set even + * though they do not have a parent directory. + */ +#define XFS_DIFLAG2_METADATA_BIT 5 + +#define XFS_DIFLAG2_DAX (1ULL << XFS_DIFLAG2_DAX_BIT) +#define XFS_DIFLAG2_REFLINK (1ULL << XFS_DIFLAG2_REFLINK_BIT) +#define XFS_DIFLAG2_COWEXTSIZE (1ULL << XFS_DIFLAG2_COWEXTSIZE_BIT) +#define XFS_DIFLAG2_BIGTIME (1ULL << XFS_DIFLAG2_BIGTIME_BIT) +#define XFS_DIFLAG2_NREXT64 (1ULL << XFS_DIFLAG2_NREXT64_BIT) +#define XFS_DIFLAG2_METADATA (1ULL << XFS_DIFLAG2_METADATA_BIT) #define XFS_DIFLAG2_ANY \ (XFS_DIFLAG2_DAX | XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE | \ - XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64) + XFS_DIFLAG2_BIGTIME | XFS_DIFLAG2_NREXT64 | XFS_DIFLAG2_METADATA) static inline bool xfs_dinode_has_bigtime(const struct xfs_dinode *dip) { @@ -1117,6 +1182,12 @@ static inline bool xfs_dinode_has_large_extent_counts( (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_NREXT64)); } +static inline bool xfs_dinode_is_metadir(const struct xfs_dinode *dip) +{ + return dip->di_version >= 3 && + (dip->di_flags2 & cpu_to_be64(XFS_DIFLAG2_METADATA)); +} + /* * Inode number format: * low inopblog bits - offset in block diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h index 860284064c5a..faa38a7d1eb0 100644 --- a/fs/xfs/libxfs/xfs_fs.h +++ b/fs/xfs/libxfs/xfs_fs.h @@ -198,6 +198,8 @@ struct xfs_fsop_geom { #define XFS_FSOP_GEOM_SICK_RT_SUMMARY (1 << 5) /* realtime summary */ #define XFS_FSOP_GEOM_SICK_QUOTACHECK (1 << 6) /* quota counts */ #define XFS_FSOP_GEOM_SICK_NLINKS (1 << 7) /* inode link counts */ +#define XFS_FSOP_GEOM_SICK_METADIR (1 << 8) /* metadata directory */ +#define XFS_FSOP_GEOM_SICK_METAPATH (1 << 9) /* metadir tree path */ /* Output for XFS_FS_COUNTS */ typedef struct xfs_fsop_counts { @@ -242,6 +244,7 @@ typedef struct xfs_fsop_resblks { #define XFS_FSOP_GEOM_FLAGS_NREXT64 (1 << 23) /* large extent counters */ #define XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE (1 << 24) /* exchange range */ #define XFS_FSOP_GEOM_FLAGS_PARENT (1 << 25) /* linux parent pointers */ +#define XFS_FSOP_GEOM_FLAGS_METADIR (1 << 26) /* metadata directories */ /* * Minimum and maximum sizes need for growth checks. @@ -489,9 +492,17 @@ struct xfs_bulk_ireq { */ #define XFS_BULK_IREQ_NREXT64 (1U << 2) +/* + * Allow bulkstat to return information about metadata directories. This + * enables xfs_scrub to find them for scanning, as they are otherwise ordinary + * directories. + */ +#define XFS_BULK_IREQ_METADIR (1U << 3) + #define XFS_BULK_IREQ_FLAGS_ALL (XFS_BULK_IREQ_AGNO | \ XFS_BULK_IREQ_SPECIAL | \ - XFS_BULK_IREQ_NREXT64) + XFS_BULK_IREQ_NREXT64 | \ + XFS_BULK_IREQ_METADIR) /* Operate on the root directory inode. */ #define XFS_BULK_IREQ_SPECIAL_ROOT (1) @@ -722,9 +733,10 @@ struct xfs_scrub_metadata { #define XFS_SCRUB_TYPE_NLINKS 26 /* inode link counts */ #define XFS_SCRUB_TYPE_HEALTHY 27 /* everything checked out ok */ #define XFS_SCRUB_TYPE_DIRTREE 28 /* directory tree structure */ +#define XFS_SCRUB_TYPE_METAPATH 29 /* metadata directory tree paths */ /* Number of scrub subcommands. */ -#define XFS_SCRUB_TYPE_NR 29 +#define XFS_SCRUB_TYPE_NR 30 /* * This special type code only applies to the vectored scrub implementation. @@ -803,6 +815,15 @@ struct xfs_scrub_vec_head { #define XFS_SCRUB_VEC_FLAGS_ALL (0) /* + * i: sm_ino values for XFS_SCRUB_TYPE_METAPATH to select a metadata file for + * path checking. + */ +#define XFS_SCRUB_METAPATH_PROBE (0) /* do we have a metapath scrubber? */ + +/* Number of metapath sm_ino values */ +#define XFS_SCRUB_METAPATH_NR (1) + +/* * ioctl limits */ #ifdef XATTR_LIST_MAX diff --git a/fs/xfs/libxfs/xfs_health.h b/fs/xfs/libxfs/xfs_health.h index 13301420a2f6..a23df94319e5 100644 --- a/fs/xfs/libxfs/xfs_health.h +++ b/fs/xfs/libxfs/xfs_health.h @@ -62,6 +62,8 @@ struct xfs_da_args; #define XFS_SICK_FS_PQUOTA (1 << 3) /* project quota */ #define XFS_SICK_FS_QUOTACHECK (1 << 4) /* quota counts */ #define XFS_SICK_FS_NLINKS (1 << 5) /* inode link counts */ +#define XFS_SICK_FS_METADIR (1 << 6) /* metadata directory tree */ +#define XFS_SICK_FS_METAPATH (1 << 7) /* metadata directory tree path */ /* Observable health issues for realtime volume metadata. */ #define XFS_SICK_RT_BITMAP (1 << 0) /* realtime bitmap */ @@ -105,7 +107,9 @@ struct xfs_da_args; XFS_SICK_FS_GQUOTA | \ XFS_SICK_FS_PQUOTA | \ XFS_SICK_FS_QUOTACHECK | \ - XFS_SICK_FS_NLINKS) + XFS_SICK_FS_NLINKS | \ + XFS_SICK_FS_METADIR | \ + XFS_SICK_FS_METAPATH) #define XFS_SICK_RT_PRIMARY (XFS_SICK_RT_BITMAP | \ XFS_SICK_RT_SUMMARY) diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index f0261c4d9106..8b84e2cf711b 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -1842,6 +1842,40 @@ out_release: } /* + * Pick an AG for the new inode. + * + * Directories, symlinks, and regular files frequently allocate at least one + * block, so factor that potential expansion when we examine whether an AG has + * enough space for file creation. Try to keep metadata files all in the same + * AG. + */ +static inline xfs_agnumber_t +xfs_dialloc_pick_ag( + struct xfs_mount *mp, + struct xfs_inode *dp, + umode_t mode) +{ + xfs_agnumber_t start_agno; + + if (!dp) + return 0; + if (xfs_is_metadir_inode(dp)) { + if (mp->m_sb.sb_logstart) + return XFS_FSB_TO_AGNO(mp, mp->m_sb.sb_logstart); + return 0; + } + + if (S_ISDIR(mode)) + return (atomic_inc_return(&mp->m_agirotor) - 1) % mp->m_maxagi; + + start_agno = XFS_INO_TO_AGNO(mp, dp->i_ino); + if (start_agno >= mp->m_maxagi) + start_agno = 0; + + return start_agno; +} + +/* * Allocate an on-disk inode. * * Mode is used to tell whether the new inode is a directory and hence where to @@ -1856,31 +1890,19 @@ xfs_dialloc( xfs_ino_t *new_ino) { struct xfs_mount *mp = (*tpp)->t_mountp; + struct xfs_perag *pag; + struct xfs_ino_geometry *igeo = M_IGEO(mp); + xfs_ino_t ino = NULLFSINO; xfs_ino_t parent = args->pip ? args->pip->i_ino : 0; - umode_t mode = args->mode & S_IFMT; xfs_agnumber_t agno; - int error = 0; xfs_agnumber_t start_agno; - struct xfs_perag *pag; - struct xfs_ino_geometry *igeo = M_IGEO(mp); + umode_t mode = args->mode & S_IFMT; bool ok_alloc = true; bool low_space = false; int flags; - xfs_ino_t ino = NULLFSINO; + int error = 0; - /* - * Directories, symlinks, and regular files frequently allocate at least - * one block, so factor that potential expansion when we examine whether - * an AG has enough space for file creation. - */ - if (S_ISDIR(mode)) - start_agno = (atomic_inc_return(&mp->m_agirotor) - 1) % - mp->m_maxagi; - else { - start_agno = XFS_INO_TO_AGNO(mp, parent); - if (start_agno >= mp->m_maxagi) - start_agno = 0; - } + start_agno = xfs_dialloc_pick_ag(mp, args->pip, mode); /* * If we have already hit the ceiling of inode blocks then clear diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index 79babeac9d75..424861fbf1bd 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -19,6 +19,7 @@ #include "xfs_ialloc.h" #include "xfs_dir2.h" #include "xfs_health.h" +#include "xfs_metafile.h" #include <linux/iversion.h> @@ -209,12 +210,15 @@ xfs_inode_from_disk( * They will also be unconditionally written back to disk as v2 inodes. */ if (unlikely(from->di_version == 1)) { - set_nlink(inode, be16_to_cpu(from->di_onlink)); + /* di_metatype used to be di_onlink */ + set_nlink(inode, be16_to_cpu(from->di_metatype)); ip->i_projid = 0; } else { set_nlink(inode, be32_to_cpu(from->di_nlink)); ip->i_projid = (prid_t)be16_to_cpu(from->di_projid_hi) << 16 | be16_to_cpu(from->di_projid_lo); + if (xfs_dinode_is_metadir(from)) + ip->i_metatype = be16_to_cpu(from->di_metatype); } i_uid_write(inode, be32_to_cpu(from->di_uid)); @@ -315,7 +319,10 @@ xfs_inode_to_disk( struct inode *inode = VFS_I(ip); to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); - to->di_onlink = 0; + if (xfs_is_metadir_inode(ip)) + to->di_metatype = cpu_to_be16(ip->i_metatype); + else + to->di_metatype = 0; to->di_format = xfs_ifork_format(&ip->i_df); to->di_uid = cpu_to_be32(i_uid_read(inode)); @@ -483,6 +490,69 @@ xfs_dinode_verify_nrext64( return NULL; } +/* + * Validate all the picky requirements we have for a file that claims to be + * filesystem metadata. + */ +xfs_failaddr_t +xfs_dinode_verify_metadir( + struct xfs_mount *mp, + struct xfs_dinode *dip, + uint16_t mode, + uint16_t flags, + uint64_t flags2) +{ + if (!xfs_has_metadir(mp)) + return __this_address; + + /* V5 filesystem only */ + if (dip->di_version < 3) + return __this_address; + + if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX) + return __this_address; + + /* V3 inode fields that are always zero */ + if ((flags2 & XFS_DIFLAG2_NREXT64) && dip->di_nrext64_pad) + return __this_address; + if (!(flags2 & XFS_DIFLAG2_NREXT64) && dip->di_flushiter) + return __this_address; + + /* Metadata files can only be directories or regular files */ + if (!S_ISDIR(mode) && !S_ISREG(mode)) + return __this_address; + + /* They must have zero access permissions */ + if (mode & 0777) + return __this_address; + + /* DMAPI event and state masks are zero */ + if (dip->di_dmevmask || dip->di_dmstate) + return __this_address; + + /* + * User and group IDs must be zero. The project ID is used for + * grouping inodes. Metadata inodes are never accounted to quotas. + */ + if (dip->di_uid || dip->di_gid) + return __this_address; + + /* Mandatory inode flags must be set */ + if (S_ISDIR(mode)) { + if ((flags & XFS_METADIR_DIFLAGS) != XFS_METADIR_DIFLAGS) + return __this_address; + } else { + if ((flags & XFS_METAFILE_DIFLAGS) != XFS_METAFILE_DIFLAGS) + return __this_address; + } + + /* dax flags2 must not be set */ + if (flags2 & XFS_DIFLAG2_DAX) + return __this_address; + + return NULL; +} + xfs_failaddr_t xfs_dinode_verify( struct xfs_mount *mp, @@ -523,8 +593,11 @@ xfs_dinode_verify( * di_nlink==0 on a V1 inode. V2/3 inodes would get written out with * di_onlink==0, so we can check that. */ - if (dip->di_version >= 2) { - if (dip->di_onlink) + if (dip->di_version == 2) { + if (dip->di_metatype) + return __this_address; + } else if (dip->di_version >= 3) { + if (!xfs_dinode_is_metadir(dip) && dip->di_metatype) return __this_address; } @@ -546,7 +619,8 @@ xfs_dinode_verify( if (dip->di_nlink) return __this_address; } else { - if (dip->di_onlink) + /* di_metatype used to be di_onlink */ + if (dip->di_metatype) return __this_address; } } @@ -663,6 +737,12 @@ xfs_dinode_verify( !xfs_has_bigtime(mp)) return __this_address; + if (flags2 & XFS_DIFLAG2_METADATA) { + fa = xfs_dinode_verify_metadir(mp, dip, mode, flags, flags2); + if (fa) + return fa; + } + return NULL; } diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h index 585ed5a110af..8d43d2641c73 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.h +++ b/fs/xfs/libxfs/xfs_inode_buf.h @@ -28,6 +28,9 @@ int xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from); xfs_failaddr_t xfs_dinode_verify(struct xfs_mount *mp, xfs_ino_t ino, struct xfs_dinode *dip); +xfs_failaddr_t xfs_dinode_verify_metadir(struct xfs_mount *mp, + struct xfs_dinode *dip, uint16_t mode, uint16_t flags, + uint64_t flags2); xfs_failaddr_t xfs_inode_validate_extsize(struct xfs_mount *mp, uint32_t extsize, uint16_t mode, uint16_t flags); xfs_failaddr_t xfs_inode_validate_cowextsize(struct xfs_mount *mp, diff --git a/fs/xfs/libxfs/xfs_inode_util.c b/fs/xfs/libxfs/xfs_inode_util.c index ec64eda3bbe2..deb0b7c00a1f 100644 --- a/fs/xfs/libxfs/xfs_inode_util.c +++ b/fs/xfs/libxfs/xfs_inode_util.c @@ -224,6 +224,8 @@ xfs_inode_inherit_flags2( } if (pip->i_diflags2 & XFS_DIFLAG2_DAX) ip->i_diflags2 |= XFS_DIFLAG2_DAX; + if (xfs_is_metadir_inode(pip)) + ip->i_diflags2 |= XFS_DIFLAG2_METADATA; /* Don't let invalid cowextsize hints propagate. */ failaddr = xfs_inode_validate_cowextsize(ip->i_mount, ip->i_cowextsize, diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 3e6682ed656b..ace7384a275b 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -404,7 +404,7 @@ struct xfs_log_dinode { uint16_t di_mode; /* mode and type of file */ int8_t di_version; /* inode version */ int8_t di_format; /* format of di_c data */ - uint8_t di_pad3[2]; /* unused in v2/3 inodes */ + uint16_t di_metatype; /* metadata type, if DIFLAG2_METADATA */ uint32_t di_uid; /* owner's user id */ uint32_t di_gid; /* owner's group id */ uint32_t di_nlink; /* number of links to file */ diff --git a/fs/xfs/libxfs/xfs_metadir.c b/fs/xfs/libxfs/xfs_metadir.c new file mode 100644 index 000000000000..bae7377c0f22 --- /dev/null +++ b/fs/xfs/libxfs/xfs_metadir.c @@ -0,0 +1,481 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2018-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_sb.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_trans.h" +#include "xfs_metafile.h" +#include "xfs_metadir.h" +#include "xfs_trace.h" +#include "xfs_inode.h" +#include "xfs_quota.h" +#include "xfs_ialloc.h" +#include "xfs_bmap_btree.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_trans_space.h" +#include "xfs_ag.h" +#include "xfs_dir2.h" +#include "xfs_dir2_priv.h" +#include "xfs_parent.h" +#include "xfs_health.h" + +/* + * Metadata Directory Tree + * ======================= + * + * These functions provide an abstraction layer for looking up, creating, and + * deleting metadata inodes that live within a special metadata directory tree. + * + * This code does not manage the five existing metadata inodes: real time + * bitmap & summary; and the user, group, and quotas. All other metadata + * inodes must use only the xfs_meta{dir,file}_* functions. + * + * Callers wishing to create or hardlink a metadata inode must create an + * xfs_metadir_update structure, call the appropriate xfs_metadir* function, + * and then call xfs_metadir_commit or xfs_metadir_cancel to commit or cancel + * the update. Files in the metadata directory tree currently cannot be + * unlinked. + * + * When the metadir feature is enabled, all metadata inodes must have the + * "metadata" inode flag set to prevent them from being exposed to the outside + * world. + * + * Callers must take the ILOCK of any inode in the metadata directory tree to + * synchronize access to that inode. It is never necessary to take the IOLOCK + * or the MMAPLOCK since metadata inodes must not be exposed to user space. + */ + +static inline void +xfs_metadir_set_xname( + struct xfs_name *xname, + const char *path, + unsigned char ftype) +{ + xname->name = (const unsigned char *)path; + xname->len = strlen(path); + xname->type = ftype; +} + +/* + * Given a parent directory @dp and a metadata inode path component @xname, + * Look up the inode number in the directory, returning it in @ino. + * @xname.type must match the directory entry's ftype. + * + * Caller must hold ILOCK_EXCL. + */ +static inline int +xfs_metadir_lookup( + struct xfs_trans *tp, + struct xfs_inode *dp, + struct xfs_name *xname, + xfs_ino_t *ino) +{ + struct xfs_mount *mp = dp->i_mount; + struct xfs_da_args args = { + .trans = tp, + .dp = dp, + .geo = mp->m_dir_geo, + .name = xname->name, + .namelen = xname->len, + .hashval = xfs_dir2_hashname(mp, xname), + .whichfork = XFS_DATA_FORK, + .op_flags = XFS_DA_OP_OKNOENT, + .owner = dp->i_ino, + }; + int error; + + if (!S_ISDIR(VFS_I(dp)->i_mode)) { + xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); + return -EFSCORRUPTED; + } + if (xfs_is_shutdown(mp)) + return -EIO; + + error = xfs_dir_lookup_args(&args); + if (error) + return error; + + if (!xfs_verify_ino(mp, args.inumber)) { + xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); + return -EFSCORRUPTED; + } + if (xname->type != XFS_DIR3_FT_UNKNOWN && xname->type != args.filetype) { + xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); + return -EFSCORRUPTED; + } + + trace_xfs_metadir_lookup(dp, xname, args.inumber); + *ino = args.inumber; + return 0; +} + +/* + * Look up and read a metadata inode from the metadata directory. If the path + * component doesn't exist, return -ENOENT. + */ +int +xfs_metadir_load( + struct xfs_trans *tp, + struct xfs_inode *dp, + const char *path, + enum xfs_metafile_type metafile_type, + struct xfs_inode **ipp) +{ + struct xfs_name xname; + xfs_ino_t ino; + int error; + + xfs_metadir_set_xname(&xname, path, XFS_DIR3_FT_UNKNOWN); + + xfs_ilock(dp, XFS_ILOCK_EXCL); + error = xfs_metadir_lookup(tp, dp, &xname, &ino); + xfs_iunlock(dp, XFS_ILOCK_EXCL); + if (error) + return error; + return xfs_trans_metafile_iget(tp, ino, metafile_type, ipp); +} + +/* + * Unlock and release resources after committing (or cancelling) a metadata + * directory tree operation. The caller retains its reference to @upd->ip + * and must release it explicitly. + */ +static inline void +xfs_metadir_teardown( + struct xfs_metadir_update *upd, + int error) +{ + trace_xfs_metadir_teardown(upd, error); + + if (upd->ppargs) { + xfs_parent_finish(upd->dp->i_mount, upd->ppargs); + upd->ppargs = NULL; + } + + if (upd->ip) { + if (upd->ip_locked) + xfs_iunlock(upd->ip, XFS_ILOCK_EXCL); + upd->ip_locked = false; + } + + if (upd->dp_locked) + xfs_iunlock(upd->dp, XFS_ILOCK_EXCL); + upd->dp_locked = false; +} + +/* + * Begin the process of creating a metadata file by allocating transactions + * and taking whatever resources we're going to need. + */ +int +xfs_metadir_start_create( + struct xfs_metadir_update *upd) +{ + struct xfs_mount *mp = upd->dp->i_mount; + int error; + + ASSERT(upd->dp != NULL); + ASSERT(upd->ip == NULL); + ASSERT(xfs_has_metadir(mp)); + ASSERT(upd->metafile_type != XFS_METAFILE_UNKNOWN); + + error = xfs_parent_start(mp, &upd->ppargs); + if (error) + return error; + + /* + * If we ever need the ability to create rt metadata files on a + * pre-metadir filesystem, we'll need to dqattach the parent here. + * Currently we assume that mkfs will create the files and quotacheck + * will account for them. + */ + + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_create, + xfs_create_space_res(mp, MAXNAMELEN), 0, 0, &upd->tp); + if (error) + goto out_teardown; + + /* + * Lock the parent directory if there is one. We can't ijoin it to + * the transaction until after the child file has been created. + */ + xfs_ilock(upd->dp, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); + upd->dp_locked = true; + + trace_xfs_metadir_start_create(upd); + return 0; +out_teardown: + xfs_metadir_teardown(upd, error); + return error; +} + +/* + * Create a metadata inode with the given @mode, and insert it into the + * metadata directory tree at the given @upd->path. The path up to the final + * component must already exist. The final path component must not exist. + * + * The new metadata inode will be attached to the update structure @upd->ip, + * with the ILOCK held until the caller releases it. + * + * NOTE: This function may return a new inode to the caller even if it returns + * a negative error code. If an inode is passed back, the caller must finish + * setting up the inode before releasing it. + */ +int +xfs_metadir_create( + struct xfs_metadir_update *upd, + umode_t mode) +{ + struct xfs_icreate_args args = { + .pip = upd->dp, + .mode = mode, + }; + struct xfs_name xname; + struct xfs_dir_update du = { + .dp = upd->dp, + .name = &xname, + .ppargs = upd->ppargs, + }; + struct xfs_mount *mp = upd->dp->i_mount; + xfs_ino_t ino; + unsigned int resblks; + int error; + + xfs_assert_ilocked(upd->dp, XFS_ILOCK_EXCL); + + /* Check that the name does not already exist in the directory. */ + xfs_metadir_set_xname(&xname, upd->path, XFS_DIR3_FT_UNKNOWN); + error = xfs_metadir_lookup(upd->tp, upd->dp, &xname, &ino); + switch (error) { + case -ENOENT: + break; + case 0: + error = -EEXIST; + fallthrough; + default: + return error; + } + + /* + * A newly created regular or special file just has one directory + * entry pointing to them, but a directory also the "." entry + * pointing to itself. + */ + error = xfs_dialloc(&upd->tp, &args, &ino); + if (error) + return error; + error = xfs_icreate(upd->tp, ino, &args, &upd->ip); + if (error) + return error; + du.ip = upd->ip; + xfs_metafile_set_iflag(upd->tp, upd->ip, upd->metafile_type); + upd->ip_locked = true; + + /* + * Join the directory inode to the transaction. We do not do it + * earlier because xfs_dialloc rolls the transaction. + */ + xfs_trans_ijoin(upd->tp, upd->dp, 0); + + /* Create the entry. */ + if (S_ISDIR(args.mode)) + resblks = xfs_mkdir_space_res(mp, xname.len); + else + resblks = xfs_create_space_res(mp, xname.len); + xname.type = xfs_mode_to_ftype(args.mode); + + trace_xfs_metadir_try_create(upd); + + error = xfs_dir_create_child(upd->tp, resblks, &du); + if (error) + return error; + + /* Metadir files are not accounted to quota. */ + + trace_xfs_metadir_create(upd); + + return 0; +} + +#ifndef __KERNEL__ +/* + * Begin the process of linking a metadata file by allocating transactions + * and locking whatever resources we're going to need. + */ +int +xfs_metadir_start_link( + struct xfs_metadir_update *upd) +{ + struct xfs_mount *mp = upd->dp->i_mount; + unsigned int resblks; + int nospace_error = 0; + int error; + + ASSERT(upd->dp != NULL); + ASSERT(upd->ip != NULL); + ASSERT(xfs_has_metadir(mp)); + + error = xfs_parent_start(mp, &upd->ppargs); + if (error) + return error; + + resblks = xfs_link_space_res(mp, MAXNAMELEN); + error = xfs_trans_alloc_dir(upd->dp, &M_RES(mp)->tr_link, upd->ip, + &resblks, &upd->tp, &nospace_error); + if (error) + goto out_teardown; + if (!resblks) { + /* We don't allow reservationless updates. */ + xfs_trans_cancel(upd->tp); + upd->tp = NULL; + xfs_iunlock(upd->dp, XFS_ILOCK_EXCL); + xfs_iunlock(upd->ip, XFS_ILOCK_EXCL); + error = nospace_error; + goto out_teardown; + } + + upd->dp_locked = true; + upd->ip_locked = true; + + trace_xfs_metadir_start_link(upd); + return 0; +out_teardown: + xfs_metadir_teardown(upd, error); + return error; +} + +/* + * Link the metadata directory given by @path to the inode @upd->ip. + * The path (up to the final component) must already exist, but the final + * component must not already exist. + */ +int +xfs_metadir_link( + struct xfs_metadir_update *upd) +{ + struct xfs_name xname; + struct xfs_dir_update du = { + .dp = upd->dp, + .name = &xname, + .ip = upd->ip, + .ppargs = upd->ppargs, + }; + struct xfs_mount *mp = upd->dp->i_mount; + xfs_ino_t ino; + unsigned int resblks; + int error; + + xfs_assert_ilocked(upd->dp, XFS_ILOCK_EXCL); + xfs_assert_ilocked(upd->ip, XFS_ILOCK_EXCL); + + /* Look up the name in the current directory. */ + xfs_metadir_set_xname(&xname, upd->path, + xfs_mode_to_ftype(VFS_I(upd->ip)->i_mode)); + error = xfs_metadir_lookup(upd->tp, upd->dp, &xname, &ino); + switch (error) { + case -ENOENT: + break; + case 0: + error = -EEXIST; + fallthrough; + default: + return error; + } + + resblks = xfs_link_space_res(mp, xname.len); + error = xfs_dir_add_child(upd->tp, resblks, &du); + if (error) + return error; + + trace_xfs_metadir_link(upd); + + return 0; +} +#endif /* ! __KERNEL__ */ + +/* Commit a metadir update and unlock/drop all resources. */ +int +xfs_metadir_commit( + struct xfs_metadir_update *upd) +{ + int error; + + trace_xfs_metadir_commit(upd); + + error = xfs_trans_commit(upd->tp); + upd->tp = NULL; + + xfs_metadir_teardown(upd, error); + return error; +} + +/* Cancel a metadir update and unlock/drop all resources. */ +void +xfs_metadir_cancel( + struct xfs_metadir_update *upd, + int error) +{ + trace_xfs_metadir_cancel(upd); + + xfs_trans_cancel(upd->tp); + upd->tp = NULL; + + xfs_metadir_teardown(upd, error); +} + +/* Create a metadata for the last component of the path. */ +int +xfs_metadir_mkdir( + struct xfs_inode *dp, + const char *path, + struct xfs_inode **ipp) +{ + struct xfs_metadir_update upd = { + .dp = dp, + .path = path, + .metafile_type = XFS_METAFILE_DIR, + }; + int error; + + if (xfs_is_shutdown(dp->i_mount)) + return -EIO; + + /* Allocate a transaction to create the last directory. */ + error = xfs_metadir_start_create(&upd); + if (error) + return error; + + /* Create the subdirectory and take our reference. */ + error = xfs_metadir_create(&upd, S_IFDIR); + if (error) + goto out_cancel; + + error = xfs_metadir_commit(&upd); + if (error) + goto out_irele; + + xfs_finish_inode_setup(upd.ip); + *ipp = upd.ip; + return 0; + +out_cancel: + xfs_metadir_cancel(&upd, error); +out_irele: + /* Have to finish setting up the inode to ensure it's deleted. */ + if (upd.ip) { + xfs_finish_inode_setup(upd.ip); + xfs_irele(upd.ip); + } + return error; +} diff --git a/fs/xfs/libxfs/xfs_metadir.h b/fs/xfs/libxfs/xfs_metadir.h new file mode 100644 index 000000000000..bfecac7d3d14 --- /dev/null +++ b/fs/xfs/libxfs/xfs_metadir.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2018-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef __XFS_METADIR_H__ +#define __XFS_METADIR_H__ + +/* Cleanup widget for metadata inode creation and deletion. */ +struct xfs_metadir_update { + /* Parent directory */ + struct xfs_inode *dp; + + /* Path to metadata file */ + const char *path; + + /* Parent pointer update context */ + struct xfs_parent_args *ppargs; + + /* Child metadata file */ + struct xfs_inode *ip; + + struct xfs_trans *tp; + + enum xfs_metafile_type metafile_type; + + unsigned int dp_locked:1; + unsigned int ip_locked:1; +}; + +int xfs_metadir_load(struct xfs_trans *tp, struct xfs_inode *dp, + const char *path, enum xfs_metafile_type metafile_type, + struct xfs_inode **ipp); + +int xfs_metadir_start_create(struct xfs_metadir_update *upd); +int xfs_metadir_create(struct xfs_metadir_update *upd, umode_t mode); + +int xfs_metadir_start_link(struct xfs_metadir_update *upd); +int xfs_metadir_link(struct xfs_metadir_update *upd); + +int xfs_metadir_commit(struct xfs_metadir_update *upd); +void xfs_metadir_cancel(struct xfs_metadir_update *upd, int error); + +int xfs_metadir_mkdir(struct xfs_inode *dp, const char *path, + struct xfs_inode **ipp); + +#endif /* __XFS_METADIR_H__ */ diff --git a/fs/xfs/libxfs/xfs_metafile.c b/fs/xfs/libxfs/xfs_metafile.c new file mode 100644 index 000000000000..adeb25d1a444 --- /dev/null +++ b/fs/xfs/libxfs/xfs_metafile.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2018-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_sb.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_trans.h" +#include "xfs_metafile.h" +#include "xfs_trace.h" +#include "xfs_inode.h" + +/* Set up an inode to be recognized as a metadata directory inode. */ +void +xfs_metafile_set_iflag( + struct xfs_trans *tp, + struct xfs_inode *ip, + enum xfs_metafile_type metafile_type) +{ + VFS_I(ip)->i_mode &= ~0777; + VFS_I(ip)->i_uid = GLOBAL_ROOT_UID; + VFS_I(ip)->i_gid = GLOBAL_ROOT_GID; + if (S_ISDIR(VFS_I(ip)->i_mode)) + ip->i_diflags |= XFS_METADIR_DIFLAGS; + else + ip->i_diflags |= XFS_METAFILE_DIFLAGS; + ip->i_diflags2 &= ~XFS_DIFLAG2_DAX; + ip->i_diflags2 |= XFS_DIFLAG2_METADATA; + ip->i_metatype = metafile_type; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); +} + +/* Clear the metadata directory inode flag. */ +void +xfs_metafile_clear_iflag( + struct xfs_trans *tp, + struct xfs_inode *ip) +{ + ASSERT(xfs_is_metadir_inode(ip)); + ASSERT(VFS_I(ip)->i_nlink == 0); + + ip->i_diflags2 &= ~XFS_DIFLAG2_METADATA; + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); +} diff --git a/fs/xfs/libxfs/xfs_metafile.h b/fs/xfs/libxfs/xfs_metafile.h new file mode 100644 index 000000000000..acec400123db --- /dev/null +++ b/fs/xfs/libxfs/xfs_metafile.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * Copyright (c) 2018-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#ifndef __XFS_METAFILE_H__ +#define __XFS_METAFILE_H__ + +/* All metadata files must have these flags set. */ +#define XFS_METAFILE_DIFLAGS (XFS_DIFLAG_IMMUTABLE | \ + XFS_DIFLAG_SYNC | \ + XFS_DIFLAG_NOATIME | \ + XFS_DIFLAG_NODUMP | \ + XFS_DIFLAG_NODEFRAG) + +/* All metadata directories must have these flags set. */ +#define XFS_METADIR_DIFLAGS (XFS_METAFILE_DIFLAGS | \ + XFS_DIFLAG_NOSYMLINKS) + +void xfs_metafile_set_iflag(struct xfs_trans *tp, struct xfs_inode *ip, + enum xfs_metafile_type metafile_type); +void xfs_metafile_clear_iflag(struct xfs_trans *tp, struct xfs_inode *ip); + +/* Code specific to kernel/userspace; must be provided externally. */ + +int xfs_trans_metafile_iget(struct xfs_trans *tp, xfs_ino_t ino, + enum xfs_metafile_type metafile_type, struct xfs_inode **ipp); +int xfs_metafile_iget(struct xfs_mount *mp, xfs_ino_t ino, + enum xfs_metafile_type metafile_type, struct xfs_inode **ipp); + +#endif /* __XFS_METAFILE_H__ */ diff --git a/fs/xfs/libxfs/xfs_ondisk.h b/fs/xfs/libxfs/xfs_ondisk.h index 23c133fd36f5..8bca86e350fd 100644 --- a/fs/xfs/libxfs/xfs_ondisk.h +++ b/fs/xfs/libxfs/xfs_ondisk.h @@ -37,7 +37,7 @@ xfs_check_ondisk_structs(void) XFS_CHECK_STRUCT_SIZE(struct xfs_dinode, 176); XFS_CHECK_STRUCT_SIZE(struct xfs_disk_dquot, 104); XFS_CHECK_STRUCT_SIZE(struct xfs_dqblk, 136); - XFS_CHECK_STRUCT_SIZE(struct xfs_dsb, 264); + XFS_CHECK_STRUCT_SIZE(struct xfs_dsb, 272); XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr, 56); XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key, 4); XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec, 16); diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index aae67c7afcb9..e6fad3512220 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -180,6 +180,8 @@ xfs_sb_version_to_features( features |= XFS_FEAT_EXCHANGE_RANGE; if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_PARENT) features |= XFS_FEAT_PARENT; + if (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) + features |= XFS_FEAT_METADIR; return features; } @@ -703,6 +705,11 @@ __xfs_sb_from_disk( /* Convert on-disk flags to in-memory flags? */ if (convert_xquota) xfs_sb_quota_from_disk(to); + + if (to->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) + to->sb_metadirino = be64_to_cpu(from->sb_metadirino); + else + to->sb_metadirino = NULLFSINO; } void @@ -850,6 +857,9 @@ xfs_sb_to_disk( to->sb_lsn = cpu_to_be64(from->sb_lsn); if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID) uuid_copy(&to->sb_meta_uuid, &from->sb_meta_uuid); + + if (from->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_METADIR) + to->sb_metadirino = cpu_to_be64(from->sb_metadirino); } /* @@ -1299,6 +1309,8 @@ xfs_fs_geometry( geo->flags |= XFS_FSOP_GEOM_FLAGS_NREXT64; if (xfs_has_exchange_range(mp)) geo->flags |= XFS_FSOP_GEOM_FLAGS_EXCHANGE_RANGE; + if (xfs_has_metadir(mp)) + geo->flags |= XFS_FSOP_GEOM_FLAGS_METADIR; geo->rtsectsize = sbp->sb_blocksize; geo->dirblocksize = xfs_dir2_dirblock_bytes(sbp); diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c index c91db4f51407..1cbfe57e971d 100644 --- a/fs/xfs/libxfs/xfs_types.c +++ b/fs/xfs/libxfs/xfs_types.c @@ -111,7 +111,7 @@ xfs_verify_ino( /* Is this an internal inode number? */ inline bool -xfs_internal_inum( +xfs_is_sb_inum( struct xfs_mount *mp, xfs_ino_t ino) { @@ -129,7 +129,7 @@ xfs_verify_dir_ino( struct xfs_mount *mp, xfs_ino_t ino) { - if (xfs_internal_inum(mp, ino)) + if (xfs_is_sb_inum(mp, ino)) return false; return xfs_verify_ino(mp, ino); } diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index d3cb6ff3b913..25053a66c225 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -230,7 +230,7 @@ bool xfs_verify_fsbext(struct xfs_mount *mp, xfs_fsblock_t fsbno, xfs_fsblock_t len); bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino); -bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino); +bool xfs_is_sb_inum(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); bool xfs_verify_rtbext(struct xfs_mount *mp, xfs_rtblock_t rtbno, diff --git a/fs/xfs/scrub/agheader.c b/fs/xfs/scrub/agheader.c index f8e5b67128d2..cad997f38a42 100644 --- a/fs/xfs/scrub/agheader.c +++ b/fs/xfs/scrub/agheader.c @@ -144,6 +144,11 @@ xchk_superblock( if (sb->sb_rootino != cpu_to_be64(mp->m_sb.sb_rootino)) xchk_block_set_preen(sc, bp); + if (xfs_has_metadir(sc->mp)) { + if (sb->sb_metadirino != cpu_to_be64(mp->m_sb.sb_metadirino)) + xchk_block_set_preen(sc, bp); + } + if (sb->sb_rbmino != cpu_to_be64(mp->m_sb.sb_rbmino)) xchk_block_set_preen(sc, bp); diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index e8b5e73bab60..c26a3314237a 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -39,6 +39,7 @@ #include "scrub/trace.h" #include "scrub/repair.h" #include "scrub/health.h" +#include "scrub/tempfile.h" /* Common code for the metadata scrubbers. */ @@ -947,9 +948,15 @@ xchk_iget_for_scrubbing( if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) return xchk_install_live_inode(sc, ip_in); - /* Reject internal metadata files and obviously bad inode numbers. */ - if (xfs_internal_inum(mp, sc->sm->sm_ino)) + /* + * On pre-metadir filesystems, reject internal metadata files. For + * metadir filesystems, limited scrubbing of any file in the metadata + * directory tree by handle is allowed, because that is the only way to + * validate the lack of parent pointers in the sb-root metadata inodes. + */ + if (!xfs_has_metadir(mp) && xfs_is_sb_inum(mp, sc->sm->sm_ino)) return -ENOENT; + /* Reject obviously bad inode numbers. */ if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino)) return -ENOENT; @@ -1084,6 +1091,10 @@ xchk_setup_inode_contents( if (error) return error; + error = xrep_tempfile_adjust_directory_tree(sc); + if (error) + return error; + /* Lock the inode so the VFS cannot touch this file. */ xchk_ilock(sc, XFS_IOLOCK_EXCL); @@ -1239,12 +1250,6 @@ xchk_metadata_inode_forks( return 0; } - /* They also should never have extended attributes. */ - if (xfs_inode_hasattr(sc->ip)) { - xchk_ino_set_corrupt(sc, sc->ip->i_ino); - return 0; - } - /* Invoke the data fork scrubber. */ error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTD); if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) @@ -1261,6 +1266,21 @@ xchk_metadata_inode_forks( xchk_ino_set_corrupt(sc, sc->ip->i_ino); } + /* + * Metadata files can only have extended attributes on metadir + * filesystems, either for parent pointers or for actual xattr data. + */ + if (xfs_inode_hasattr(sc->ip)) { + if (!xfs_has_metadir(sc->mp)) { + xchk_ino_set_corrupt(sc, sc->ip->i_ino); + return 0; + } + + error = xchk_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTA); + if (error || (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) + return error; + } + return 0; } @@ -1446,3 +1466,32 @@ out_rcu: rcu_read_unlock(); return error; } + +/* Is this inode a root directory for either tree? */ +bool +xchk_inode_is_dirtree_root(const struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + + return ip == mp->m_rootip || + (xfs_has_metadir(mp) && ip == mp->m_metadirip); +} + +/* Does the superblock point down to this inode? */ +bool +xchk_inode_is_sb_rooted(const struct xfs_inode *ip) +{ + return xchk_inode_is_dirtree_root(ip) || + xfs_is_sb_inum(ip->i_mount, ip->i_ino); +} + +/* What is the root directory inumber for this inode? */ +xfs_ino_t +xchk_inode_rootdir_inum(const struct xfs_inode *ip) +{ + struct xfs_mount *mp = ip->i_mount; + + if (xfs_is_metadir_inode(ip)) + return mp->m_metadirip->i_ino; + return mp->m_rootip->i_ino; +} diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h index f3db628b14e1..b2a81e85ded9 100644 --- a/fs/xfs/scrub/common.h +++ b/fs/xfs/scrub/common.h @@ -73,6 +73,7 @@ int xchk_setup_xattr(struct xfs_scrub *sc); int xchk_setup_symlink(struct xfs_scrub *sc); int xchk_setup_parent(struct xfs_scrub *sc); int xchk_setup_dirtree(struct xfs_scrub *sc); +int xchk_setup_metapath(struct xfs_scrub *sc); #ifdef CONFIG_XFS_RT int xchk_setup_rtbitmap(struct xfs_scrub *sc); int xchk_setup_rtsummary(struct xfs_scrub *sc); @@ -242,4 +243,8 @@ void xchk_fsgates_enable(struct xfs_scrub *sc, unsigned int scrub_fshooks); int xchk_inode_is_allocated(struct xfs_scrub *sc, xfs_agino_t agino, bool *inuse); +bool xchk_inode_is_dirtree_root(const struct xfs_inode *ip); +bool xchk_inode_is_sb_rooted(const struct xfs_inode *ip); +xfs_ino_t xchk_inode_rootdir_inum(const struct xfs_inode *ip); + #endif /* __XFS_SCRUB_COMMON_H__ */ diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index bf9199e8df63..c877bde71e62 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -100,6 +100,14 @@ xchk_dir_check_ftype( if (xfs_mode_to_ftype(VFS_I(ip)->i_mode) != ftype) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); + + /* + * Metadata and regular inodes cannot cross trees. This property + * cannot change without a full inode free and realloc cycle, so it's + * safe to check this without holding locks. + */ + if (xfs_is_metadir_inode(ip) != xfs_is_metadir_inode(sc->ip)) + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); } /* @@ -253,7 +261,7 @@ xchk_dir_actor( * If this is ".." in the root inode, check that the inum * matches this dir. */ - if (dp->i_ino == mp->m_sb.sb_rootino && ino != dp->i_ino) + if (xchk_inode_is_dirtree_root(dp) && ino != dp->i_ino) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, offset); } diff --git a/fs/xfs/scrub/dir_repair.c b/fs/xfs/scrub/dir_repair.c index 64679fe08446..249313882108 100644 --- a/fs/xfs/scrub/dir_repair.c +++ b/fs/xfs/scrub/dir_repair.c @@ -415,6 +415,12 @@ xrep_dir_salvage_entry( if (error) return 0; + /* Don't mix metadata and regular directory trees. */ + if (xfs_is_metadir_inode(ip) != xfs_is_metadir_inode(rd->sc->ip)) { + xchk_irele(sc, ip); + return 0; + } + xname.type = xfs_mode_to_ftype(VFS_I(ip)->i_mode); xchk_irele(sc, ip); @@ -1270,7 +1276,7 @@ xrep_dir_scan_dirtree( int error; /* Roots of directory trees are their own parents. */ - if (sc->ip == sc->mp->m_rootip) + if (xchk_inode_is_dirtree_root(sc->ip)) xrep_findparent_scan_found(&rd->pscan, sc->ip->i_ino); /* @@ -1632,6 +1638,7 @@ xrep_dir_swap( struct xrep_dir *rd) { struct xfs_scrub *sc = rd->sc; + xfs_ino_t ino; bool ip_local, temp_local; int error = 0; @@ -1649,14 +1656,17 @@ xrep_dir_swap( /* * Reset the temporary directory's '..' entry to point to the parent - * that we found. The temporary directory was created with the root - * directory as the parent, so we can skip this if repairing a - * subdirectory of the root. + * that we found. The dirent replace code asserts if the dirent + * already points at the new inumber, so we look it up here. * * It's also possible that this replacement could also expand a sf * tempdir into block format. */ - if (rd->pscan.parent_ino != sc->mp->m_rootip->i_ino) { + error = xchk_dir_lookup(sc, rd->sc->tempip, &xfs_name_dotdot, &ino); + if (error) + return error; + + if (rd->pscan.parent_ino != ino) { error = xrep_dir_replace(rd, rd->sc->tempip, &xfs_name_dotdot, rd->pscan.parent_ino, rd->tx.req.resblks); if (error) diff --git a/fs/xfs/scrub/dirtree.c b/fs/xfs/scrub/dirtree.c index bde58fb561ea..3a9cdf8738b6 100644 --- a/fs/xfs/scrub/dirtree.c +++ b/fs/xfs/scrub/dirtree.c @@ -362,7 +362,8 @@ xchk_dirpath_set_outcome( STATIC int xchk_dirpath_step_up( struct xchk_dirtree *dl, - struct xchk_dirpath *path) + struct xchk_dirpath *path, + bool is_metadir) { struct xfs_scrub *sc = dl->sc; struct xfs_inode *dp; @@ -435,6 +436,14 @@ xchk_dirpath_step_up( goto out_scanlock; } + /* Parent must be in the same directory tree. */ + if (is_metadir != xfs_is_metadir_inode(dp)) { + trace_xchk_dirpath_crosses_tree(dl->sc, dp, path->path_nr, + path->nr_steps, &dl->xname, &dl->pptr_rec); + error = -EFSCORRUPTED; + goto out_scanlock; + } + /* * If the extended attributes look as though they has been zapped by * the inode record repair code, we cannot scan for parent pointers. @@ -508,6 +517,7 @@ xchk_dirpath_walk_upwards( struct xchk_dirpath *path) { struct xfs_scrub *sc = dl->sc; + bool is_metadir; int error; ASSERT(sc->ilock_flags & XFS_ILOCK_EXCL); @@ -538,6 +548,7 @@ xchk_dirpath_walk_upwards( * ILOCK state is no longer tracked in the scrub context. Hence we * must drop @sc->ip's ILOCK during the walk. */ + is_metadir = xfs_is_metadir_inode(sc->ip); mutex_unlock(&dl->lock); xchk_iunlock(sc, XFS_ILOCK_EXCL); @@ -547,7 +558,7 @@ xchk_dirpath_walk_upwards( * If we see any kind of error here (including corruptions), the parent * pointer of @sc->ip is corrupt. Stop the whole scan. */ - error = xchk_dirpath_step_up(dl, path); + error = xchk_dirpath_step_up(dl, path, is_metadir); if (error) { xchk_ilock(sc, XFS_ILOCK_EXCL); mutex_lock(&dl->lock); @@ -560,7 +571,7 @@ xchk_dirpath_walk_upwards( * *somewhere* in the path, but we don't need to stop scanning. */ while (!error && path->outcome == XCHK_DIRPATH_SCANNING) - error = xchk_dirpath_step_up(dl, path); + error = xchk_dirpath_step_up(dl, path, is_metadir); /* Retake the locks we had, mark paths, etc. */ xchk_ilock(sc, XFS_ILOCK_EXCL); @@ -917,7 +928,7 @@ xchk_dirtree( * scan, because the hook doesn't detach until after sc->ip gets * released during teardown. */ - dl->root_ino = sc->mp->m_rootip->i_ino; + dl->root_ino = xchk_inode_rootdir_inum(sc->ip); dl->scan_ino = sc->ip->i_ino; trace_xchk_dirtree_start(sc->ip, sc->sm, 0); @@ -983,3 +994,16 @@ out: trace_xchk_dirtree_done(sc->ip, sc->sm, error); return error; } + +/* Does the directory targetted by this scrub have no parents? */ +bool +xchk_dirtree_parentless(const struct xchk_dirtree *dl) +{ + struct xfs_scrub *sc = dl->sc; + + if (xchk_inode_is_dirtree_root(sc->ip)) + return true; + if (VFS_I(sc->ip)->i_nlink == 0) + return true; + return false; +} diff --git a/fs/xfs/scrub/dirtree.h b/fs/xfs/scrub/dirtree.h index 1e1686365c61..9e5d95492717 100644 --- a/fs/xfs/scrub/dirtree.h +++ b/fs/xfs/scrub/dirtree.h @@ -156,17 +156,7 @@ struct xchk_dirtree { #define xchk_dirtree_for_each_path(dl, path) \ list_for_each_entry((path), &(dl)->path_list, list) -static inline bool -xchk_dirtree_parentless(const struct xchk_dirtree *dl) -{ - struct xfs_scrub *sc = dl->sc; - - if (sc->ip == sc->mp->m_rootip) - return true; - if (VFS_I(sc->ip)->i_nlink == 0) - return true; - return false; -} +bool xchk_dirtree_parentless(const struct xchk_dirtree *dl); int xchk_dirtree_find_paths_to_root(struct xchk_dirtree *dl); int xchk_dirpath_append(struct xchk_dirtree *dl, struct xfs_inode *ip, diff --git a/fs/xfs/scrub/findparent.c b/fs/xfs/scrub/findparent.c index 01766041ba2c..84487072b6dd 100644 --- a/fs/xfs/scrub/findparent.c +++ b/fs/xfs/scrub/findparent.c @@ -172,6 +172,10 @@ xrep_findparent_walk_directory( */ lock_mode = xfs_ilock_data_map_shared(dp); + /* Don't mix metadata and regular directory trees. */ + if (xfs_is_metadir_inode(dp) != xfs_is_metadir_inode(sc->ip)) + goto out_unlock; + /* * If this directory is known to be sick, we cannot scan it reliably * and must abort. @@ -362,15 +366,24 @@ xrep_findparent_confirm( }; int error; - /* - * The root directory always points to itself. Unlinked dirs can point - * anywhere, so we point them at the root dir too. - */ - if (sc->ip == sc->mp->m_rootip || VFS_I(sc->ip)->i_nlink == 0) { + /* The root directory always points to itself. */ + if (sc->ip == sc->mp->m_rootip) { *parent_ino = sc->mp->m_sb.sb_rootino; return 0; } + /* The metadata root directory always points to itself. */ + if (sc->ip == sc->mp->m_metadirip) { + *parent_ino = sc->mp->m_sb.sb_metadirino; + return 0; + } + + /* Unlinked dirs can point anywhere; point them up to the root dir. */ + if (VFS_I(sc->ip)->i_nlink == 0) { + *parent_ino = xchk_inode_rootdir_inum(sc->ip); + return 0; + } + /* Reject garbage parent inode numbers and self-referential parents. */ if (*parent_ino == NULLFSINO) return 0; @@ -412,8 +425,11 @@ xrep_findparent_self_reference( if (sc->ip->i_ino == sc->mp->m_sb.sb_rootino) return sc->mp->m_sb.sb_rootino; + if (sc->ip->i_ino == sc->mp->m_sb.sb_metadirino) + return sc->mp->m_sb.sb_metadirino; + if (VFS_I(sc->ip)->i_nlink == 0) - return sc->mp->m_sb.sb_rootino; + return xchk_inode_rootdir_inum(sc->ip); return NULLFSINO; } diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c index 6ceef3749e3b..b8b92ff3a573 100644 --- a/fs/xfs/scrub/health.c +++ b/fs/xfs/scrub/health.c @@ -109,6 +109,7 @@ static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = { [XFS_SCRUB_TYPE_QUOTACHECK] = { XHG_FS, XFS_SICK_FS_QUOTACHECK }, [XFS_SCRUB_TYPE_NLINKS] = { XHG_FS, XFS_SICK_FS_NLINKS }, [XFS_SCRUB_TYPE_DIRTREE] = { XHG_INO, XFS_SICK_INO_DIRTREE }, + [XFS_SCRUB_TYPE_METAPATH] = { XHG_FS, XFS_SICK_FS_METAPATH }, }; /* Return the health status mask for this scrub type. */ diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index d32716fb2fec..25ee66e7649d 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -60,6 +60,22 @@ xchk_install_handle_iscrub( if (error) return error; + /* + * Don't allow scrubbing by handle of any non-directory inode records + * in the metadata directory tree. We don't know if any of the scans + * launched by this scrubber will end up indirectly trying to lock this + * file. + * + * Scrubbers of inode-rooted metadata files (e.g. quota files) will + * attach all the resources needed to scrub the inode and call + * xchk_inode directly. Userspace cannot call this directly. + */ + if (xfs_is_metadir_inode(ip) && !S_ISDIR(VFS_I(ip)->i_mode)) { + xchk_irele(sc, ip); + sc->ip = NULL; + return -ENOENT; + } + return xchk_prepare_iscrub(sc); } @@ -94,9 +110,15 @@ xchk_setup_inode( return xchk_prepare_iscrub(sc); } - /* Reject internal metadata files and obviously bad inode numbers. */ - if (xfs_internal_inum(mp, sc->sm->sm_ino)) + /* + * On pre-metadir filesystems, reject internal metadata files. For + * metadir filesystems, limited scrubbing of any file in the metadata + * directory tree by handle is allowed, because that is the only way to + * validate the lack of parent pointers in the sb-root metadata inodes. + */ + if (!xfs_has_metadir(mp) && xfs_is_sb_inum(mp, sc->sm->sm_ino)) return -ENOENT; + /* Reject obviously bad inode numbers. */ if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino)) return -ENOENT; @@ -421,8 +443,13 @@ xchk_dinode( break; case 2: case 3: - if (dip->di_onlink != 0) - xchk_ino_set_corrupt(sc, ino); + if (xfs_dinode_is_metadir(dip)) { + if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX) + xchk_ino_set_corrupt(sc, ino); + } else { + if (dip->di_metatype != 0) + xchk_ino_set_corrupt(sc, ino); + } if (dip->di_mode == 0 && sc->ip) xchk_ino_set_corrupt(sc, ino); diff --git a/fs/xfs/scrub/inode_repair.c b/fs/xfs/scrub/inode_repair.c index 5da9e1a387a8..5a58ddd27bd2 100644 --- a/fs/xfs/scrub/inode_repair.c +++ b/fs/xfs/scrub/inode_repair.c @@ -521,10 +521,17 @@ STATIC void xrep_dinode_nlinks( struct xfs_dinode *dip) { - if (dip->di_version > 1) - dip->di_onlink = 0; - else + if (dip->di_version < 2) { dip->di_nlink = 0; + return; + } + + if (xfs_dinode_is_metadir(dip)) { + if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX) + dip->di_metatype = cpu_to_be16(XFS_METAFILE_UNKNOWN); + } else { + dip->di_metatype = 0; + } } /* Fix any conflicting flags that the verifiers complain about. */ @@ -565,6 +572,16 @@ xrep_dinode_flags( dip->di_nrext64_pad = 0; else if (dip->di_version >= 3) dip->di_v3_pad = 0; + + if (flags2 & XFS_DIFLAG2_METADATA) { + xfs_failaddr_t fa; + + fa = xfs_dinode_verify_metadir(sc->mp, dip, mode, flags, + flags2); + if (fa) + flags2 &= ~XFS_DIFLAG2_METADATA; + } + dip->di_flags = cpu_to_be16(flags); dip->di_flags2 = cpu_to_be64(flags2); } @@ -1754,15 +1771,8 @@ xrep_inode_pptr( if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE)) return 0; - /* The root directory doesn't have a parent pointer. */ - if (ip == mp->m_rootip) - return 0; - - /* - * Metadata inodes are rooted in the superblock and do not have any - * parents. - */ - if (xfs_is_metadata_inode(ip)) + /* Children of the superblock do not have parent pointers. */ + if (xchk_inode_is_sb_rooted(ip)) return 0; /* Inode already has an attr fork; no further work possible here. */ diff --git a/fs/xfs/scrub/metapath.c b/fs/xfs/scrub/metapath.c new file mode 100644 index 000000000000..edc1a395c401 --- /dev/null +++ b/fs/xfs/scrub/metapath.c @@ -0,0 +1,521 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2023-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_log_format.h" +#include "xfs_trans.h" +#include "xfs_inode.h" +#include "xfs_metafile.h" +#include "xfs_quota.h" +#include "xfs_qm.h" +#include "xfs_dir2.h" +#include "xfs_parent.h" +#include "xfs_bmap_btree.h" +#include "xfs_trans_space.h" +#include "xfs_attr.h" +#include "scrub/scrub.h" +#include "scrub/common.h" +#include "scrub/trace.h" +#include "scrub/readdir.h" +#include "scrub/repair.h" + +/* + * Metadata Directory Tree Paths + * ============================= + * + * A filesystem with metadir enabled expects to find metadata structures + * attached to files that are accessible by walking a path down the metadata + * directory tree. Given the metadir path and the incore inode storing the + * metadata, this scrubber ensures that the ondisk metadir path points to the + * ondisk inode represented by the incore inode. + */ + +struct xchk_metapath { + struct xfs_scrub *sc; + + /* Name for lookup */ + struct xfs_name xname; + + /* Directory update for repairs */ + struct xfs_dir_update du; + + /* Path down to this metadata file from the parent directory */ + const char *path; + + /* Directory parent of the metadata file. */ + struct xfs_inode *dp; + + /* Locks held on dp */ + unsigned int dp_ilock_flags; + + /* Transaction block reservations */ + unsigned int link_resblks; + unsigned int unlink_resblks; + + /* Parent pointer updates */ + struct xfs_parent_args link_ppargs; + struct xfs_parent_args unlink_ppargs; + + /* Scratchpads for removing links */ + struct xfs_da_args pptr_args; +}; + +/* Release resources tracked in the buffer. */ +static inline void +xchk_metapath_cleanup( + void *buf) +{ + struct xchk_metapath *mpath = buf; + + if (mpath->dp_ilock_flags) + xfs_iunlock(mpath->dp, mpath->dp_ilock_flags); + kfree(mpath->path); +} + +int +xchk_setup_metapath( + struct xfs_scrub *sc) +{ + if (!xfs_has_metadir(sc->mp)) + return -ENOENT; + if (sc->sm->sm_gen) + return -EINVAL; + + switch (sc->sm->sm_ino) { + case XFS_SCRUB_METAPATH_PROBE: + /* Just probing, nothing else to do. */ + if (sc->sm->sm_agno) + return -EINVAL; + return 0; + default: + return -ENOENT; + } +} + +/* + * Take the ILOCK on the metadata directory parent and child. We do not know + * that the metadata directory is not corrupt, so we lock the parent and try + * to lock the child. Returns 0 if successful, or -EINTR to abort the scrub. + */ +STATIC int +xchk_metapath_ilock_both( + struct xchk_metapath *mpath) +{ + struct xfs_scrub *sc = mpath->sc; + int error = 0; + + while (true) { + xfs_ilock(mpath->dp, XFS_ILOCK_EXCL); + if (xchk_ilock_nowait(sc, XFS_ILOCK_EXCL)) { + mpath->dp_ilock_flags |= XFS_ILOCK_EXCL; + return 0; + } + xfs_iunlock(mpath->dp, XFS_ILOCK_EXCL); + + if (xchk_should_terminate(sc, &error)) + return error; + + delay(1); + } + + ASSERT(0); + return -EINTR; +} + +/* Unlock parent and child inodes. */ +static inline void +xchk_metapath_iunlock( + struct xchk_metapath *mpath) +{ + struct xfs_scrub *sc = mpath->sc; + + xchk_iunlock(sc, XFS_ILOCK_EXCL); + + mpath->dp_ilock_flags &= ~XFS_ILOCK_EXCL; + xfs_iunlock(mpath->dp, XFS_ILOCK_EXCL); +} + +int +xchk_metapath( + struct xfs_scrub *sc) +{ + struct xchk_metapath *mpath = sc->buf; + xfs_ino_t ino = NULLFSINO; + int error; + + /* Just probing, nothing else to do. */ + if (sc->sm->sm_ino == XFS_SCRUB_METAPATH_PROBE) + return 0; + + /* Parent required to do anything else. */ + if (mpath->dp == NULL) { + xchk_ino_set_corrupt(sc, sc->ip->i_ino); + return 0; + } + + error = xchk_trans_alloc_empty(sc); + if (error) + return error; + + error = xchk_metapath_ilock_both(mpath); + if (error) + goto out_cancel; + + /* Make sure the parent dir has a dirent pointing to this file. */ + error = xchk_dir_lookup(sc, mpath->dp, &mpath->xname, &ino); + trace_xchk_metapath_lookup(sc, mpath->path, mpath->dp, ino); + if (error == -ENOENT) { + /* No directory entry at all */ + xchk_ino_set_corrupt(sc, sc->ip->i_ino); + error = 0; + goto out_ilock; + } + if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) + goto out_ilock; + if (ino != sc->ip->i_ino) { + /* Pointing to wrong inode */ + xchk_ino_set_corrupt(sc, sc->ip->i_ino); + } + +out_ilock: + xchk_metapath_iunlock(mpath); +out_cancel: + xchk_trans_cancel(sc); + return error; +} + +#ifdef CONFIG_XFS_ONLINE_REPAIR +/* Create the dirent represented by the final component of the path. */ +STATIC int +xrep_metapath_link( + struct xchk_metapath *mpath) +{ + struct xfs_scrub *sc = mpath->sc; + + mpath->du.dp = mpath->dp; + mpath->du.name = &mpath->xname; + mpath->du.ip = sc->ip; + + if (xfs_has_parent(sc->mp)) + mpath->du.ppargs = &mpath->link_ppargs; + else + mpath->du.ppargs = NULL; + + trace_xrep_metapath_link(sc, mpath->path, mpath->dp, sc->ip->i_ino); + + return xfs_dir_add_child(sc->tp, mpath->link_resblks, &mpath->du); +} + +/* Remove the dirent at the final component of the path. */ +STATIC int +xrep_metapath_unlink( + struct xchk_metapath *mpath, + xfs_ino_t ino, + struct xfs_inode *ip) +{ + struct xfs_parent_rec rec; + struct xfs_scrub *sc = mpath->sc; + struct xfs_mount *mp = sc->mp; + int error; + + trace_xrep_metapath_unlink(sc, mpath->path, mpath->dp, ino); + + if (!ip) { + /* The child inode isn't allocated. Junk the dirent. */ + xfs_trans_log_inode(sc->tp, mpath->dp, XFS_ILOG_CORE); + return xfs_dir_removename(sc->tp, mpath->dp, &mpath->xname, + ino, mpath->unlink_resblks); + } + + mpath->du.dp = mpath->dp; + mpath->du.name = &mpath->xname; + mpath->du.ip = ip; + mpath->du.ppargs = NULL; + + /* Figure out if we're removing a parent pointer too. */ + if (xfs_has_parent(mp)) { + xfs_inode_to_parent_rec(&rec, ip); + error = xfs_parent_lookup(sc->tp, ip, &mpath->xname, &rec, + &mpath->pptr_args); + switch (error) { + case -ENOATTR: + break; + case 0: + mpath->du.ppargs = &mpath->unlink_ppargs; + break; + default: + return error; + } + } + + return xfs_dir_remove_child(sc->tp, mpath->unlink_resblks, &mpath->du); +} + +/* + * Try to create a dirent in @mpath->dp with the name @mpath->xname that points + * to @sc->ip. Returns: + * + * -EEXIST and an @alleged_child if the dirent that points to the wrong inode; + * 0 if there is now a dirent pointing to @sc->ip; or + * A negative errno on error. + */ +STATIC int +xrep_metapath_try_link( + struct xchk_metapath *mpath, + xfs_ino_t *alleged_child) +{ + struct xfs_scrub *sc = mpath->sc; + xfs_ino_t ino; + int error; + + /* Allocate transaction, lock inodes, join to transaction. */ + error = xchk_trans_alloc(sc, mpath->link_resblks); + if (error) + return error; + + error = xchk_metapath_ilock_both(mpath); + if (error) { + xchk_trans_cancel(sc); + return error; + } + xfs_trans_ijoin(sc->tp, mpath->dp, 0); + xfs_trans_ijoin(sc->tp, sc->ip, 0); + + error = xchk_dir_lookup(sc, mpath->dp, &mpath->xname, &ino); + trace_xrep_metapath_lookup(sc, mpath->path, mpath->dp, ino); + if (error == -ENOENT) { + /* + * There is no dirent in the directory. Create an entry + * pointing to @sc->ip. + */ + error = xrep_metapath_link(mpath); + if (error) + goto out_cancel; + + error = xrep_trans_commit(sc); + xchk_metapath_iunlock(mpath); + return error; + } + if (error) + goto out_cancel; + + if (ino == sc->ip->i_ino) { + /* The dirent already points to @sc->ip; we're done. */ + error = 0; + goto out_cancel; + } + + /* + * The dirent points elsewhere; pass that back so that the caller + * can try to remove the dirent. + */ + *alleged_child = ino; + error = -EEXIST; + +out_cancel: + xchk_trans_cancel(sc); + xchk_metapath_iunlock(mpath); + return error; +} + +/* + * Take the ILOCK on the metadata directory parent and a bad child, if one is + * supplied. We do not know that the metadata directory is not corrupt, so we + * lock the parent and try to lock the child. Returns 0 if successful, or + * -EINTR to abort the repair. The lock state of @dp is not recorded in @mpath. + */ +STATIC int +xchk_metapath_ilock_parent_and_child( + struct xchk_metapath *mpath, + struct xfs_inode *ip) +{ + struct xfs_scrub *sc = mpath->sc; + int error = 0; + + while (true) { + xfs_ilock(mpath->dp, XFS_ILOCK_EXCL); + if (!ip || xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) + return 0; + xfs_iunlock(mpath->dp, XFS_ILOCK_EXCL); + + if (xchk_should_terminate(sc, &error)) + return error; + + delay(1); + } + + ASSERT(0); + return -EINTR; +} + +/* + * Try to remove a dirent in @mpath->dp with the name @mpath->xname that points + * to @alleged_child. Returns: + * + * 0 if there is no longer a dirent; + * -EEXIST if the dirent points to @sc->ip; + * -EAGAIN and an updated @alleged_child if the dirent points elsewhere; or + * A negative errno for any other error. + */ +STATIC int +xrep_metapath_try_unlink( + struct xchk_metapath *mpath, + xfs_ino_t *alleged_child) +{ + struct xfs_scrub *sc = mpath->sc; + struct xfs_inode *ip = NULL; + xfs_ino_t ino; + int error; + + ASSERT(*alleged_child != sc->ip->i_ino); + + trace_xrep_metapath_try_unlink(sc, mpath->path, mpath->dp, + *alleged_child); + + /* + * Allocate transaction, grab the alleged child inode, lock inodes, + * join to transaction. + */ + error = xchk_trans_alloc(sc, mpath->unlink_resblks); + if (error) + return error; + + error = xchk_iget(sc, *alleged_child, &ip); + if (error == -EINVAL || error == -ENOENT) { + /* inode number is bogus, junk the dirent */ + error = 0; + } + if (error) { + xchk_trans_cancel(sc); + return error; + } + + error = xchk_metapath_ilock_parent_and_child(mpath, ip); + if (error) { + xchk_trans_cancel(sc); + return error; + } + xfs_trans_ijoin(sc->tp, mpath->dp, 0); + if (ip) + xfs_trans_ijoin(sc->tp, ip, 0); + + error = xchk_dir_lookup(sc, mpath->dp, &mpath->xname, &ino); + trace_xrep_metapath_lookup(sc, mpath->path, mpath->dp, ino); + if (error == -ENOENT) { + /* + * There is no dirent in the directory anymore. We're ready to + * try the link operation again. + */ + error = 0; + goto out_cancel; + } + if (error) + goto out_cancel; + + if (ino == sc->ip->i_ino) { + /* The dirent already points to @sc->ip; we're done. */ + error = -EEXIST; + goto out_cancel; + } + + /* + * The dirent does not point to the alleged child. Update the caller + * and signal that we want to be called again. + */ + if (ino != *alleged_child) { + *alleged_child = ino; + error = -EAGAIN; + goto out_cancel; + } + + /* Remove the link to the child. */ + error = xrep_metapath_unlink(mpath, ino, ip); + if (error) + goto out_cancel; + + error = xrep_trans_commit(sc); + goto out_unlock; + +out_cancel: + xchk_trans_cancel(sc); +out_unlock: + xfs_iunlock(mpath->dp, XFS_ILOCK_EXCL); + if (ip) { + xfs_iunlock(ip, XFS_ILOCK_EXCL); + xchk_irele(sc, ip); + } + return error; +} + +/* + * Make sure the metadata directory path points to the child being examined. + * + * Repair needs to be able to create a directory structure, create its own + * transactions, and take ILOCKs. This function /must/ be called after all + * other repairs have completed. + */ +int +xrep_metapath( + struct xfs_scrub *sc) +{ + struct xchk_metapath *mpath = sc->buf; + struct xfs_mount *mp = sc->mp; + int error = 0; + + /* Just probing, nothing to repair. */ + if (sc->sm->sm_ino == XFS_SCRUB_METAPATH_PROBE) + return 0; + + /* Parent required to do anything else. */ + if (mpath->dp == NULL) + return -EFSCORRUPTED; + + /* + * Make sure the child file actually has an attr fork to receive a new + * parent pointer if the fs has parent pointers. + */ + if (xfs_has_parent(mp)) { + error = xfs_attr_add_fork(sc->ip, + sizeof(struct xfs_attr_sf_hdr), 1); + if (error) + return error; + } + + /* Compute block reservation required to unlink and link a file. */ + mpath->unlink_resblks = xfs_remove_space_res(mp, MAXNAMELEN); + mpath->link_resblks = xfs_link_space_res(mp, MAXNAMELEN); + + do { + xfs_ino_t alleged_child; + + /* Re-establish the link, or tell us which inode to remove. */ + error = xrep_metapath_try_link(mpath, &alleged_child); + if (!error) + return 0; + if (error != -EEXIST) + return error; + + /* + * Remove an incorrect link to an alleged child, or tell us + * which inode to remove. + */ + do { + error = xrep_metapath_try_unlink(mpath, &alleged_child); + } while (error == -EAGAIN); + if (error == -EEXIST) { + /* Link established; we're done. */ + error = 0; + break; + } + } while (!error); + + return error; +} +#endif /* CONFIG_XFS_ONLINE_REPAIR */ diff --git a/fs/xfs/scrub/nlinks.c b/fs/xfs/scrub/nlinks.c index 80aee30886c4..4a47d0aabf73 100644 --- a/fs/xfs/scrub/nlinks.c +++ b/fs/xfs/scrub/nlinks.c @@ -279,7 +279,7 @@ xchk_nlinks_collect_dirent( * determine the backref count. */ if (dotdot) { - if (dp == sc->mp->m_rootip) + if (xchk_inode_is_dirtree_root(dp)) error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0); else if (!xfs_has_parent(sc->mp)) error = xchk_nlinks_update_incore(xnc, ino, 0, 1, 0); @@ -735,7 +735,7 @@ xchk_nlinks_compare_inode( } } - if (ip == sc->mp->m_rootip) { + if (xchk_inode_is_dirtree_root(ip)) { /* * For the root of a directory tree, both the '.' and '..' * entries should point to the root directory. The dotdot diff --git a/fs/xfs/scrub/nlinks_repair.c b/fs/xfs/scrub/nlinks_repair.c index b3e707f47b7b..4ebdee095428 100644 --- a/fs/xfs/scrub/nlinks_repair.c +++ b/fs/xfs/scrub/nlinks_repair.c @@ -60,11 +60,9 @@ xrep_nlinks_is_orphaned( unsigned int actual_nlink, const struct xchk_nlink *obs) { - struct xfs_mount *mp = ip->i_mount; - if (obs->parents != 0) return false; - if (ip == mp->m_rootip || ip == sc->orphanage) + if (xchk_inode_is_dirtree_root(ip) || ip == sc->orphanage) return false; return actual_nlink != 0; } diff --git a/fs/xfs/scrub/orphanage.c b/fs/xfs/scrub/orphanage.c index 7148d8362db8..c287c755f2c5 100644 --- a/fs/xfs/scrub/orphanage.c +++ b/fs/xfs/scrub/orphanage.c @@ -295,7 +295,9 @@ xrep_orphanage_can_adopt( return false; if (sc->ip == sc->orphanage) return false; - if (xfs_internal_inum(sc->mp, sc->ip->i_ino)) + if (xchk_inode_is_sb_rooted(sc->ip)) + return false; + if (xfs_is_internal_inode(sc->ip)) return false; return true; } diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 91e7b51ce068..3b692c4acc1e 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -132,6 +132,14 @@ xchk_parent_validate( return 0; } + /* Is this the metadata root dir? Then '..' must point to itself. */ + if (sc->ip == mp->m_metadirip) { + if (sc->ip->i_ino != mp->m_sb.sb_metadirino || + sc->ip->i_ino != parent_ino) + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + return 0; + } + /* '..' must not point to ourselves. */ if (sc->ip->i_ino == parent_ino) { xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); @@ -185,6 +193,12 @@ xchk_parent_validate( goto out_unlock; } + /* Metadata and regular inodes cannot cross trees. */ + if (xfs_is_metadir_inode(dp) != xfs_is_metadir_inode(sc->ip)) { + xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); + goto out_unlock; + } + /* Look for a directory entry in the parent pointing to the child. */ error = xchk_dir_walk(sc, dp, xchk_parent_actor, &spc); if (!xchk_fblock_xref_process_error(sc, XFS_DATA_FORK, 0, &error)) @@ -300,7 +314,7 @@ xchk_parent_pptr_and_dotdot( } /* Is this the root dir? Then '..' must point to itself. */ - if (sc->ip == sc->mp->m_rootip) { + if (xchk_inode_is_dirtree_root(sc->ip)) { if (sc->ip->i_ino != pp->parent_ino) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); return 0; @@ -711,7 +725,7 @@ xchk_parent_count_pptrs( } if (S_ISDIR(VFS_I(sc->ip)->i_mode)) { - if (sc->ip == sc->mp->m_rootip) + if (xchk_inode_is_dirtree_root(sc->ip)) pp->pptrs_found++; if (VFS_I(sc->ip)->i_nlink == 0 && pp->pptrs_found > 0) @@ -720,6 +734,14 @@ xchk_parent_count_pptrs( pp->pptrs_found == 0) xchk_ino_set_corrupt(sc, sc->ip->i_ino); } else { + /* + * Starting with metadir, we allow checking of parent pointers + * of non-directory files that are children of the superblock. + * Pretend that we found a parent pointer attr. + */ + if (xfs_has_metadir(sc->mp) && xchk_inode_is_sb_rooted(sc->ip)) + pp->pptrs_found++; + if (VFS_I(sc->ip)->i_nlink != pp->pptrs_found) xchk_ino_set_corrupt(sc, sc->ip->i_ino); } @@ -885,10 +907,9 @@ bool xchk_pptr_looks_zapped( struct xfs_inode *ip) { - struct xfs_mount *mp = ip->i_mount; struct inode *inode = VFS_I(ip); - ASSERT(xfs_has_parent(mp)); + ASSERT(xfs_has_parent(ip->i_mount)); /* * Temporary files that cannot be linked into the directory tree do not @@ -902,15 +923,15 @@ xchk_pptr_looks_zapped( * of a parent pointer scan is always the empty set. It's safe to scan * them even if the attr fork was zapped. */ - if (ip == mp->m_rootip) + if (xchk_inode_is_dirtree_root(ip)) return false; /* - * Metadata inodes are all rooted in the superblock and do not have - * any parents. Hence the attr fork will not be initialized, but - * there are no parent pointers that might have been zapped. + * Metadata inodes that are rooted in the superblock do not have any + * parents. Hence the attr fork will not be initialized, but there are + * no parent pointers that might have been zapped. */ - if (xfs_is_metadata_inode(ip)) + if (xchk_inode_is_sb_rooted(ip)) return false; /* diff --git a/fs/xfs/scrub/parent_repair.c b/fs/xfs/scrub/parent_repair.c index 7b42b7f65a0b..31bfe10be22a 100644 --- a/fs/xfs/scrub/parent_repair.c +++ b/fs/xfs/scrub/parent_repair.c @@ -1334,7 +1334,7 @@ xrep_parent_rebuild_pptrs( * so that we can decide if we're moving this file to the orphanage. * For this purpose, root directories are their own parents. */ - if (sc->ip == sc->mp->m_rootip) { + if (xchk_inode_is_dirtree_root(sc->ip)) { xrep_findparent_scan_found(&rp->pscan, sc->ip->i_ino); } else { error = xrep_parent_lookup_pptrs(sc, &parent_ino); @@ -1354,21 +1354,40 @@ STATIC int xrep_parent_rebuild_tree( struct xrep_parent *rp) { + struct xfs_scrub *sc = rp->sc; + bool try_adoption; int error; - if (xfs_has_parent(rp->sc->mp)) { + if (xfs_has_parent(sc->mp)) { error = xrep_parent_rebuild_pptrs(rp); if (error) return error; } - if (rp->pscan.parent_ino == NULLFSINO) { - if (xrep_orphanage_can_adopt(rp->sc)) + /* + * Any file with no parent could be adopted. This check happens after + * rebuilding the parent pointer structure because we might have cycled + * the ILOCK during that process. + */ + try_adoption = rp->pscan.parent_ino == NULLFSINO; + + /* + * Starting with metadir, we allow checking of parent pointers + * of non-directory files that are children of the superblock. + * Lack of parent is ok here. + */ + if (try_adoption && xfs_has_metadir(sc->mp) && + xchk_inode_is_sb_rooted(sc->ip)) + try_adoption = false; + + if (try_adoption) { + if (xrep_orphanage_can_adopt(sc)) return xrep_parent_move_to_orphanage(rp); return -EFSCORRUPTED; + } - if (S_ISDIR(VFS_I(rp->sc->ip)->i_mode)) + if (S_ISDIR(VFS_I(sc->ip)->i_mode)) return xrep_parent_reset_dotdot(rp); return 0; @@ -1422,6 +1441,14 @@ xrep_parent_set_nondir_nlink( if (error) return error; + /* + * Starting with metadir, we allow checking of parent pointers of + * non-directory files that are children of the superblock. Pretend + * that we found a parent pointer attr. + */ + if (xfs_has_metadir(sc->mp) && xchk_inode_is_sb_rooted(sc->ip)) + rp->parents++; + if (rp->parents > 0 && xfs_inode_on_unlinked_list(ip)) { xfs_trans_ijoin(sc->tp, sc->ip, 0); joined = true; diff --git a/fs/xfs/scrub/quotacheck.c b/fs/xfs/scrub/quotacheck.c index c77eb2de8df7..dc4033b91e44 100644 --- a/fs/xfs/scrub/quotacheck.c +++ b/fs/xfs/scrub/quotacheck.c @@ -398,10 +398,13 @@ xqcheck_collect_inode( bool isreg = S_ISREG(VFS_I(ip)->i_mode); int error = 0; - if (xfs_is_quota_inode(&tp->t_mountp->m_sb, ip->i_ino)) { + if (xfs_is_metadir_inode(ip) || + xfs_is_quota_inode(&tp->t_mountp->m_sb, ip->i_ino)) { /* * Quota files are never counted towards quota, so we do not - * need to take the lock. + * need to take the lock. Files do not switch between the + * metadata and regular directory trees without a reallocation, + * so we do not need to ILOCK them either. */ xchk_iscan_mark_visited(&xqc->iscan, ip); return 0; diff --git a/fs/xfs/scrub/refcount_repair.c b/fs/xfs/scrub/refcount_repair.c index 4240fff459cb..4e572b81c986 100644 --- a/fs/xfs/scrub/refcount_repair.c +++ b/fs/xfs/scrub/refcount_repair.c @@ -215,7 +215,7 @@ xrep_refc_rmap_shareable( return false; /* Metadata in files are never shareable */ - if (xfs_internal_inum(mp, rmap->rm_owner)) + if (xfs_is_sb_inum(mp, rmap->rm_owner)) return false; /* Metadata and unwritten file blocks are not shareable. */ diff --git a/fs/xfs/scrub/repair.c b/fs/xfs/scrub/repair.c index 646ac8ade88d..f80000d77552 100644 --- a/fs/xfs/scrub/repair.c +++ b/fs/xfs/scrub/repair.c @@ -1082,7 +1082,12 @@ xrep_metadata_inode_forks( if (error) return error; - /* Make sure the attr fork looks ok before we delete it. */ + /* + * Metadata files can only have extended attributes on metadir + * filesystems, either for parent pointers or for actual xattr data. + * For a non-metadir filesystem, make sure the attr fork looks ok + * before we delete it. + */ if (xfs_inode_hasattr(sc->ip)) { error = xrep_metadata_inode_subtype(sc, XFS_SCRUB_TYPE_BMBTA); if (error) @@ -1098,8 +1103,11 @@ xrep_metadata_inode_forks( return error; } - /* Clear the attr forks since metadata shouldn't have that. */ - if (xfs_inode_hasattr(sc->ip)) { + /* + * Metadata files on non-metadir filesystems cannot have attr forks, + * so clear them now. + */ + if (xfs_inode_hasattr(sc->ip) && !xfs_has_metadir(sc->mp)) { if (!dirty) { dirty = true; xfs_trans_ijoin(sc->tp, sc->ip, 0); diff --git a/fs/xfs/scrub/repair.h b/fs/xfs/scrub/repair.h index 0e0dc2bf985c..90f9cb3b5ad8 100644 --- a/fs/xfs/scrub/repair.h +++ b/fs/xfs/scrub/repair.h @@ -134,6 +134,7 @@ int xrep_directory(struct xfs_scrub *sc); int xrep_parent(struct xfs_scrub *sc); int xrep_symlink(struct xfs_scrub *sc); int xrep_dirtree(struct xfs_scrub *sc); +int xrep_metapath(struct xfs_scrub *sc); #ifdef CONFIG_XFS_RT int xrep_rtbitmap(struct xfs_scrub *sc); @@ -208,6 +209,7 @@ xrep_setup_nothing( #define xrep_setup_parent xrep_setup_nothing #define xrep_setup_nlinks xrep_setup_nothing #define xrep_setup_dirtree xrep_setup_nothing +#define xrep_setup_metapath xrep_setup_nothing #define xrep_setup_inode(sc, imap) ((void)0) @@ -243,6 +245,7 @@ static inline int xrep_setup_symlink(struct xfs_scrub *sc, unsigned int *x) #define xrep_parent xrep_notsupported #define xrep_symlink xrep_notsupported #define xrep_dirtree xrep_notsupported +#define xrep_metapath xrep_notsupported #endif /* CONFIG_XFS_ONLINE_REPAIR */ diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c index 4cbcf7a86dbe..1ac33bea6f0a 100644 --- a/fs/xfs/scrub/scrub.c +++ b/fs/xfs/scrub/scrub.c @@ -442,6 +442,13 @@ static const struct xchk_meta_ops meta_scrub_ops[] = { .has = xfs_has_parent, .repair = xrep_dirtree, }, + [XFS_SCRUB_TYPE_METAPATH] = { /* metadata directory tree path */ + .type = ST_GENERIC, + .setup = xchk_setup_metapath, + .scrub = xchk_metapath, + .has = xfs_has_metadir, + .repair = xrep_metapath, + }, }; static int @@ -489,6 +496,8 @@ xchk_validate_inputs( if (sm->sm_agno || (sm->sm_gen && !sm->sm_ino)) goto out; break; + case ST_GENERIC: + break; default: goto out; } @@ -605,8 +614,7 @@ xfs_scrub_metadata( if (error) goto out; - xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SCRUB, - "EXPERIMENTAL online scrub feature in use. Use at your own risk!"); + xfs_warn_experimental(mp, XFS_EXPERIMENTAL_SCRUB); sc = kzalloc(sizeof(struct xfs_scrub), XCHK_GFP_FLAGS); if (!sc) { diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h index 5993fcaffb2c..c688ff4fc7fc 100644 --- a/fs/xfs/scrub/scrub.h +++ b/fs/xfs/scrub/scrub.h @@ -73,6 +73,7 @@ enum xchk_type { ST_PERAG, /* per-AG metadata */ ST_FS, /* per-FS metadata */ ST_INODE, /* per-inode metadata */ + ST_GENERIC, /* determined by the scrubber */ }; struct xchk_meta_ops { @@ -255,6 +256,7 @@ int xchk_xattr(struct xfs_scrub *sc); int xchk_symlink(struct xfs_scrub *sc); int xchk_parent(struct xfs_scrub *sc); int xchk_dirtree(struct xfs_scrub *sc); +int xchk_metapath(struct xfs_scrub *sc); #ifdef CONFIG_XFS_RT int xchk_rtbitmap(struct xfs_scrub *sc); int xchk_rtsummary(struct xfs_scrub *sc); diff --git a/fs/xfs/scrub/stats.c b/fs/xfs/scrub/stats.c index 7996c2335476..edcd02dc2e62 100644 --- a/fs/xfs/scrub/stats.c +++ b/fs/xfs/scrub/stats.c @@ -80,6 +80,7 @@ static const char *name_map[XFS_SCRUB_TYPE_NR] = { [XFS_SCRUB_TYPE_QUOTACHECK] = "quotacheck", [XFS_SCRUB_TYPE_NLINKS] = "nlinks", [XFS_SCRUB_TYPE_DIRTREE] = "dirtree", + [XFS_SCRUB_TYPE_METAPATH] = "metapath", }; /* Format the scrub stats into a text buffer, similar to pcp style. */ diff --git a/fs/xfs/scrub/tempfile.c b/fs/xfs/scrub/tempfile.c index 177f922acfaf..4b7f7860e37e 100644 --- a/fs/xfs/scrub/tempfile.c +++ b/fs/xfs/scrub/tempfile.c @@ -22,6 +22,7 @@ #include "xfs_exchmaps.h" #include "xfs_defer.h" #include "xfs_symlink_remote.h" +#include "xfs_metafile.h" #include "scrub/scrub.h" #include "scrub/common.h" #include "scrub/repair.h" @@ -182,6 +183,101 @@ out_release_dquots: return error; } +/* + * Temporary files have to be created before we even know which inode we're + * going to scrub, so we assume that they will be part of the regular directory + * tree. If it turns out that we're actually scrubbing a file from the + * metadata directory tree, we have to subtract the temp file from the root + * dquots and detach the dquots. + */ +int +xrep_tempfile_adjust_directory_tree( + struct xfs_scrub *sc) +{ + int error; + + if (!sc->tempip) + return 0; + + ASSERT(sc->tp == NULL); + ASSERT(!xfs_is_metadir_inode(sc->tempip)); + + if (!sc->ip || !xfs_is_metadir_inode(sc->ip)) + return 0; + + xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL); + sc->temp_ilock_flags |= XFS_IOLOCK_EXCL; + + error = xchk_trans_alloc(sc, 0); + if (error) + goto out_iolock; + + xrep_tempfile_ilock(sc); + xfs_trans_ijoin(sc->tp, sc->tempip, 0); + + /* Metadir files are not accounted in quota, so drop icount */ + xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, -1L); + xfs_metafile_set_iflag(sc->tp, sc->tempip, XFS_METAFILE_UNKNOWN); + + error = xrep_trans_commit(sc); + if (error) + goto out_ilock; + + xfs_qm_dqdetach(sc->tempip); +out_ilock: + xrep_tempfile_iunlock(sc); +out_iolock: + xrep_tempfile_iounlock(sc); + return error; +} + +/* + * Remove this temporary file from the metadata directory tree so that it can + * be inactivated the normal way. + */ +STATIC int +xrep_tempfile_remove_metadir( + struct xfs_scrub *sc) +{ + int error; + + if (!sc->tempip || !xfs_is_metadir_inode(sc->tempip)) + return 0; + + ASSERT(sc->tp == NULL); + + xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL); + sc->temp_ilock_flags |= XFS_IOLOCK_EXCL; + + error = xchk_trans_alloc(sc, 0); + if (error) + goto out_iolock; + + xrep_tempfile_ilock(sc); + xfs_trans_ijoin(sc->tp, sc->tempip, 0); + + xfs_metafile_clear_iflag(sc->tp, sc->tempip); + + /* Non-metadir files are accounted in quota, so bump bcount/icount */ + error = xfs_qm_dqattach_locked(sc->tempip, false); + if (error) + goto out_cancel; + + xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, 1L); + xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_BCOUNT, + sc->tempip->i_nblocks); + error = xrep_trans_commit(sc); + goto out_ilock; + +out_cancel: + xchk_trans_cancel(sc); +out_ilock: + xrep_tempfile_iunlock(sc); +out_iolock: + xrep_tempfile_iounlock(sc); + return error; +} + /* Take IOLOCK_EXCL on the temporary file, maybe. */ bool xrep_tempfile_iolock_nowait( @@ -290,6 +386,7 @@ xrep_tempfile_rele( sc->temp_ilock_flags = 0; } + xrep_tempfile_remove_metadir(sc); xchk_irele(sc, sc->tempip); sc->tempip = NULL; } @@ -844,6 +941,14 @@ xrep_is_tempfile( const struct xfs_inode *ip) { const struct inode *inode = &ip->i_vnode; + struct xfs_mount *mp = ip->i_mount; + + /* + * Files in the metadata directory tree also have S_PRIVATE set and + * IOP_XATTR unset, so we must distinguish them separately. + */ + if (xfs_has_metadir(mp) && (ip->i_diflags2 & XFS_DIFLAG2_METADATA)) + return false; if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR)) return true; diff --git a/fs/xfs/scrub/tempfile.h b/fs/xfs/scrub/tempfile.h index e51399f595fe..71c1b54599c3 100644 --- a/fs/xfs/scrub/tempfile.h +++ b/fs/xfs/scrub/tempfile.h @@ -10,6 +10,8 @@ int xrep_tempfile_create(struct xfs_scrub *sc, uint16_t mode); void xrep_tempfile_rele(struct xfs_scrub *sc); +int xrep_tempfile_adjust_directory_tree(struct xfs_scrub *sc); + bool xrep_tempfile_iolock_nowait(struct xfs_scrub *sc); int xrep_tempfile_iolock_polled(struct xfs_scrub *sc); void xrep_tempfile_iounlock(struct xfs_scrub *sc); @@ -42,6 +44,7 @@ static inline void xrep_tempfile_iolock_both(struct xfs_scrub *sc) xchk_ilock(sc, XFS_IOLOCK_EXCL); } # define xrep_is_tempfile(ip) (false) +# define xrep_tempfile_adjust_directory_tree(sc) (0) # define xrep_tempfile_rele(sc) #endif /* CONFIG_XFS_ONLINE_REPAIR */ diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c index 4470ad0533b8..98f923ae664d 100644 --- a/fs/xfs/scrub/trace.c +++ b/fs/xfs/scrub/trace.c @@ -20,6 +20,7 @@ #include "xfs_dir2.h" #include "xfs_rmap.h" #include "xfs_parent.h" +#include "xfs_metafile.h" #include "scrub/scrub.h" #include "scrub/xfile.h" #include "scrub/xfarray.h" diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 58cc61f2ed53..b6c8d0944fa4 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -70,6 +70,7 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_NLINKS); TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_HEALTHY); TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_DIRTREE); TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BARRIER); +TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_METAPATH); #define XFS_SCRUB_TYPE_STRINGS \ { XFS_SCRUB_TYPE_PROBE, "probe" }, \ @@ -101,7 +102,8 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_BARRIER); { XFS_SCRUB_TYPE_NLINKS, "nlinks" }, \ { XFS_SCRUB_TYPE_HEALTHY, "healthy" }, \ { XFS_SCRUB_TYPE_DIRTREE, "dirtree" }, \ - { XFS_SCRUB_TYPE_BARRIER, "barrier" } + { XFS_SCRUB_TYPE_BARRIER, "barrier" }, \ + { XFS_SCRUB_TYPE_METAPATH, "metapath" } #define XFS_SCRUB_FLAG_STRINGS \ { XFS_SCRUB_IFLAG_REPAIR, "repair" }, \ @@ -1753,6 +1755,7 @@ DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_badgen); DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_nondir_parent); DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_unlinked_parent); DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_found_next_step); +DEFINE_XCHK_DIRPATH_EVENT(xchk_dirpath_crosses_tree); TRACE_DEFINE_ENUM(XCHK_DIRPATH_SCANNING); TRACE_DEFINE_ENUM(XCHK_DIRPATH_DELETE); @@ -1915,6 +1918,38 @@ TRACE_EVENT(xchk_dirtree_live_update, __get_str(name)) ); +DECLARE_EVENT_CLASS(xchk_metapath_class, + TP_PROTO(struct xfs_scrub *sc, const char *path, + struct xfs_inode *dp, xfs_ino_t ino), + TP_ARGS(sc, path, dp, ino), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, scrub_ino) + __field(xfs_ino_t, parent_ino) + __field(xfs_ino_t, ino) + __string(name, path) + ), + TP_fast_assign( + __entry->dev = sc->mp->m_super->s_dev; + __entry->scrub_ino = sc->ip ? sc->ip->i_ino : NULLFSINO; + __entry->parent_ino = dp ? dp->i_ino : NULLFSINO; + __entry->ino = ino; + __assign_str(name); + ), + TP_printk("dev %d:%d ino 0x%llx parent_ino 0x%llx name '%s' ino 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->scrub_ino, + __entry->parent_ino, + __get_str(name), + __entry->ino) +); +#define DEFINE_XCHK_METAPATH_EVENT(name) \ +DEFINE_EVENT(xchk_metapath_class, name, \ + TP_PROTO(struct xfs_scrub *sc, const char *path, \ + struct xfs_inode *dp, xfs_ino_t ino), \ + TP_ARGS(sc, path, dp, ino)) +DEFINE_XCHK_METAPATH_EVENT(xchk_metapath_lookup); + /* repair tracepoints */ #if IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) @@ -3563,6 +3598,11 @@ DEFINE_XCHK_DIRTREE_EVENT(xrep_dirtree_delete_path); DEFINE_XCHK_DIRTREE_EVENT(xrep_dirtree_create_adoption); DEFINE_XCHK_DIRTREE_EVALUATE_EVENT(xrep_dirtree_decided_fate); +DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_lookup); +DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_try_unlink); +DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_unlink); +DEFINE_XCHK_METAPATH_EVENT(xrep_metapath_link); + #endif /* IS_ENABLED(CONFIG_XFS_ONLINE_REPAIR) */ #endif /* _TRACE_XFS_SCRUB_TRACE_H */ diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index c1b211c260a9..3bf47458c517 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -983,6 +983,7 @@ xfs_qm_dqget_inode( xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); ASSERT(xfs_inode_dquot(ip, type) == NULL); + ASSERT(!xfs_is_metadir_inode(ip)); id = xfs_qm_id_for_quotatype(ip, type); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 82812a458cf1..28dde215c899 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -162,9 +162,7 @@ xfs_growfs_data_private( error = xfs_resizefs_init_new_ags(tp, &id, oagcount, nagcount, delta, last_pag, &lastag_extended); } else { - xfs_warn_mount(mp, XFS_OPSTATE_WARNED_SHRINK, - "EXPERIMENTAL online shrink feature in use. Use at your own risk!"); - + xfs_warn_experimental(mp, XFS_EXPERIMENTAL_SHRINK); error = xfs_ag_shrink_space(last_pag, &tp, -delta); } xfs_perag_put(last_pag); diff --git a/fs/xfs/xfs_health.c b/fs/xfs/xfs_health.c index f45f125a669d..e5663e2ac9b8 100644 --- a/fs/xfs/xfs_health.c +++ b/fs/xfs/xfs_health.c @@ -380,6 +380,8 @@ static const struct ioctl_sick_map fs_map[] = { { XFS_SICK_FS_PQUOTA, XFS_FSOP_GEOM_SICK_PQUOTA }, { XFS_SICK_FS_QUOTACHECK, XFS_FSOP_GEOM_SICK_QUOTACHECK }, { XFS_SICK_FS_NLINKS, XFS_FSOP_GEOM_SICK_NLINKS }, + { XFS_SICK_FS_METADIR, XFS_FSOP_GEOM_SICK_METADIR }, + { XFS_SICK_FS_METAPATH, XFS_FSOP_GEOM_SICK_METAPATH }, { 0, 0 }, }; diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 383c24548202..7b6c026d01a1 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -25,6 +25,9 @@ #include "xfs_ag.h" #include "xfs_log_priv.h" #include "xfs_health.h" +#include "xfs_da_format.h" +#include "xfs_dir2.h" +#include "xfs_metafile.h" #include <linux/iversion.h> @@ -829,6 +832,77 @@ out_error_or_again: } /* + * Get a metadata inode. + * + * The metafile type must match the file mode exactly, and for files in the + * metadata directory tree, it must match the inode's metatype exactly. + */ +int +xfs_trans_metafile_iget( + struct xfs_trans *tp, + xfs_ino_t ino, + enum xfs_metafile_type metafile_type, + struct xfs_inode **ipp) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_inode *ip; + umode_t mode; + int error; + + error = xfs_iget(mp, tp, ino, 0, 0, &ip); + if (error == -EFSCORRUPTED || error == -EINVAL) + goto whine; + if (error) + return error; + + if (VFS_I(ip)->i_nlink == 0) + goto bad_rele; + + if (metafile_type == XFS_METAFILE_DIR) + mode = S_IFDIR; + else + mode = S_IFREG; + if (inode_wrong_type(VFS_I(ip), mode)) + goto bad_rele; + if (xfs_has_metadir(mp)) { + if (!xfs_is_metadir_inode(ip)) + goto bad_rele; + if (metafile_type != ip->i_metatype) + goto bad_rele; + } + + *ipp = ip; + return 0; +bad_rele: + xfs_irele(ip); +whine: + xfs_err(mp, "metadata inode 0x%llx type %u is corrupt", ino, + metafile_type); + xfs_fs_mark_sick(mp, XFS_SICK_FS_METADIR); + return -EFSCORRUPTED; +} + +/* Grab a metadata file if the caller doesn't already have a transaction. */ +int +xfs_metafile_iget( + struct xfs_mount *mp, + xfs_ino_t ino, + enum xfs_metafile_type metafile_type, + struct xfs_inode **ipp) +{ + struct xfs_trans *tp; + int error; + + error = xfs_trans_alloc_empty(mp, &tp); + if (error) + return error; + + error = xfs_trans_metafile_iget(tp, ino, metafile_type, ipp); + xfs_trans_cancel(tp); + return error; +} + +/* * Grab the inode for reclaim exclusively. * * We have found this inode via a lookup under RCU, so the inode may have diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 693770f9bb09..103cf8b2af24 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -43,6 +43,7 @@ #include "xfs_parent.h" #include "xfs_xattr.h" #include "xfs_inode_util.h" +#include "xfs_metafile.h" struct kmem_cache *xfs_inode_cache; @@ -554,8 +555,20 @@ xfs_lookup( if (error) goto out_free_name; + /* + * Fail if a directory entry in the regular directory tree points to + * a metadata file. + */ + if (XFS_IS_CORRUPT(dp->i_mount, xfs_is_metadir_inode(*ipp))) { + xfs_fs_mark_sick(dp->i_mount, XFS_SICK_FS_METADIR); + error = -EFSCORRUPTED; + goto out_irele; + } + return 0; +out_irele: + xfs_irele(*ipp); out_free_name: if (ci_name) kfree(ci_name->name); @@ -1295,7 +1308,7 @@ xfs_inode_needs_inactive( return false; /* Metadata inodes require explicit resource cleanup. */ - if (xfs_is_metadata_inode(ip)) + if (xfs_is_internal_inode(ip)) return false; /* Want to clean out the cow blocks if there are any. */ @@ -1388,7 +1401,7 @@ xfs_inactive( goto out; /* Metadata inodes require explicit resource cleanup. */ - if (xfs_is_metadata_inode(ip)) + if (xfs_is_internal_inode(ip)) goto out; /* Try to clean out the cow blocks if there are any. */ @@ -3040,7 +3053,7 @@ xfs_inode_alloc_unitsize( /* Should we always be using copy on write for file writes? */ bool xfs_is_always_cow_inode( - struct xfs_inode *ip) + const struct xfs_inode *ip) { return ip->i_mount->m_always_cow && xfs_has_reflink(ip->i_mount); } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 03944b6c5fba..b6e959563547 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -65,6 +65,7 @@ typedef struct xfs_inode { uint16_t i_flushiter; /* incremented on flush */ }; uint8_t i_forkoff; /* attr fork offset >> 3 */ + enum xfs_metafile_type i_metatype; /* XFS_METAFILE_* */ uint16_t i_diflags; /* XFS_DIFLAG_... */ uint64_t i_diflags2; /* XFS_DIFLAG2_... */ struct timespec64 i_crtime; /* time created */ @@ -100,7 +101,7 @@ static inline bool xfs_inode_on_unlinked_list(const struct xfs_inode *ip) return ip->i_prev_unlinked != 0; } -static inline bool xfs_inode_has_attr_fork(struct xfs_inode *ip) +static inline bool xfs_inode_has_attr_fork(const struct xfs_inode *ip) { return ip->i_forkoff > 0; } @@ -271,23 +272,36 @@ xfs_iflags_test_and_set(xfs_inode_t *ip, unsigned long flags) return ret; } -static inline bool xfs_is_reflink_inode(struct xfs_inode *ip) +static inline bool xfs_is_reflink_inode(const struct xfs_inode *ip) { return ip->i_diflags2 & XFS_DIFLAG2_REFLINK; } -static inline bool xfs_is_metadata_inode(const struct xfs_inode *ip) +static inline bool xfs_is_metadir_inode(const struct xfs_inode *ip) +{ + return ip->i_diflags2 & XFS_DIFLAG2_METADATA; +} + +static inline bool xfs_is_internal_inode(const struct xfs_inode *ip) { struct xfs_mount *mp = ip->i_mount; + /* Any file in the metadata directory tree is a metadata inode. */ + if (xfs_has_metadir(mp)) + return xfs_is_metadir_inode(ip); + + /* + * Before metadata directories, the only metadata inodes were the + * three quota files, the realtime bitmap, and the realtime summary. + */ return ip->i_ino == mp->m_sb.sb_rbmino || ip->i_ino == mp->m_sb.sb_rsumino || xfs_is_quota_inode(&mp->m_sb, ip->i_ino); } -bool xfs_is_always_cow_inode(struct xfs_inode *ip); +bool xfs_is_always_cow_inode(const struct xfs_inode *ip); -static inline bool xfs_is_cow_inode(struct xfs_inode *ip) +static inline bool xfs_is_cow_inode(const struct xfs_inode *ip) { return xfs_is_reflink_inode(ip) || xfs_is_always_cow_inode(ip); } @@ -301,17 +315,17 @@ static inline bool xfs_inode_has_filedata(const struct xfs_inode *ip) * Check if an inode has any data in the COW fork. This might be often false * even for inodes with the reflink flag when there is no pending COW operation. */ -static inline bool xfs_inode_has_cow_data(struct xfs_inode *ip) +static inline bool xfs_inode_has_cow_data(const struct xfs_inode *ip) { return ip->i_cowfp && ip->i_cowfp->if_bytes; } -static inline bool xfs_inode_has_bigtime(struct xfs_inode *ip) +static inline bool xfs_inode_has_bigtime(const struct xfs_inode *ip) { return ip->i_diflags2 & XFS_DIFLAG2_BIGTIME; } -static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip) +static inline bool xfs_inode_has_large_extent_counts(const struct xfs_inode *ip) { return ip->i_diflags2 & XFS_DIFLAG2_NREXT64; } @@ -320,7 +334,7 @@ static inline bool xfs_inode_has_large_extent_counts(struct xfs_inode *ip) * Decide if this file is a realtime file whose data allocation unit is larger * than a single filesystem block. */ -static inline bool xfs_inode_has_bigrtalloc(struct xfs_inode *ip) +static inline bool xfs_inode_has_bigrtalloc(const struct xfs_inode *ip) { return XFS_IS_REALTIME_INODE(ip) && ip->i_mount->m_sb.sb_rextsize > 1; } @@ -625,9 +639,9 @@ void xfs_sort_inodes(struct xfs_inode **i_tab, unsigned int num_inodes); static inline bool xfs_inode_unlinked_incomplete( - struct xfs_inode *ip) + const struct xfs_inode *ip) { - return VFS_I(ip)->i_nlink == 0 && !xfs_inode_on_unlinked_list(ip); + return VFS_IC(ip)->i_nlink == 0 && !xfs_inode_on_unlinked_list(ip); } int xfs_inode_reload_unlinked_bucket(struct xfs_trans *tp, struct xfs_inode *ip); int xfs_inode_reload_unlinked(struct xfs_inode *ip); diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c index b509cbd191f4..912f0b1bc3cb 100644 --- a/fs/xfs/xfs_inode_item.c +++ b/fs/xfs/xfs_inode_item.c @@ -556,7 +556,6 @@ xfs_inode_to_log_dinode( to->di_projid_lo = ip->i_projid & 0xffff; to->di_projid_hi = ip->i_projid >> 16; - memset(to->di_pad3, 0, sizeof(to->di_pad3)); to->di_atime = xfs_inode_to_log_dinode_ts(ip, inode_get_atime(inode)); to->di_mtime = xfs_inode_to_log_dinode_ts(ip, inode_get_mtime(inode)); to->di_ctime = xfs_inode_to_log_dinode_ts(ip, inode_get_ctime(inode)); @@ -590,10 +589,16 @@ xfs_inode_to_log_dinode( /* dummy value for initialisation */ to->di_crc = 0; + + if (xfs_is_metadir_inode(ip)) + to->di_metatype = ip->i_metatype; + else + to->di_metatype = 0; } else { to->di_version = 2; to->di_flushiter = ip->i_flushiter; memset(to->di_v2_pad, 0, sizeof(to->di_v2_pad)); + to->di_metatype = 0; } xfs_inode_to_log_dinode_iext_counters(ip, to); diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c index dbdab4ce7c44..e70d2611456b 100644 --- a/fs/xfs/xfs_inode_item_recover.c +++ b/fs/xfs/xfs_inode_item_recover.c @@ -175,7 +175,7 @@ xfs_log_dinode_to_disk( to->di_mode = cpu_to_be16(from->di_mode); to->di_version = from->di_version; to->di_format = from->di_format; - to->di_onlink = 0; + to->di_metatype = cpu_to_be16(from->di_metatype); to->di_uid = cpu_to_be32(from->di_uid); to->di_gid = cpu_to_be32(from->di_gid); to->di_nlink = cpu_to_be32(from->di_nlink); diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index 2567fd2a0994..f36fd8db388c 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -233,6 +233,10 @@ xfs_bulk_ireq_setup( if (hdr->flags & XFS_BULK_IREQ_NREXT64) breq->flags |= XFS_IBULK_NREXT64; + /* Caller wants to see metadata directories in bulkstat output. */ + if (hdr->flags & XFS_BULK_IREQ_METADIR) + breq->flags |= XFS_IBULK_METADIR; + return 0; } @@ -323,6 +327,9 @@ xfs_ioc_inumbers( if (copy_from_user(&hdr, &arg->hdr, sizeof(hdr))) return -EFAULT; + if (hdr.flags & XFS_BULK_IREQ_METADIR) + return -EINVAL; + error = xfs_bulk_ireq_setup(mp, &hdr, &breq, arg->inumbers); if (error == -ECANCELED) goto out_teardown; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index ee79cf161312..66a726a5fbbb 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -42,7 +42,9 @@ * held. For regular files, the lock order is the other way around - the * mmap_lock is taken during the page fault, and then we lock the ilock to do * block mapping. Hence we need a different class for the directory ilock so - * that lockdep can tell them apart. + * that lockdep can tell them apart. Directories in the metadata directory + * tree get a separate class so that lockdep reports will warn us if someone + * ever tries to lock regular directories after locking metadata directories. */ static struct lock_class_key xfs_nondir_ilock_class; static struct lock_class_key xfs_dir_ilock_class; @@ -1289,6 +1291,7 @@ xfs_setup_inode( { struct inode *inode = &ip->i_vnode; gfp_t gfp_mask; + bool is_meta = xfs_is_internal_inode(ip); inode->i_ino = ip->i_ino; inode->i_state |= I_NEW; @@ -1300,6 +1303,16 @@ xfs_setup_inode( i_size_write(inode, ip->i_disk_size); xfs_diflags_to_iflags(ip, true); + /* + * Mark our metadata files as private so that LSMs and the ACL code + * don't try to add their own metadata or reason about these files, + * and users cannot ever obtain file handles to them. + */ + if (is_meta) { + inode->i_flags |= S_PRIVATE; + inode->i_opflags &= ~IOP_XATTR; + } + if (S_ISDIR(inode->i_mode)) { /* * We set the i_rwsem class here to avoid potential races with diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index c0757ab99495..1fa1c0564b0c 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -36,6 +36,14 @@ struct xfs_bstat_chunk { struct xfs_bulkstat *buf; }; +static inline bool +want_metadir_file( + struct xfs_inode *ip, + struct xfs_ibulk *breq) +{ + return xfs_is_metadir_inode(ip) && (breq->flags & XFS_IBULK_METADIR); +} + /* * Fill out the bulkstat info for a single inode and report it somewhere. * @@ -69,9 +77,6 @@ xfs_bulkstat_one_int( vfsuid_t vfsuid; vfsgid_t vfsgid; - if (xfs_internal_inum(mp, ino)) - goto out_advance; - error = xfs_iget(mp, tp, ino, (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED), XFS_ILOCK_SHARED, &ip); @@ -97,8 +102,28 @@ xfs_bulkstat_one_int( vfsuid = i_uid_into_vfsuid(idmap, inode); vfsgid = i_gid_into_vfsgid(idmap, inode); + /* + * If caller wants files from the metadata directories, push out the + * bare minimum information for enabling scrub. + */ + if (want_metadir_file(ip, bc->breq)) { + memset(buf, 0, sizeof(*buf)); + buf->bs_ino = ino; + buf->bs_gen = inode->i_generation; + buf->bs_mode = inode->i_mode & S_IFMT; + xfs_bulkstat_health(ip, buf); + buf->bs_version = XFS_BULKSTAT_VERSION_V5; + xfs_iunlock(ip, XFS_ILOCK_SHARED); + xfs_irele(ip); + + error = bc->formatter(bc->breq, buf); + if (!error || error == -ECANCELED) + goto out_advance; + goto out; + } + /* If this is a private inode, don't leak its details to userspace. */ - if (IS_PRIVATE(inode)) { + if (IS_PRIVATE(inode) || xfs_is_sb_inum(mp, ino)) { xfs_iunlock(ip, XFS_ILOCK_SHARED); xfs_irele(ip); error = -EINVAL; diff --git a/fs/xfs/xfs_itable.h b/fs/xfs/xfs_itable.h index 1659f13f17a8..f10e8f8f2335 100644 --- a/fs/xfs/xfs_itable.h +++ b/fs/xfs/xfs_itable.h @@ -22,6 +22,9 @@ struct xfs_ibulk { /* Fill out the bs_extents64 field if set. */ #define XFS_IBULK_NREXT64 (1U << 1) +/* Signal that we can return metadata directories. */ +#define XFS_IBULK_METADIR (1U << 2) + /* * Advance the user buffer pointer by one record of the given size. If the * buffer is now full, return the appropriate error code. diff --git a/fs/xfs/xfs_message.c b/fs/xfs/xfs_message.c index 8f495cc23903..6ed485ff2756 100644 --- a/fs/xfs/xfs_message.c +++ b/fs/xfs/xfs_message.c @@ -131,3 +131,54 @@ xfs_buf_alert_ratelimited( __xfs_printk(KERN_ALERT, mp, &vaf); va_end(args); } + +void +xfs_warn_experimental( + struct xfs_mount *mp, + enum xfs_experimental_feat feat) +{ + static const struct { + const char *name; + long opstate; + } features[] = { + [XFS_EXPERIMENTAL_PNFS] = { + .opstate = XFS_OPSTATE_WARNED_PNFS, + .name = "pNFS", + }, + [XFS_EXPERIMENTAL_SCRUB] = { + .opstate = XFS_OPSTATE_WARNED_SCRUB, + .name = "online scrub", + }, + [XFS_EXPERIMENTAL_SHRINK] = { + .opstate = XFS_OPSTATE_WARNED_SHRINK, + .name = "online shrink", + }, + [XFS_EXPERIMENTAL_LARP] = { + .opstate = XFS_OPSTATE_WARNED_LARP, + .name = "logged extended attributes", + }, + [XFS_EXPERIMENTAL_LBS] = { + .opstate = XFS_OPSTATE_WARNED_LBS, + .name = "large block size", + }, + [XFS_EXPERIMENTAL_EXCHRANGE] = { + .opstate = XFS_OPSTATE_WARNED_EXCHRANGE, + .name = "exchange range", + }, + [XFS_EXPERIMENTAL_PPTR] = { + .opstate = XFS_OPSTATE_WARNED_PPTR, + .name = "parent pointer", + }, + [XFS_EXPERIMENTAL_METADIR] = { + .opstate = XFS_OPSTATE_WARNED_METADIR, + .name = "metadata directory tree", + }, + }; + ASSERT(feat >= 0 && feat < XFS_EXPERIMENTAL_MAX); + BUILD_BUG_ON(ARRAY_SIZE(features) != XFS_EXPERIMENTAL_MAX); + + if (xfs_should_warn(mp, features[feat].opstate)) + xfs_warn(mp, + "EXPERIMENTAL %s feature enabled. Use at your own risk!", + features[feat].name); +} diff --git a/fs/xfs/xfs_message.h b/fs/xfs/xfs_message.h index cc323775a12c..7fb36ced9df7 100644 --- a/fs/xfs/xfs_message.h +++ b/fs/xfs/xfs_message.h @@ -75,12 +75,6 @@ do { \ #define xfs_debug_ratelimited(dev, fmt, ...) \ xfs_printk_ratelimited(xfs_debug, dev, fmt, ##__VA_ARGS__) -#define xfs_warn_mount(mp, warntag, fmt, ...) \ -do { \ - if (xfs_should_warn((mp), (warntag))) \ - xfs_warn((mp), (fmt), ##__VA_ARGS__); \ -} while (0) - #define xfs_warn_once(dev, fmt, ...) \ xfs_printk_once(xfs_warn, dev, fmt, ##__VA_ARGS__) #define xfs_notice_once(dev, fmt, ...) \ @@ -96,4 +90,18 @@ extern void xfs_hex_dump(const void *p, int length); void xfs_buf_alert_ratelimited(struct xfs_buf *bp, const char *rlmsg, const char *fmt, ...); +enum xfs_experimental_feat { + XFS_EXPERIMENTAL_PNFS, + XFS_EXPERIMENTAL_SCRUB, + XFS_EXPERIMENTAL_SHRINK, + XFS_EXPERIMENTAL_LARP, + XFS_EXPERIMENTAL_LBS, + XFS_EXPERIMENTAL_EXCHRANGE, + XFS_EXPERIMENTAL_PPTR, + XFS_EXPERIMENTAL_METADIR, + + XFS_EXPERIMENTAL_MAX, +}; +void xfs_warn_experimental(struct xfs_mount *mp, enum xfs_experimental_feat f); + #endif /* __XFS_MESSAGE_H */ diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 25bbcc3f4ee0..2dd2606fc7e3 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -35,6 +35,7 @@ #include "xfs_trace.h" #include "xfs_ag.h" #include "xfs_rtbitmap.h" +#include "xfs_metafile.h" #include "scrub/stats.h" static DEFINE_MUTEX(xfs_uuid_table_mutex); @@ -620,6 +621,22 @@ xfs_mount_setup_inode_geom( xfs_ialloc_setup_geometry(mp); } +/* Mount the metadata directory tree root. */ +STATIC int +xfs_mount_setup_metadir( + struct xfs_mount *mp) +{ + int error; + + /* Load the metadata directory root inode into memory. */ + error = xfs_metafile_iget(mp, mp->m_sb.sb_metadirino, XFS_METAFILE_DIR, + &mp->m_metadirip); + if (error) + xfs_warn(mp, "Failed to load metadir root directory, error %d", + error); + return error; +} + /* Compute maximum possible height for per-AG btree types for this fs. */ static inline void xfs_agbtree_compute_maxlevels( @@ -866,6 +883,12 @@ xfs_mountfs( mp->m_features |= XFS_FEAT_ATTR2; } + if (xfs_has_metadir(mp)) { + error = xfs_mount_setup_metadir(mp); + if (error) + goto out_free_metadir; + } + /* * Get and sanity-check the root inode. * Save the pointer to it in the mount structure. @@ -876,7 +899,7 @@ xfs_mountfs( xfs_warn(mp, "Failed to read root inode 0x%llx, error %d", sbp->sb_rootino, -error); - goto out_log_dealloc; + goto out_free_metadir; } ASSERT(rip != NULL); @@ -1018,6 +1041,9 @@ xfs_mountfs( xfs_irele(rip); /* Clean out dquots that might be in memory after quotacheck. */ xfs_qm_unmount(mp); + out_free_metadir: + if (mp->m_metadirip) + xfs_irele(mp->m_metadirip); /* * Inactivate all inodes that might still be in memory after a log @@ -1039,7 +1065,6 @@ xfs_mountfs( * quota inodes. */ xfs_unmount_flush_inodes(mp); - out_log_dealloc: xfs_log_mount_cancel(mp); out_inodegc_shrinker: shrinker_free(mp->m_inodegc_shrinker); @@ -1091,6 +1116,8 @@ xfs_unmountfs( xfs_qm_unmount_quotas(mp); xfs_rtunmount_inodes(mp); xfs_irele(mp->m_rootip); + if (mp->m_metadirip) + xfs_irele(mp->m_metadirip); xfs_unmount_flush_inodes(mp); diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 1b698878f40c..71ed04ddd737 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -127,6 +127,7 @@ typedef struct xfs_mount { struct xfs_inode *m_rbmip; /* pointer to bitmap inode */ struct xfs_inode *m_rsumip; /* pointer to summary inode */ struct xfs_inode *m_rootip; /* pointer to root directory */ + struct xfs_inode *m_metadirip; /* ptr to metadata directory */ struct xfs_quotainfo *m_quotainfo; /* disk quota information */ struct xfs_buftarg *m_ddev_targp; /* data device */ struct xfs_buftarg *m_logdev_targp;/* log device */ @@ -332,6 +333,7 @@ typedef struct xfs_mount { #define XFS_FEAT_NEEDSREPAIR (1ULL << 25) /* needs xfs_repair */ #define XFS_FEAT_NREXT64 (1ULL << 26) /* large extent counters */ #define XFS_FEAT_EXCHANGE_RANGE (1ULL << 27) /* exchange range */ +#define XFS_FEAT_METADIR (1ULL << 28) /* metadata directory tree */ /* Mount features */ #define XFS_FEAT_NOATTR2 (1ULL << 48) /* disable attr2 creation */ @@ -387,6 +389,7 @@ __XFS_HAS_FEAT(bigtime, BIGTIME) __XFS_HAS_FEAT(needsrepair, NEEDSREPAIR) __XFS_HAS_FEAT(large_extent_counts, NREXT64) __XFS_HAS_FEAT(exchange_range, EXCHANGE_RANGE) +__XFS_HAS_FEAT(metadir, METADIR) /* * Some features are always on for v5 file systems, allow the compiler to @@ -467,18 +470,28 @@ __XFS_HAS_FEAT(nouuid, NOUUID) */ #define XFS_OPSTATE_BLOCKGC_ENABLED 6 +/* Kernel has logged a warning about pNFS being used on this fs. */ +#define XFS_OPSTATE_WARNED_PNFS 7 /* Kernel has logged a warning about online fsck being used on this fs. */ -#define XFS_OPSTATE_WARNED_SCRUB 7 +#define XFS_OPSTATE_WARNED_SCRUB 8 /* Kernel has logged a warning about shrink being used on this fs. */ -#define XFS_OPSTATE_WARNED_SHRINK 8 +#define XFS_OPSTATE_WARNED_SHRINK 9 /* Kernel has logged a warning about logged xattr updates being used. */ -#define XFS_OPSTATE_WARNED_LARP 9 +#define XFS_OPSTATE_WARNED_LARP 10 /* Mount time quotacheck is running */ -#define XFS_OPSTATE_QUOTACHECK_RUNNING 10 +#define XFS_OPSTATE_QUOTACHECK_RUNNING 11 /* Do we want to clear log incompat flags? */ -#define XFS_OPSTATE_UNSET_LOG_INCOMPAT 11 +#define XFS_OPSTATE_UNSET_LOG_INCOMPAT 12 /* Filesystem can use logged extended attributes */ -#define XFS_OPSTATE_USE_LARP 12 +#define XFS_OPSTATE_USE_LARP 13 +/* Kernel has logged a warning about blocksize > pagesize on this fs. */ +#define XFS_OPSTATE_WARNED_LBS 14 +/* Kernel has logged a warning about exchange-range being used on this fs. */ +#define XFS_OPSTATE_WARNED_EXCHRANGE 15 +/* Kernel has logged a warning about parent pointers being used on this fs. */ +#define XFS_OPSTATE_WARNED_PPTR 16 +/* Kernel has logged a warning about metadata dirs being used on this fs. */ +#define XFS_OPSTATE_WARNED_METADIR 17 #define __XFS_IS_OPSTATE(name, NAME) \ static inline bool xfs_is_ ## name (struct xfs_mount *mp) \ diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c index 23d16186e1a3..6f4479deac6d 100644 --- a/fs/xfs/xfs_pnfs.c +++ b/fs/xfs/xfs_pnfs.c @@ -58,8 +58,7 @@ xfs_fs_get_uuid( { struct xfs_mount *mp = XFS_M(sb); - xfs_notice_once(mp, -"Using experimental pNFS feature, use at your own risk!"); + xfs_warn_experimental(mp, XFS_EXPERIMENTAL_PNFS); if (*len < sizeof(uuid_t)) return -EINVAL; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 7e2307921deb..b94d6f192e72 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -27,6 +27,8 @@ #include "xfs_ialloc.h" #include "xfs_log_priv.h" #include "xfs_health.h" +#include "xfs_da_format.h" +#include "xfs_metafile.h" /* * The global quota manager. There is only one of these for the entire @@ -302,6 +304,8 @@ xfs_qm_need_dqattach( return false; if (xfs_is_quota_inode(&mp->m_sb, ip->i_ino)) return false; + if (xfs_is_metadir_inode(ip)) + return false; return true; } @@ -324,6 +328,7 @@ xfs_qm_dqattach_locked( return 0; xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); + ASSERT(!xfs_is_metadir_inode(ip)); if (XFS_IS_UQUOTA_ON(mp) && !ip->i_udquot) { error = xfs_qm_dqattach_one(ip, XFS_DQTYPE_USER, @@ -733,6 +738,17 @@ xfs_qm_destroy_quotainfo( mp->m_quotainfo = NULL; } +static inline enum xfs_metafile_type +xfs_qm_metafile_type( + unsigned int flags) +{ + if (flags & XFS_QMOPT_UQUOTA) + return XFS_METAFILE_USRQUOTA; + else if (flags & XFS_QMOPT_GQUOTA) + return XFS_METAFILE_GRPQUOTA; + return XFS_METAFILE_PRJQUOTA; +} + /* * Create an inode and return with a reference already taken, but unlocked * This is how we create quota inodes @@ -744,6 +760,7 @@ xfs_qm_qino_alloc( unsigned int flags) { struct xfs_trans *tp; + enum xfs_metafile_type metafile_type = xfs_qm_metafile_type(flags); int error; bool need_alloc = true; @@ -777,9 +794,10 @@ xfs_qm_qino_alloc( } } if (ino != NULLFSINO) { - error = xfs_iget(mp, NULL, ino, 0, 0, ipp); + error = xfs_metafile_iget(mp, ino, metafile_type, ipp); if (error) return error; + mp->m_sb.sb_gquotino = NULLFSINO; mp->m_sb.sb_pquotino = NULLFSINO; need_alloc = false; @@ -806,6 +824,8 @@ xfs_qm_qino_alloc( xfs_trans_cancel(tp); return error; } + if (xfs_has_metadir(mp)) + xfs_metafile_set_iflag(tp, *ipp, metafile_type); } /* @@ -1189,6 +1209,10 @@ xfs_qm_dqusage_adjust( } } + /* Metadata directory files are not accounted to user-visible quotas. */ + if (xfs_is_metadir_inode(ip)) + goto error0; + ASSERT(ip->i_delayed_blks == 0); if (XFS_IS_REALTIME_INODE(ip)) { @@ -1553,16 +1577,20 @@ xfs_qm_qino_load( struct xfs_inode **ipp) { xfs_ino_t ino = NULLFSINO; + enum xfs_metafile_type metafile_type = XFS_METAFILE_UNKNOWN; switch (type) { case XFS_DQTYPE_USER: ino = mp->m_sb.sb_uquotino; + metafile_type = XFS_METAFILE_USRQUOTA; break; case XFS_DQTYPE_GROUP: ino = mp->m_sb.sb_gquotino; + metafile_type = XFS_METAFILE_GRPQUOTA; break; case XFS_DQTYPE_PROJ: ino = mp->m_sb.sb_pquotino; + metafile_type = XFS_METAFILE_PRJQUOTA; break; default: ASSERT(0); @@ -1572,7 +1600,7 @@ xfs_qm_qino_load( if (ino == NULLFSINO) return -ENOENT; - return xfs_iget(mp, NULL, ino, 0, 0, ipp); + return xfs_metafile_iget(mp, ino, metafile_type, ipp); } /* @@ -1735,6 +1763,8 @@ xfs_qm_vop_dqalloc( if (!XFS_IS_QUOTA_ON(mp)) return 0; + ASSERT(!xfs_is_metadir_inode(ip)); + lockflags = XFS_ILOCK_EXCL; xfs_ilock(ip, lockflags); @@ -1864,6 +1894,7 @@ xfs_qm_vop_chown( xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); ASSERT(XFS_IS_QUOTA_ON(ip->i_mount)); + ASSERT(!xfs_is_metadir_inode(ip)); /* old dquot */ prevdq = *IO_olddq; @@ -1951,6 +1982,7 @@ xfs_qm_vop_create_dqattach( return; xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); + ASSERT(!xfs_is_metadir_inode(ip)); if (udqp && XFS_IS_UQUOTA_ON(mp)) { ASSERT(ip->i_udquot == NULL); diff --git a/fs/xfs/xfs_quota.h b/fs/xfs/xfs_quota.h index 23d71a55bbc0..645761997bf2 100644 --- a/fs/xfs/xfs_quota.h +++ b/fs/xfs/xfs_quota.h @@ -29,6 +29,11 @@ struct xfs_buf; (XFS_IS_GQUOTA_ON(mp) && (ip)->i_gdquot == NULL) || \ (XFS_IS_PQUOTA_ON(mp) && (ip)->i_pdquot == NULL)) +#define XFS_IS_DQDETACHED(ip) \ + ((ip)->i_udquot == NULL && \ + (ip)->i_gdquot == NULL && \ + (ip)->i_pdquot == NULL) + #define XFS_QM_NEED_QUOTACHECK(mp) \ ((XFS_IS_UQUOTA_ON(mp) && \ (mp->m_sb.sb_qflags & XFS_UQUOTA_CHKD) == 0) || \ diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index 3a2005a1e673..46a920b192d1 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -25,6 +25,8 @@ #include "xfs_quota.h" #include "xfs_log_priv.h" #include "xfs_health.h" +#include "xfs_da_format.h" +#include "xfs_metafile.h" /* * Return whether there are any free extents in the size range given @@ -1101,16 +1103,12 @@ xfs_rtalloc_reinit_frextents( */ static inline int xfs_rtmount_iread_extents( + struct xfs_trans *tp, struct xfs_inode *ip, unsigned int lock_class) { - struct xfs_trans *tp; int error; - error = xfs_trans_alloc_empty(ip->i_mount, &tp); - if (error) - return error; - xfs_ilock(ip, XFS_ILOCK_EXCL | lock_class); error = xfs_iread_extents(tp, ip, XFS_DATA_FORK); @@ -1125,7 +1123,6 @@ xfs_rtmount_iread_extents( out_unlock: xfs_iunlock(ip, XFS_ILOCK_EXCL | lock_class); - xfs_trans_cancel(tp); return error; } @@ -1133,45 +1130,54 @@ out_unlock: * Get the bitmap and summary inodes and the summary cache into the mount * structure at mount time. */ -int /* error */ +int xfs_rtmount_inodes( - xfs_mount_t *mp) /* file system mount structure */ + struct xfs_mount *mp) { - int error; /* error return value */ - xfs_sb_t *sbp; + struct xfs_trans *tp; + struct xfs_sb *sbp = &mp->m_sb; + int error; - sbp = &mp->m_sb; - error = xfs_iget(mp, NULL, sbp->sb_rbmino, 0, 0, &mp->m_rbmip); + error = xfs_trans_alloc_empty(mp, &tp); + if (error) + return error; + + error = xfs_trans_metafile_iget(tp, mp->m_sb.sb_rbmino, + XFS_METAFILE_RTBITMAP, &mp->m_rbmip); if (xfs_metadata_is_sick(error)) xfs_rt_mark_sick(mp, XFS_SICK_RT_BITMAP); if (error) - return error; + goto out_trans; ASSERT(mp->m_rbmip != NULL); - error = xfs_rtmount_iread_extents(mp->m_rbmip, XFS_ILOCK_RTBITMAP); + error = xfs_rtmount_iread_extents(tp, mp->m_rbmip, XFS_ILOCK_RTBITMAP); if (error) goto out_rele_bitmap; - error = xfs_iget(mp, NULL, sbp->sb_rsumino, 0, 0, &mp->m_rsumip); + error = xfs_trans_metafile_iget(tp, mp->m_sb.sb_rsumino, + XFS_METAFILE_RTSUMMARY, &mp->m_rsumip); if (xfs_metadata_is_sick(error)) xfs_rt_mark_sick(mp, XFS_SICK_RT_SUMMARY); if (error) goto out_rele_bitmap; ASSERT(mp->m_rsumip != NULL); - error = xfs_rtmount_iread_extents(mp->m_rsumip, XFS_ILOCK_RTSUM); + error = xfs_rtmount_iread_extents(tp, mp->m_rsumip, XFS_ILOCK_RTSUM); if (error) goto out_rele_summary; error = xfs_alloc_rsum_cache(mp, sbp->sb_rbmblocks); if (error) goto out_rele_summary; + xfs_trans_cancel(tp); return 0; out_rele_summary: xfs_irele(mp->m_rsumip); out_rele_bitmap: xfs_irele(mp->m_rbmip); +out_trans: + xfs_trans_cancel(tp); return error; } diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 457c2d70968d..be493d392960 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1657,9 +1657,7 @@ xfs_fs_fill_super( goto out_free_sb; } - xfs_warn(mp, -"EXPERIMENTAL: V5 Filesystem with Large Block Size (%d bytes) enabled.", - mp->m_sb.sb_blocksize); + xfs_warn_experimental(mp, XFS_EXPERIMENTAL_LBS); } /* Ensure this filesystem fits in the page cache limits */ @@ -1733,6 +1731,9 @@ xfs_fs_fill_super( mp->m_features &= ~XFS_FEAT_DISCARD; } + if (xfs_has_metadir(mp)) + xfs_warn_experimental(mp, XFS_EXPERIMENTAL_METADIR); + if (xfs_has_reflink(mp)) { if (mp->m_sb.sb_rblocks) { xfs_alert(mp, @@ -1755,12 +1756,10 @@ xfs_fs_fill_super( } if (xfs_has_exchange_range(mp)) - xfs_warn(mp, - "EXPERIMENTAL exchange-range feature enabled. Use at your own risk!"); + xfs_warn_experimental(mp, XFS_EXPERIMENTAL_EXCHRANGE); if (xfs_has_parent(mp)) - xfs_warn(mp, - "EXPERIMENTAL parent pointer feature enabled. Use at your own risk!"); + xfs_warn_experimental(mp, XFS_EXPERIMENTAL_PPTR); error = xfs_mountfs(mp); if (error) diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c index 17164b2d0472..1b9d75a54c5e 100644 --- a/fs/xfs/xfs_trace.c +++ b/fs/xfs/xfs_trace.c @@ -46,6 +46,8 @@ #include "xfs_parent.h" #include "xfs_rmap.h" #include "xfs_refcount.h" +#include "xfs_metafile.h" +#include "xfs_metadir.h" /* * We include this last to have the helpers above available for the trace diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 0d5caba76a82..9a6a09e2e529 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -95,6 +95,7 @@ struct xfs_attrlist_cursor_kern; struct xfs_extent_free_item; struct xfs_rmap_intent; struct xfs_refcount_intent; +struct xfs_metadir_update; #define XFS_ATTR_FILTER_FLAGS \ { XFS_ATTR_ROOT, "ROOT" }, \ @@ -5356,6 +5357,107 @@ DEFINE_EVENT(xfs_getparents_class, name, \ DEFINE_XFS_GETPARENTS_EVENT(xfs_getparents_begin); DEFINE_XFS_GETPARENTS_EVENT(xfs_getparents_end); +DECLARE_EVENT_CLASS(xfs_metadir_update_class, + TP_PROTO(const struct xfs_metadir_update *upd), + TP_ARGS(upd), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, dp_ino) + __field(xfs_ino_t, ino) + __string(fname, upd->path) + ), + TP_fast_assign( + __entry->dev = upd->dp->i_mount->m_super->s_dev; + __entry->dp_ino = upd->dp->i_ino; + __entry->ino = upd->ip ? upd->ip->i_ino : NULLFSINO; + __assign_str(fname); + ), + TP_printk("dev %d:%d dp 0x%llx fname '%s' ino 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->dp_ino, + __get_str(fname), + __entry->ino) +) + +#define DEFINE_METADIR_UPDATE_EVENT(name) \ +DEFINE_EVENT(xfs_metadir_update_class, name, \ + TP_PROTO(const struct xfs_metadir_update *upd), \ + TP_ARGS(upd)) +DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_start_create); +DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_start_link); +DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_commit); +DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_cancel); +DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_try_create); +DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_create); +DEFINE_METADIR_UPDATE_EVENT(xfs_metadir_link); + +DECLARE_EVENT_CLASS(xfs_metadir_update_error_class, + TP_PROTO(const struct xfs_metadir_update *upd, int error), + TP_ARGS(upd, error), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, dp_ino) + __field(xfs_ino_t, ino) + __field(int, error) + __string(fname, upd->path) + ), + TP_fast_assign( + __entry->dev = upd->dp->i_mount->m_super->s_dev; + __entry->dp_ino = upd->dp->i_ino; + __entry->ino = upd->ip ? upd->ip->i_ino : NULLFSINO; + __entry->error = error; + __assign_str(fname); + ), + TP_printk("dev %d:%d dp 0x%llx fname '%s' ino 0x%llx error %d", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->dp_ino, + __get_str(fname), + __entry->ino, + __entry->error) +) + +#define DEFINE_METADIR_UPDATE_ERROR_EVENT(name) \ +DEFINE_EVENT(xfs_metadir_update_error_class, name, \ + TP_PROTO(const struct xfs_metadir_update *upd, int error), \ + TP_ARGS(upd, error)) +DEFINE_METADIR_UPDATE_ERROR_EVENT(xfs_metadir_teardown); + +DECLARE_EVENT_CLASS(xfs_metadir_class, + TP_PROTO(struct xfs_inode *dp, struct xfs_name *name, + xfs_ino_t ino), + TP_ARGS(dp, name, ino), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_ino_t, dp_ino) + __field(xfs_ino_t, ino) + __field(int, ftype) + __field(int, namelen) + __dynamic_array(char, name, name->len) + ), + TP_fast_assign( + __entry->dev = VFS_I(dp)->i_sb->s_dev; + __entry->dp_ino = dp->i_ino; + __entry->ino = ino, + __entry->ftype = name->type; + __entry->namelen = name->len; + memcpy(__get_str(name), name->name, name->len); + ), + TP_printk("dev %d:%d dir 0x%llx type %s name '%.*s' ino 0x%llx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->dp_ino, + __print_symbolic(__entry->ftype, XFS_DIR3_FTYPE_STR), + __entry->namelen, + __get_str(name), + __entry->ino) +) + +#define DEFINE_METADIR_EVENT(name) \ +DEFINE_EVENT(xfs_metadir_class, name, \ + TP_PROTO(struct xfs_inode *dp, struct xfs_name *name, \ + xfs_ino_t ino), \ + TP_ARGS(dp, name, ino)) +DEFINE_METADIR_EVENT(xfs_metadir_lookup); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index b368e13424c4..ca7df018290e 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -156,6 +156,8 @@ xfs_trans_mod_ino_dquot( unsigned int field, int64_t delta) { + ASSERT(!xfs_is_metadir_inode(ip) || XFS_IS_DQDETACHED(ip)); + xfs_trans_mod_dquot(tp, dqp, field, delta); if (xfs_hooks_switched_on(&xfs_dqtrx_hooks_switch)) { @@ -247,6 +249,8 @@ xfs_trans_mod_dquot_byino( xfs_is_quota_inode(&mp->m_sb, ip->i_ino)) return; + ASSERT(!xfs_is_metadir_inode(ip) || XFS_IS_DQDETACHED(ip)); + if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) xfs_trans_mod_ino_dquot(tp, ip, ip->i_udquot, field, delta); if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot) @@ -962,6 +966,8 @@ xfs_trans_reserve_quota_nblks( if (!XFS_IS_QUOTA_ON(mp)) return 0; + if (xfs_is_metadir_inode(ip)) + return 0; ASSERT(!xfs_is_quota_inode(&mp->m_sb, ip->i_ino)); xfs_assert_ilocked(ip, XFS_ILOCK_EXCL); diff --git a/fs/xfs/xfs_xattr.c b/fs/xfs/xfs_xattr.c index eaf849260bd6..0f641a9091ec 100644 --- a/fs/xfs/xfs_xattr.c +++ b/fs/xfs/xfs_xattr.c @@ -51,8 +51,7 @@ xfs_attr_grab_log_assist( return error; xfs_set_using_logged_xattrs(mp); - xfs_warn_mount(mp, XFS_OPSTATE_WARNED_LARP, - "EXPERIMENTAL logged extended attributes feature in use. Use at your own risk!"); + xfs_warn_experimental(mp, XFS_EXPERIMENTAL_LARP); return 0; } |