summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDarrick J. Wong <djwong@kernel.org>2021-09-01 10:45:57 -0700
committerDarrick J. Wong <djwong@kernel.org>2021-09-17 18:54:53 -0700
commit03f4db65226a4fd8c62e54b84899e542336dd8a5 (patch)
tree314185ce879dfa7499f405d1da01455fe92896ad
parent8ff8cf7895906295919b8ba36f8923dfba941b7a (diff)
xfs: implement live quotacheck inode scan
Create a new trio of scrub functions to check quota counters. While the dquots themselves are filesystem metadata and should be checked early, the dquot counter values are computed from other metadata and are therefore summary counters. We don't plug these into the scrub dispatch just yet, because we still need to be able to watch quota updates while doing our scan. Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r--fs/xfs/Makefile6
-rw-r--r--fs/xfs/libxfs/xfs_fs.h3
-rw-r--r--fs/xfs/scrub/common.c34
-rw-r--r--fs/xfs/scrub/common.h12
-rw-r--r--fs/xfs/scrub/health.c1
-rw-r--r--fs/xfs/scrub/iscan.c243
-rw-r--r--fs/xfs/scrub/iscan.h69
-rw-r--r--fs/xfs/scrub/quotacheck.c458
-rw-r--r--fs/xfs/scrub/quotacheck.h59
-rw-r--r--fs/xfs/scrub/scrub.c9
-rw-r--r--fs/xfs/scrub/scrub.h14
-rw-r--r--fs/xfs/scrub/trace.h27
-rw-r--r--fs/xfs/xfs_inode.c21
-rw-r--r--fs/xfs/xfs_inode.h3
-rw-r--r--fs/xfs/xfs_iwalk.c13
15 files changed, 956 insertions, 16 deletions
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index 8d4c79539438..03149bf08e12 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -161,7 +161,11 @@ xfs-$(CONFIG_XFS_RT) += $(addprefix scrub/, \
rtsummary.o \
)
-xfs-$(CONFIG_XFS_QUOTA) += scrub/quota.o
+xfs-$(CONFIG_XFS_QUOTA) += $(addprefix scrub/, \
+ iscan.o \
+ quota.o \
+ quotacheck.o \
+ )
# online repair
ifeq ($(CONFIG_XFS_ONLINE_REPAIR),y)
diff --git a/fs/xfs/libxfs/xfs_fs.h b/fs/xfs/libxfs/xfs_fs.h
index d1be6b94c64a..e518c1f263f3 100644
--- a/fs/xfs/libxfs/xfs_fs.h
+++ b/fs/xfs/libxfs/xfs_fs.h
@@ -714,9 +714,10 @@ struct xfs_scrub_metadata {
#define XFS_SCRUB_TYPE_GQUOTA 22 /* group quotas */
#define XFS_SCRUB_TYPE_PQUOTA 23 /* project quotas */
#define XFS_SCRUB_TYPE_FSCOUNTERS 24 /* fs summary counters */
+#define XFS_SCRUB_TYPE_QUOTACHECK 25 /* quota counters */
/* Number of scrub subcommands. */
-#define XFS_SCRUB_TYPE_NR 25
+#define XFS_SCRUB_TYPE_NR 26
/* i: Repair this metadata. */
#define XFS_SCRUB_IFLAG_REPAIR (1 << 0)
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 9f7b8e69a10f..e1955dbdd8d6 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -27,6 +27,7 @@
#include "xfs_attr.h"
#include "xfs_reflink.h"
#include "xfs_ag.h"
+#include "xfs_error.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
@@ -79,6 +80,15 @@ __xchk_process_error(
sc->ip ? sc->ip : XFS_I(file_inode(sc->file)),
sc->sm, *error);
break;
+ case -ECANCELED:
+ /*
+ * ECANCELED here means that the caller set one of the scrub
+ * outcome flags (corrupt, xfail, xcorrupt) and wants to exit
+ * quickly. Set error to zero and do not continue.
+ */
+ trace_xchk_op_error(sc, agno, bno, *error, ret_ip);
+ *error = 0;
+ break;
case -EFSBADCRC:
case -EFSCORRUPTED:
/* Note the badness but don't abort. */
@@ -86,8 +96,7 @@ __xchk_process_error(
*error = 0;
fallthrough;
default:
- trace_xchk_op_error(sc, agno, bno, *error,
- ret_ip);
+ trace_xchk_op_error(sc, agno, bno, *error, ret_ip);
break;
}
return false;
@@ -132,6 +141,16 @@ __xchk_fblock_process_error(
/* Used to restart an op with deadlock avoidance. */
trace_xchk_deadlock_retry(sc->ip, sc->sm, *error);
break;
+ case -ECANCELED:
+ /*
+ * ECANCELED here means that the caller set one of the scrub
+ * outcome flags (corrupt, xfail, xcorrupt) and wants to exit
+ * quickly. Set error to zero and do not continue.
+ */
+ trace_xchk_file_op_error(sc, whichfork, offset, *error,
+ ret_ip);
+ *error = 0;
+ break;
case -EFSBADCRC:
case -EFSCORRUPTED:
/* Note the badness but don't abort. */
@@ -223,6 +242,17 @@ xchk_block_set_corrupt(
trace_xchk_block_error(sc, xfs_buf_daddr(bp), __return_address);
}
+/* Record a corrupt quota counter. */
+void
+xchk_qcheck_set_corrupt(
+ struct xfs_scrub *sc,
+ unsigned int dqtype,
+ xfs_dqid_t id)
+{
+ sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT;
+ trace_xchk_qcheck_error(sc, dqtype, id, __return_address);
+}
+
/* Record a corruption while cross-referencing. */
void
xchk_block_xref_set_corrupt(
diff --git a/fs/xfs/scrub/common.h b/fs/xfs/scrub/common.h
index 669ce29f2ecf..a6f9a29bc79a 100644
--- a/fs/xfs/scrub/common.h
+++ b/fs/xfs/scrub/common.h
@@ -52,6 +52,8 @@ void xchk_block_set_corrupt(struct xfs_scrub *sc,
void xchk_ino_set_corrupt(struct xfs_scrub *sc, xfs_ino_t ino);
void xchk_fblock_set_corrupt(struct xfs_scrub *sc, int whichfork,
xfs_fileoff_t offset);
+void xchk_qcheck_set_corrupt(struct xfs_scrub *sc, unsigned int dqtype,
+ xfs_dqid_t id);
void xchk_block_xref_set_corrupt(struct xfs_scrub *sc,
struct xfs_buf *bp);
@@ -101,12 +103,18 @@ xchk_setup_rtsummary(struct xfs_scrub *sc)
#endif
#ifdef CONFIG_XFS_QUOTA
int xchk_setup_quota(struct xfs_scrub *sc);
+int xchk_setup_quotacheck(struct xfs_scrub *sc);
#else
static inline int
xchk_setup_quota(struct xfs_scrub *sc)
{
return -ENOENT;
}
+static inline int
+xchk_setup_quotacheck(struct xfs_scrub *sc)
+{
+ return -ENOENT;
+}
#endif
int xchk_setup_fscounters(struct xfs_scrub *sc);
@@ -166,4 +174,8 @@ static inline bool xfs_scrub_needs_repair(struct xfs_scrub_metadata *sm)
XFS_SCRUB_OFLAG_PREEN);
}
+int xchk_iwalk_find_next(struct xfs_mount *mp, struct xfs_trans *tp,
+ struct xfs_buf *agi_bp, struct xfs_perag *pag,
+ xfs_agino_t *cursor);
+
#endif /* __XFS_SCRUB_COMMON_H__ */
diff --git a/fs/xfs/scrub/health.c b/fs/xfs/scrub/health.c
index 54b457482934..3130f7c4ee14 100644
--- a/fs/xfs/scrub/health.c
+++ b/fs/xfs/scrub/health.c
@@ -103,6 +103,7 @@ static const struct xchk_health_map type_to_health_flag[XFS_SCRUB_TYPE_NR] = {
[XFS_SCRUB_TYPE_GQUOTA] = { XHG_FS, XFS_SICK_FS_GQUOTA },
[XFS_SCRUB_TYPE_PQUOTA] = { XHG_FS, XFS_SICK_FS_PQUOTA },
[XFS_SCRUB_TYPE_FSCOUNTERS] = { XHG_FS, XFS_SICK_FS_COUNTERS },
+ [XFS_SCRUB_TYPE_QUOTACHECK] = { XHG_FS, XFS_SICK_FS_QUOTACHECK },
};
/* Return the health status mask for this scrub type. */
diff --git a/fs/xfs/scrub/iscan.c b/fs/xfs/scrub/iscan.c
new file mode 100644
index 000000000000..3254c4e0a74f
--- /dev/null
+++ b/fs/xfs/scrub/iscan.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_inode.h"
+#include "xfs_btree.h"
+#include "xfs_ialloc.h"
+#include "xfs_ialloc_btree.h"
+#include "xfs_ag.h"
+#include "xfs_error.h"
+#include "xfs_bit.h"
+#include "scrub/iscan.h"
+
+/*
+ * Live Inode Scan
+ * ===============
+ *
+ * Live inode scans walk every inode in a live filesystem. This is more or
+ * less like a regular iwalk, except that when we're advancing the scan cursor,
+ * we must ensure that inodes cannot be added or deleted anywhere between the
+ * old cursor value and the new cursor value. If we're advancing the cursor
+ * by one inode, the caller must hold that inode; if we're finding the next
+ * inode to scan, we must grab the AGI and hold it until we've updated the
+ * scan cursor.
+ *
+ * Callers are expected to use this code to scan all files in the filesystem to
+ * construct a new metadata index of some kind. The scan races against other
+ * live updates, which means there must be a provision to update the new index
+ * when updates are made to inodes that have already been scanned. The iscan lock
+ * can be used in live update hook code to stop the scan and protect this data
+ * structure.
+ */
+
+/*
+ * Set the bits in @irec's free mask that correspond to the inodes before
+ * @agino so that we skip them. This is how we restart an inode walk that was
+ * interrupted in the middle of an inode record.
+ */
+STATIC void
+xchk_iscan_adjust_start(
+ xfs_agino_t agino, /* starting inode of chunk */
+ struct xfs_inobt_rec_incore *irec) /* btree record */
+{
+ int idx; /* index into inode chunk */
+
+ idx = agino - irec->ir_startino;
+
+ irec->ir_free |= xfs_inobt_maskn(0, idx);
+ irec->ir_freecount = hweight64(irec->ir_free);
+}
+
+/*
+ * Set *cursor to the next allocated inode after whatever it's set to now.
+ * If there are no more inodes in this AG, cursor is set to NULLAGINO.
+ */
+int
+xchk_iscan_find_next(
+ struct xfs_mount *mp,
+ struct xfs_trans *tp,
+ struct xfs_buf *agi_bp,
+ struct xfs_perag *pag,
+ xfs_agino_t *cursor)
+{
+ struct xfs_inobt_rec_incore rec;
+ struct xfs_btree_cur *cur;
+ xfs_agnumber_t agno = pag->pag_agno;
+ xfs_agino_t lastino = NULLAGINO;
+ xfs_agino_t first, last;
+ xfs_agino_t agino = *cursor;
+ int has_rec;
+ int error;
+
+ /* If the cursor is beyond the end of this AG, move to the next one. */
+ xfs_agino_range(mp, agno, &first, &last);
+ if (agino > last) {
+ *cursor = NULLAGINO;
+ return 0;
+ }
+
+ /*
+ * Look up the inode chunk for the current cursor position. If there
+ * is no chunk here, we want the next one.
+ */
+ cur = xfs_inobt_init_cursor(mp, tp, agi_bp, pag, XFS_BTNUM_INO);
+ error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has_rec);
+ if (!error && !has_rec)
+ error = xfs_btree_increment(cur, 0, &has_rec);
+ for (; !error; error = xfs_btree_increment(cur, 0, &has_rec)) {
+ /*
+ * If we've run out of inobt records in this AG, move the
+ * cursor on to the next AG and exit. The caller can try
+ * again with the next AG.
+ */
+ if (!has_rec) {
+ *cursor = NULLAGINO;
+ break;
+ }
+
+ error = xfs_inobt_get_rec(cur, &rec, &has_rec);
+ if (error)
+ break;
+ if (!has_rec) {
+ error = -EFSCORRUPTED;
+ break;
+ }
+
+ /* Make sure that we always move forward. */
+ if (lastino != NULLAGINO &&
+ XFS_IS_CORRUPT(mp, lastino >= rec.ir_startino)) {
+ error = -EFSCORRUPTED;
+ break;
+ }
+ lastino = rec.ir_startino + XFS_INODES_PER_CHUNK - 1;
+
+ /*
+ * If this record only covers inodes that come before the
+ * cursor, advance to the next record.
+ */
+ if (rec.ir_startino + XFS_INODES_PER_CHUNK <= agino)
+ continue;
+
+ /*
+ * If the incoming lookup put us in the middle of an inobt
+ * record, mark it and the previous inodes "free" so that the
+ * search for allocated inodes will start at the cursor. Use
+ * funny math to avoid overflowing the bit shift.
+ */
+ if (agino >= rec.ir_startino)
+ xchk_iscan_adjust_start(agino + 1, &rec);
+
+ /*
+ * If there are allocated inodes in this chunk, find them,
+ * and update the cursor.
+ */
+ if (rec.ir_freecount < XFS_INODES_PER_CHUNK) {
+ int next = xfs_lowbit64(~rec.ir_free);
+
+ *cursor = rec.ir_startino + next;
+ break;
+ }
+ }
+
+ xfs_btree_del_cursor(cur, error);
+ return error;
+}
+
+/*
+ * Prepare to return agno/agino to the iscan caller by moving the lastino
+ * cursor to the previous inode. Do this while we still hold the AGI so that
+ * no other threads can create or delete inodes in this AG.
+ */
+static inline void
+xchk_iscan_move_cursor(
+ struct xchk_iscan *iscan,
+ struct xfs_mount *mp,
+ xfs_agnumber_t agno,
+ xfs_agino_t agino)
+{
+ mutex_lock(&iscan->lock);
+ iscan->cursor_ino = XFS_AGINO_TO_INO(mp, agno, agino);
+ iscan->marked_ino = iscan->cursor_ino - 1;
+ mutex_unlock(&iscan->lock);
+}
+
+/*
+ * Advance ino to the next inode that the inobt thinks is allocated, being
+ * careful to jump to the next AG and to skip quota inodes. Advancing ino
+ * effectively means that we've pushed the quotacheck scan forward, so set the
+ * quotacheck cursor to (ino - 1) so that our shadow dquot tracking will track
+ * inode allocations in that range once we release the AGI buffer.
+ */
+int
+xchk_iscan_advance(
+ struct xchk_iscan *iscan,
+ struct xfs_trans *tp)
+{
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_buf *agi_bp;
+ struct xfs_perag *pag;
+ xfs_agnumber_t agno;
+ xfs_agino_t agino;
+ int error;
+
+ ASSERT(iscan->cursor_ino >= iscan->marked_ino);
+
+next_ag:
+ agno = XFS_INO_TO_AGNO(mp, iscan->cursor_ino);
+ if (agno >= mp->m_sb.sb_agcount) {
+ xchk_iscan_move_cursor(iscan, mp, agno, 0);
+ iscan->cursor_ino = NULLFSINO;
+ return 0;
+ }
+ agino = XFS_INO_TO_AGINO(mp, iscan->cursor_ino);
+
+ pag = xfs_perag_get(mp, agno);
+ error = xfs_ialloc_read_agi(mp, tp, agno, &agi_bp);
+ if (error)
+ goto out_pag;
+
+ error = xchk_iscan_find_next(mp, tp, agi_bp, pag, &agino);
+ if (error)
+ goto out_buf;
+ if (agino == NULLAGINO) {
+ xchk_iscan_move_cursor(iscan, mp, agno + 1, 0);
+ xfs_trans_brelse(tp, agi_bp);
+ xfs_perag_put(pag);
+ goto next_ag;
+ }
+
+ xchk_iscan_move_cursor(iscan, mp, agno, agino);
+out_buf:
+ xfs_trans_brelse(tp, agi_bp);
+out_pag:
+ xfs_perag_put(pag);
+ return error;
+}
+
+void
+xchk_iscan_finish(
+ struct xchk_iscan *iscan)
+{
+ mutex_destroy(&iscan->lock);
+ iscan->cursor_ino = NULLFSINO;
+ iscan->marked_ino = NULLFSINO;
+}
+
+void
+xchk_iscan_start(
+ struct xchk_iscan *iscan)
+{
+ iscan->marked_ino = 0;
+ iscan->cursor_ino = 0;
+ mutex_init(&iscan->lock);
+}
diff --git a/fs/xfs/scrub/iscan.h b/fs/xfs/scrub/iscan.h
new file mode 100644
index 000000000000..8d3ef186fa94
--- /dev/null
+++ b/fs/xfs/scrub/iscan.h
@@ -0,0 +1,69 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_SCRUB_ISCAN_H__
+#define __XFS_SCRUB_ISCAN_H__
+
+struct xchk_iscan {
+ /* Lock to protect the scan cursor. */
+ struct mutex lock;
+
+ /* This is the inode that is being scanned. */
+ xfs_ino_t cursor_ino;
+
+ /*
+ * This is the last inode that we've successfully scanned, either
+ * because the caller scanned it, or we moved the cursor past an empty
+ * part of the inode address space. Scan callers should only use the
+ * xchk_iscan_mark function to modify this.
+ */
+ xfs_ino_t marked_ino;
+};
+
+void xchk_iscan_start(struct xchk_iscan *iscan);
+void xchk_iscan_finish(struct xchk_iscan *iscan);
+
+int xchk_iscan_advance(struct xchk_iscan *iscan, struct xfs_trans *tp);
+
+static inline void
+xchk_iscan_lock(struct xchk_iscan *iscan)
+{
+ mutex_lock(&iscan->lock);
+}
+
+static inline void
+xchk_iscan_unlock(struct xchk_iscan *iscan)
+{
+ mutex_unlock(&iscan->lock);
+}
+
+/*
+ * If the caller cannot get the cursor inode, set us up so that the next
+ * advance call will re-query the inobt at the same location.
+ */
+static inline void
+xchk_iscan_retry(struct xchk_iscan *iscan)
+{
+ ASSERT(iscan->cursor_ino == iscan->marked_ino + 1);
+
+ iscan->cursor_ino--;
+}
+
+/* Mark this inode as having been scanned. */
+static inline void
+xchk_iscan_mark(struct xchk_iscan *iscan, struct xfs_inode *ip)
+{
+ xchk_iscan_lock(iscan);
+ iscan->marked_ino = ip->i_ino;
+ xchk_iscan_unlock(iscan);
+}
+
+static inline void
+xchk_iscan_mark_locked(struct xchk_iscan *iscan, struct xfs_inode *ip)
+{
+ iscan->marked_ino = ip->i_ino;
+}
+
+#endif /* __XFS_SCRUB_ISCAN_H__ */
diff --git a/fs/xfs/scrub/quotacheck.c b/fs/xfs/scrub/quotacheck.c
new file mode 100644
index 000000000000..48e898230f15
--- /dev/null
+++ b/fs/xfs/scrub/quotacheck.c
@@ -0,0 +1,458 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "xfs_trans_resv.h"
+#include "xfs_mount.h"
+#include "xfs_log_format.h"
+#include "xfs_trans.h"
+#include "xfs_inode.h"
+#include "xfs_quota.h"
+#include "xfs_qm.h"
+#include "xfs_icache.h"
+#include "xfs_bmap_util.h"
+#include "xfs_iwalk.h"
+#include "xfs_ialloc.h"
+#include "xfs_ag.h"
+#include "scrub/scrub.h"
+#include "scrub/common.h"
+#include "scrub/repair.h"
+#include "scrub/array.h"
+#include "scrub/iscan.h"
+#include "scrub/quotacheck.h"
+
+/*
+ * Live Quotacheck
+ * ===============
+ *
+ * Quota counters are "summary" metadata, in the sense that they are computed
+ * as the summation of the block usage counts for every file on the filesystem.
+ * Therefore, we compute the correct icount, bcount, and rtbcount values by
+ * creating a shadow quota counter structure and walking every inode.
+ */
+
+/* Set us up to scrub quota counters. */
+int
+xchk_setup_quotacheck(
+ struct xfs_scrub *sc)
+{
+ /* Not ready for general consumption yet. */
+ return -EOPNOTSUPP;
+
+ if (!XFS_IS_QUOTA_ON(sc->mp))
+ return -ENOENT;
+
+ sc->buf = kmem_zalloc(sizeof(struct xqcheck), KM_NOFS | KM_MAYFAIL);
+ if (!sc->buf)
+ return -ENOMEM;
+
+ sc->flags |= XCHK_HAS_QUOTAOFFLOCK;
+ mutex_lock(&sc->mp->m_quotainfo->qi_quotaofflock);
+ ASSERT(XFS_IS_QUOTA_ON(sc->mp));
+
+ return xchk_setup_fs(sc);
+}
+
+/* Retrieve the shadow dquot for the given id. */
+int
+xqcheck_get_shadow_dquot(
+ struct xfbma *counts,
+ xfs_dqid_t id,
+ struct xqcheck_dquot *xcdq)
+{
+ int error;
+
+ error = xfbma_get(counts, id, xcdq);
+ if (error == -ENODATA) {
+ /*
+ * ENODATA means we tried to read beyond the end of the sparse
+ * array. This isn't a big deal, just zero the incore record
+ * and return that.
+ */
+ memset(xcdq, 0, sizeof(struct xqcheck_dquot));
+ return 0;
+ }
+ return error;
+}
+
+/* Update an incore dquot information. */
+static int
+xqcheck_update_incore(
+ struct xqcheck *xqc,
+ struct xfbma *counts,
+ xfs_dqid_t id,
+ int64_t inodes,
+ int64_t nblks,
+ int64_t rtblks)
+{
+ struct xqcheck_dquot xcdq;
+ int error;
+
+ if (!counts)
+ return 0;
+
+ error = xqcheck_get_shadow_dquot(counts, id, &xcdq);
+ if (error)
+ return error;
+
+ xcdq.icount += inodes;
+ xcdq.bcount += nblks;
+ xcdq.rtbcount += rtblks;
+
+ error = xfbma_set(counts, id, &xcdq);
+ if (error == -EFBIG) {
+ /*
+ * EFBIG means we tried to store data at too high a byte offset
+ * in the sparse array. IOWs, we cannot complete the check and
+ * must notify userspace that the check was incomplete.
+ */
+ xchk_set_incomplete(xqc->sc);
+ error = -ECANCELED;
+ }
+ return error;
+}
+
+/* Record this inode's quota usage in our shadow quota counter data. */
+STATIC int
+xqcheck_inode(
+ struct xqcheck *xqc,
+ struct xfs_inode *ip)
+{
+ struct xfs_trans *tp = xqc->sc->tp;
+ xfs_filblks_t nblks, rtblks;
+ uint ilock_flags = 0;
+ xfs_dqid_t id;
+ int error;
+
+ if (xfs_is_quota_inode(&tp->t_mountp->m_sb, ip->i_ino)) {
+ xchk_iscan_mark(&xqc->iscan, ip);
+ return 0;
+ }
+
+ /* Figure out the data / rt device block counts. */
+ ilock_flags = xfs_ilock_data_map_shared(ip);
+ if (XFS_IS_REALTIME_INODE(ip)) {
+ error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
+ if (error)
+ goto out_ilock;
+ }
+ xfs_inode_count_blocks(tp, ip, &nblks, &rtblks);
+
+ xchk_iscan_lock(&xqc->iscan);
+
+ /* Update the shadow dquot counters. */
+ id = xfs_qm_id_for_quotatype(ip, XFS_DQTYPE_USER);
+ error = xqcheck_update_incore(xqc, xqc->ucounts, id, 1, nblks, rtblks);
+ if (error)
+ goto out_scan_lock;
+
+ id = xfs_qm_id_for_quotatype(ip, XFS_DQTYPE_GROUP);
+ error = xqcheck_update_incore(xqc, xqc->gcounts, id, 1, nblks, rtblks);
+ if (error)
+ goto out_scan_lock;
+
+ id = xfs_qm_id_for_quotatype(ip, XFS_DQTYPE_PROJ);
+ error = xqcheck_update_incore(xqc, xqc->pcounts, id, 1, nblks, rtblks);
+ if (error)
+ goto out_scan_lock;
+
+ xchk_iscan_mark_locked(&xqc->iscan, ip);
+
+out_scan_lock:
+ if (error)
+ xchk_set_incomplete(xqc->sc);
+ xchk_iscan_unlock(&xqc->iscan);
+out_ilock:
+ xfs_iunlock(ip, ilock_flags);
+ return error;
+}
+
+/* Walk all the allocated inodes and run a quota scan on them. */
+STATIC int
+xqcheck_collect_counts(
+ struct xqcheck *xqc)
+{
+ struct xfs_scrub *sc = xqc->sc;
+ struct xchk_iscan *iscan = &xqc->iscan;
+ struct xfs_inode *ip;
+ int flags = XFS_IGET_UNTRUSTED | XFS_IGET_DONTCACHE;
+ unsigned int retries = 20;
+ int error;
+
+ while (!(error = xchk_iscan_advance(iscan, xqc->sc->tp))) {
+ if (iscan->cursor_ino == NULLFSINO ||
+ xchk_should_terminate(sc, &error))
+ break;
+
+ error = xfs_iget(sc->mp, sc->tp, iscan->cursor_ino, flags, 0,
+ &ip);
+ switch (error) {
+ case 0:
+ error = xqcheck_inode(xqc, ip);
+ xfs_irele(ip);
+ if (error)
+ return error;
+ retries = 20;
+ break;
+ case -ENOENT:
+ /*
+ * It's possible that this inode has lost all of its
+ * links but hasn't yet been inactivated. Try to push
+ * it towards inactivation.
+ */
+ xfs_inodegc_flush(xqc->sc->mp);
+ fallthrough;
+ case -EINVAL:
+ /*
+ * We thought the inode was allocated, but iget failed
+ * to find it. This could be because the inobt lookup
+ * failed, or because there's an incore inode that
+ * thinks it's marked free. Either way, we back up
+ * one inode and try to advance the cursor.
+ */
+ xchk_iscan_retry(iscan);
+ if (--retries == 0) {
+ xchk_set_incomplete(sc);
+ return -ECANCELED;
+ }
+ delay(HZ / 10);
+ break;
+ default:
+ return error;
+ }
+ }
+ return error;
+}
+
+/*
+ * Check the dquot data against what we observed. Caller must hold the dquot
+ * lock.
+ */
+STATIC int
+xqcheck_compare_dquot(
+ struct xfs_dquot *dqp,
+ xfs_dqtype_t dqtype,
+ void *priv)
+{
+ struct xqcheck_dquot xcdq;
+ struct xqcheck *xqc = priv;
+ struct xfbma *counts = xqcheck_counters_for(xqc, dqtype);
+ int error;
+
+ xchk_iscan_lock(&xqc->iscan);
+
+ error = xqcheck_get_shadow_dquot(counts, dqp->q_id, &xcdq);
+ if (error)
+ goto out_unlock;
+
+ if (xcdq.icount != dqp->q_ino.count)
+ xchk_qcheck_set_corrupt(xqc->sc, dqtype, dqp->q_id);
+
+ if (xcdq.bcount != dqp->q_blk.count)
+ xchk_qcheck_set_corrupt(xqc->sc, dqtype, dqp->q_id);
+
+ if (xcdq.rtbcount != dqp->q_rtb.count)
+ xchk_qcheck_set_corrupt(xqc->sc, dqtype, dqp->q_id);
+
+ if (xqc->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) {
+ error = -ECANCELED;
+ goto out_unlock;
+ }
+
+out_unlock:
+ xchk_iscan_unlock(&xqc->iscan);
+ return error;
+}
+
+/*
+ * Walk all the observed dquots, and make sure there's a matching incore
+ * dquot and that its counts match ours.
+ */
+STATIC int
+xqcheck_walk_observations(
+ struct xqcheck *xqc,
+ xfs_dqtype_t dqtype)
+{
+ struct xqcheck_dquot xcdq;
+ struct xfs_dquot *dqp;
+ struct xfbma *counts = xqcheck_counters_for(xqc, dqtype);
+ uint64_t nr = 0;
+ int error;
+
+ if (!counts)
+ return 0;
+
+ xchk_iscan_lock(&xqc->iscan);
+ while (!(error = xfbma_iter_get(counts, &nr, &xcdq))) {
+ xfs_dqid_t id = nr - 1;
+
+ xchk_iscan_unlock(&xqc->iscan);
+
+ if (xchk_should_terminate(xqc->sc, &error))
+ return error;
+
+ error = xfs_qm_dqget(xqc->sc->mp, id, dqtype, false, &dqp);
+ if (error == -ENOENT) {
+ xchk_qcheck_set_corrupt(xqc->sc, dqtype, id);
+ return 0;
+ }
+ if (error)
+ return error;
+
+ error = xqcheck_compare_dquot(dqp, dqtype, xqc);
+ xfs_qm_dqput(dqp);
+ if (error)
+ return error;
+
+ xchk_iscan_lock(&xqc->iscan);
+ }
+ xchk_iscan_unlock(&xqc->iscan);
+
+ /* ENODATA means we hit the end of the array. */
+ if (error == -ENODATA)
+ return 0;
+
+ return error;
+}
+
+/* Compare the quota counters we observed against the live dquots. */
+STATIC int
+xqcheck_compare_dqtype(
+ struct xqcheck *xqc,
+ xfs_dqtype_t dqtype)
+{
+ struct xfs_scrub *sc = xqc->sc;
+ int error;
+
+ if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
+ return 0;
+
+ /* If the quota CHKD flag is cleared, we need to repair this quota. */
+ if (!(xfs_quota_chkd_flag(dqtype) & sc->mp->m_qflags)) {
+ xchk_qcheck_set_corrupt(xqc->sc, dqtype, 0);
+ return 0;
+ }
+
+ /* Compare what we observed against the actual dquots. */
+ error = xfs_qm_dqiterate(sc->mp, dqtype, xqcheck_compare_dquot, xqc);
+ if (error)
+ return error;
+
+ /* Walk all the observed dquots and compare to the incore ones. */
+ return xqcheck_walk_observations(xqc, dqtype);
+}
+
+/* Tear down everything associated with a quotacheck. */
+static void
+xqcheck_teardown_scan(
+ struct xqcheck *xqc)
+{
+ if (xqc->pcounts) {
+ xfbma_destroy(xqc->pcounts);
+ xqc->pcounts = NULL;
+ }
+
+ if (xqc->gcounts) {
+ xfbma_destroy(xqc->gcounts);
+ xqc->gcounts = NULL;
+ }
+
+ if (xqc->ucounts) {
+ xfbma_destroy(xqc->ucounts);
+ xqc->ucounts = NULL;
+ }
+
+ xchk_iscan_finish(&xqc->iscan);
+ xqc->sc = NULL;
+}
+
+/*
+ * Scan all inodes in the entire filesystem to generate quota counter data.
+ * If the scan is successful, the quota data will be left alive for a repair.
+ * If any error occurs, we'll tear everything down.
+ */
+STATIC int
+xqcheck_setup_scan(
+ struct xfs_scrub *sc,
+ struct xqcheck *xqc)
+{
+ int error;
+
+ ASSERT(xqc->sc == NULL);
+ xqc->sc = sc;
+
+ xchk_iscan_start(&xqc->iscan);
+
+ error = -ENOMEM;
+ if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_USER)) {
+ xqc->ucounts = xfbma_init("user dquots",
+ sizeof(struct xqcheck_dquot));
+ if (!xqc->ucounts)
+ goto out_teardown;
+ }
+
+ if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_GROUP)) {
+ xqc->gcounts = xfbma_init("group dquots",
+ sizeof(struct xqcheck_dquot));
+ if (!xqc->gcounts)
+ goto out_teardown;
+ }
+
+ if (xfs_this_quota_on(sc->mp, XFS_DQTYPE_PROJ)) {
+ xqc->pcounts = xfbma_init("proj dquots",
+ sizeof(struct xqcheck_dquot));
+ if (!xqc->pcounts)
+ goto out_teardown;
+ }
+
+ /* Use deferred cleanup to pass the quota count data to repair. */
+ sc->buf_cleanup = (void (*)(void *))xqcheck_teardown_scan;
+ return 0;
+
+out_teardown:
+ xqcheck_teardown_scan(xqc);
+ return error;
+}
+
+/* Scrub all counters for a given quota type. */
+int
+xchk_quotacheck(
+ struct xfs_scrub *sc)
+{
+ struct xqcheck *xqc = sc->buf;
+ int error = 0;
+
+ /* Check quota counters on the live filesystem. */
+ error = xqcheck_setup_scan(sc, xqc);
+ if (error)
+ return error;
+
+ /* Walk all inodes, picking up quota information. */
+ error = xqcheck_collect_counts(xqc);
+ if (!xchk_xref_process_error(sc, 0, 0, &error))
+ return error;
+
+ /* Compare quota counters. */
+ if (xqc->ucounts) {
+ error = xqcheck_compare_dqtype(xqc, XFS_DQTYPE_USER);
+ if (!xchk_xref_process_error(sc, 0, 0, &error))
+ return error;
+ }
+ if (xqc->gcounts) {
+ error = xqcheck_compare_dqtype(xqc, XFS_DQTYPE_GROUP);
+ if (!xchk_xref_process_error(sc, 0, 0, &error))
+ return error;
+ }
+ if (xqc->pcounts) {
+ error = xqcheck_compare_dqtype(xqc, XFS_DQTYPE_PROJ);
+ if (!xchk_xref_process_error(sc, 0, 0, &error))
+ return error;
+ }
+
+ return 0;
+}
diff --git a/fs/xfs/scrub/quotacheck.h b/fs/xfs/scrub/quotacheck.h
new file mode 100644
index 000000000000..69bc12fb81cf
--- /dev/null
+++ b/fs/xfs/scrub/quotacheck.h
@@ -0,0 +1,59 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2021 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_SCRUB_QUOTACHECK_H__
+#define __XFS_SCRUB_QUOTACHECK_H__
+
+/*
+ * Quota counters for live quotacheck. Pad the structure to 32 bytes to avoid
+ * a weird interaction between sparse xfbma arrays and shmem files, and so that
+ * we never mistake a zero-count xqcheck_dquot for a null record.
+ */
+struct xqcheck_dquot {
+ /* block usage count */
+ int64_t bcount;
+
+ /* inode usage count */
+ int64_t icount;
+
+ /* realtime block usage count */
+ int64_t rtbcount;
+};
+
+/* Live quotacheck control structure. */
+struct xqcheck {
+ struct xfs_scrub *sc;
+
+ /* Shadow dquot counter data. */
+ struct xfbma *ucounts;
+ struct xfbma *gcounts;
+ struct xfbma *pcounts;
+
+ struct xchk_iscan iscan;
+};
+
+/* Return the incore counter array for a given quota type. */
+static inline struct xfbma *
+xqcheck_counters_for(
+ struct xqcheck *xqc,
+ xfs_dqtype_t dqtype)
+{
+ switch (dqtype) {
+ case XFS_DQTYPE_USER:
+ return xqc->ucounts;
+ case XFS_DQTYPE_GROUP:
+ return xqc->gcounts;
+ case XFS_DQTYPE_PROJ:
+ return xqc->pcounts;
+ }
+
+ ASSERT(0);
+ return NULL;
+}
+
+int xqcheck_get_shadow_dquot(struct xfbma *counts, xfs_dqid_t id,
+ struct xqcheck_dquot *xcdq);
+
+#endif /* __XFS_SCRUB_QUOTACHECK_H__ */
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 62e75732f9d1..bfddeb6e600e 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -181,7 +181,10 @@ xchk_teardown(
sc->xfile = NULL;
}
if (sc->buf) {
+ if (sc->buf_cleanup)
+ sc->buf_cleanup(sc->buf);
kmem_free(sc->buf);
+ sc->buf_cleanup = NULL;
sc->buf = NULL;
}
return error;
@@ -349,6 +352,12 @@ static const struct xchk_meta_ops meta_scrub_ops[] = {
.scrub = xchk_fscounters,
.repair = xrep_notsupported,
},
+ [XFS_SCRUB_TYPE_QUOTACHECK] = { /* quota counters */
+ .type = ST_FS,
+ .setup = xchk_setup_quotacheck,
+ .scrub = xchk_quotacheck,
+ .repair = xrep_notsupported,
+ },
};
/* This isn't a stable feature, warn once per day. */
diff --git a/fs/xfs/scrub/scrub.h b/fs/xfs/scrub/scrub.h
index 98fc2e222208..94ad3b60a6b8 100644
--- a/fs/xfs/scrub/scrub.h
+++ b/fs/xfs/scrub/scrub.h
@@ -80,6 +80,14 @@ struct xfs_scrub {
/* Kernel memory buffer used by scrubbers; freed at teardown. */
void *buf;
+ /*
+ * Clean up resources owned by whatever is in the buffer. Cleanup can
+ * be deferred with this hook as a means for scrub functions to pass
+ * data to repair functions. This function must not free the buffer
+ * itself.
+ */
+ void (*buf_cleanup)(void *buf);
+
/* xfile used by the scrubbers; freed at teardown. */
struct xfile *xfile;
@@ -144,12 +152,18 @@ xchk_rtsummary(struct xfs_scrub *sc)
#endif
#ifdef CONFIG_XFS_QUOTA
int xchk_quota(struct xfs_scrub *sc);
+int xchk_quotacheck(struct xfs_scrub *sc);
#else
static inline int
xchk_quota(struct xfs_scrub *sc)
{
return -ENOENT;
}
+static inline int
+xchk_quotacheck(struct xfs_scrub *sc)
+{
+ return -ENOENT;
+}
#endif
int xchk_fscounters(struct xfs_scrub *sc);
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 4998719ce2dd..bf279d48aa19 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -15,6 +15,7 @@
#include <linux/tracepoint.h>
#include "xfs_bit.h"
+#include "xfs_quota_defs.h"
struct xfile;
@@ -83,7 +84,8 @@ TRACE_DEFINE_ENUM(XFS_SCRUB_TYPE_FSCOUNTERS);
{ XFS_SCRUB_TYPE_UQUOTA, "usrquota" }, \
{ XFS_SCRUB_TYPE_GQUOTA, "grpquota" }, \
{ XFS_SCRUB_TYPE_PQUOTA, "prjquota" }, \
- { XFS_SCRUB_TYPE_FSCOUNTERS, "fscounters" }
+ { XFS_SCRUB_TYPE_FSCOUNTERS, "fscounters" }, \
+ { XFS_SCRUB_TYPE_QUOTACHECK, "quotacheck" }
#define XFS_SCRUB_FLAG_STRINGS \
{ XFS_SCRUB_IFLAG_REPAIR, "repair" }, \
@@ -307,6 +309,29 @@ DEFINE_EVENT(xchk_fblock_error_class, name, \
DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_error);
DEFINE_SCRUB_FBLOCK_ERROR_EVENT(xchk_fblock_warning);
+TRACE_EVENT(xchk_qcheck_error,
+ TP_PROTO(struct xfs_scrub *sc, xfs_dqtype_t dqtype, xfs_dqid_t id,
+ void *ret_ip),
+ TP_ARGS(sc, dqtype, id, ret_ip),
+ TP_STRUCT__entry(
+ __field(dev_t, dev)
+ __field(xfs_dqtype_t, dqtype)
+ __field(xfs_dqid_t, id)
+ __field(void *, ret_ip)
+ ),
+ TP_fast_assign(
+ __entry->dev = sc->mp->m_super->s_dev;
+ __entry->dqtype = dqtype;
+ __entry->id = id;
+ __entry->ret_ip = ret_ip;
+ ),
+ TP_printk("dev %d:%d dquot type %s id 0x%x ret_ip %pS",
+ MAJOR(__entry->dev), MINOR(__entry->dev),
+ __print_symbolic(__entry->dqtype, XFS_DQTYPE_STRINGS),
+ __entry->id,
+ __entry->ret_ip)
+);
+
TRACE_EVENT(xchk_incomplete,
TP_PROTO(struct xfs_scrub *sc, void *ret_ip),
TP_ARGS(sc, ret_ip),
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index cd5787a1f9f7..78336e30e8ed 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -3805,3 +3805,24 @@ xfs_iunlock2_io_mmap(
if (ip1 != ip2)
inode_unlock(VFS_I(ip1));
}
+
+/* Compute the number of data and realtime blocks used by a file. */
+void
+xfs_inode_count_blocks(
+ struct xfs_trans *tp,
+ struct xfs_inode *ip,
+ xfs_filblks_t *dblocks,
+ xfs_filblks_t *rblocks)
+{
+ struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
+
+ if (!XFS_IS_REALTIME_INODE(ip)) {
+ *dblocks = ip->i_nblocks;
+ *rblocks = 0;
+ return;
+ }
+
+ *rblocks = 0;
+ xfs_bmap_count_leaves(ifp, rblocks);
+ *dblocks = ip->i_nblocks - *rblocks;
+}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index b21b177832d1..753b4903f555 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -519,4 +519,7 @@ void xfs_end_io(struct work_struct *work);
int xfs_ilock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
void xfs_iunlock2_io_mmap(struct xfs_inode *ip1, struct xfs_inode *ip2);
+void xfs_inode_count_blocks(struct xfs_trans *tp, struct xfs_inode *ip,
+ xfs_filblks_t *dblocks, xfs_filblks_t *rblocks);
+
#endif /* __XFS_INODE_H__ */
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
index 7558486f4937..7959a8b89781 100644
--- a/fs/xfs/xfs_iwalk.c
+++ b/fs/xfs/xfs_iwalk.c
@@ -22,6 +22,7 @@
#include "xfs_trans.h"
#include "xfs_pwork.h"
#include "xfs_ag.h"
+#include "xfs_bit.h"
/*
* Walking Inodes in the Filesystem
@@ -131,21 +132,11 @@ xfs_iwalk_adjust_start(
struct xfs_inobt_rec_incore *irec) /* btree record */
{
int idx; /* index into inode chunk */
- int i;
idx = agino - irec->ir_startino;
- /*
- * We got a right chunk with some left inodes allocated at it. Grab
- * the chunk record. Mark all the uninteresting inodes free because
- * they're before our start point.
- */
- for (i = 0; i < idx; i++) {
- if (XFS_INOBT_MASK(i) & ~irec->ir_free)
- irec->ir_freecount++;
- }
-
irec->ir_free |= xfs_inobt_maskn(0, idx);
+ irec->ir_freecount = hweight64(irec->ir_free);
}
/* Allocate memory for a walk. */