// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2018 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_inode_fork.h"
#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_refcount.h"
#include "xfs_quota.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/array.h"

/*
 * Inode fork block mapping (BMBT) repair.
 *
 * Basically, we gather all the rmap records for the inode and fork we're
 * fixing, reset the incore fork, then re-add all the records.
 */

/*
 * Flag kept in the top bit of xrep_bmap_extent.startoff to mark a mapping as
 * unwritten.  Use xrep_bmap_startoff() to read the offset without the flag.
 */
#define XREP_BMAP_UNWRITTEN	((uint64_t)1ULL << 63)
/*
 * Smallest possible record to represent a single contiguous physical map.
 * Records of this type are built from rmap records in xrep_bmap_walk_rmap()
 * and stored in the xfbma array that xrep_bmap() sizes with
 * sizeof(struct xrep_bmap_extent).
 */
struct xrep_bmap_extent {
	/* starting file offset; upper bit (XREP_BMAP_UNWRITTEN) means unwritten */
	xfs_fileoff_t	startoff;
	/* starting block of the mapping, as an fsblock (AG block converted) */
	xfs_fsblock_t	startblock;
	/* length of the mapping, in blocks */
	xfs_filblks_t	blockcount;
} __attribute__((packed));

/*
 * Return the starting file offset stored in @ext with the
 * XREP_BMAP_UNWRITTEN flag bit masked off.
 */
static inline xfs_fileoff_t
xrep_bmap_startoff(
	const struct xrep_bmap_extent	*ext)
{
	xfs_fileoff_t			off = ext->startoff;

	off &= ~XREP_BMAP_UNWRITTEN;
	return off;
}

/*
 * State carried through the rmap walk (passed as the private pointer to
 * xrep_bmap_walk_rmap) while collecting mappings for one fork of one inode.
 */
struct xrep_bmap {
	/* List of new bmap records (struct xrep_bmap_extent), appended per rmap. */
	struct xfbma		*bmap_records;

	/* Old bmbt blocks; each one found is set in this bitmap by fsblock. */
	struct xfs_bitmap	*btlist;

	/* Scrub context; used to check whether the walk should terminate. */
	struct xfs_scrub	*sc;

	/* Inode we're fixing; rmaps with a different owner are skipped. */
	xfs_ino_t		ino;

	/* How many blocks did we find in the other fork? */
	xfs_rfsblock_t		otherfork_blocks;

	/* How many bmbt blocks did we find for this fork? */
	xfs_rfsblock_t		bmbt_blocks;

	/* Which fork are we fixing?  (XFS_DATA_FORK or XFS_ATTR_FORK) */
	int			whichfork;
};

/* Record extents that belong to this inode's fork. */
STATIC int
xrep_bmap_walk_rmap(
	struct xfs_btree_cur	*cur,
	struct xfs_rmap_irec	*rec,
	void			*priv)
{
	struct xrep_bmap	*rb = priv;
	struct xrep_bmap_extent	rbe;
	struct xfs_mount	*mp = cur->bc_mp;
	xfs_fsblock_t		fsbno;
	int			error = 0;

	if (xchk_should_terminate(rb->sc, &error))
		return error;

	/* Skip extents which are not owned by this inode and fork. */
	if (rec->rm_owner != rb->ino) {
		return 0;
	} else if (rb->whichfork == XFS_DATA_FORK &&
		 (rec->rm_flags & XFS_RMAP_ATTR_FORK)) {
		rb->otherfork_blocks += rec->rm_blockcount;
		return 0;
	} else if (rb->whichfork == XFS_ATTR_FORK &&
		 !(rec->rm_flags & XFS_RMAP_ATTR_FORK)) {
		rb->otherfork_blocks += rec->rm_blockcount;
		return 0;
	}

	/* Delete the old bmbt blocks later. */
	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) {
		fsbno = XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno,
				rec->rm_startblock);
		rb->bmbt_blocks += rec->rm_blockcount;
		return xfs_bitmap_set(rb->btlist, fsbno, rec->rm_blockcount);
	}

	/* Remember this rmap. */
	rbe.startoff = rec->rm_offset;
	rbe.startblock = XFS_AGB_TO_FSB(mp, cur->bc_private.a.agno,
			rec->rm_startblock);
	rbe.blockcount = rec->rm_blockcount;
	if (rec->rm_flags & XFS_RMAP_UNWRITTEN)
		rbe.startoff |= XREP_BMAP_UNWRITTEN;
	return xfbma_append(rb->bmap_records, &rbe);
}

/*
 * Sort comparator for struct xrep_bmap_extent records.  Orders by starting
 * file offset with the unwritten flag bit ignored; returns -1, 0 or 1.
 */
static int
xrep_bmap_extent_cmp(
	const void			*a,
	const void			*b)
{
	const struct xrep_bmap_extent	*ext_a = a;
	const struct xrep_bmap_extent	*ext_b = b;
	xfs_fileoff_t			off_a = xrep_bmap_startoff(ext_a);
	xfs_fileoff_t			off_b = xrep_bmap_startoff(ext_b);

	if (off_a < off_b)
		return -1;
	if (off_a > off_b)
		return 1;
	return 0;
}

/*
 * Scan one AG for reverse mappings that we can turn into extent maps.
 *
 * Reads the AGF for @agno, walks every record in that AG's rmap btree with
 * xrep_bmap_walk_rmap(), then tears down the cursor and releases the AGF
 * buffer.  Returns 0 or a negative errno; XFS_BTREE_QUERY_RANGE_ABORT from
 * the walk is reported as success.
 */
STATIC int
xrep_bmap_scan_ag(
	struct xrep_bmap	*rb,
	xfs_agnumber_t		agno)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_btree_cur	*rmap_cur;
	struct xfs_buf		*agf = NULL;
	int			ret;

	ret = xfs_alloc_read_agf(sc->mp, sc->tp, agno, 0, &agf);
	if (ret)
		return ret;
	if (agf == NULL)
		return -ENOMEM;

	rmap_cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, agno);
	ret = xfs_rmap_query_all(rmap_cur, xrep_bmap_walk_rmap, rb);
	if (ret == XFS_BTREE_QUERY_RANGE_ABORT)
		ret = 0;

	/* The cursor goes first, then the AGF buffer it was built on. */
	xfs_btree_del_cursor(rmap_cur, ret);
	xfs_trans_brelse(sc->tp, agf);
	return ret;
}

/* Private context handed to xrep_bmap_insert_rec() for each stored record. */
struct xrep_add_bmap {
	/* Scrub context supplying the transaction and inode being rebuilt. */
	struct xfs_scrub	*sc;
	/* XFS_BMAPI_* flags applied to every remap; per-record flags are ORed in. */
	int			baseflags;
};

/* Insert bmap records into an inode fork, given an rmap. */
STATIC int
xrep_bmap_insert_rec(
	const void			*item,
	void				*priv)
{
	const struct xrep_bmap_extent	*rbe = item;
	struct xfs_bmbt_irec		bmap = {
		.br_startblock	= rbe->startblock,
		.br_startoff	= xrep_bmap_startoff(rbe),
		.br_blockcount	= rbe->blockcount,
	};
	struct xrep_add_bmap		*x = priv;
	xfs_extlen_t			extlen;
	int				flags = x->baseflags;
	int				error = 0;

	if (rbe->startoff & XREP_BMAP_UNWRITTEN)
		flags |= XFS_BMAPI_PREALLOC;
	while (bmap.br_blockcount > 0) {
		extlen = min_t(xfs_filblks_t, bmap.br_blockcount, MAXEXTLEN);

		/* Re-add the extent to the fork. */
		error = xfs_bmapi_remap(x->sc->tp, x->sc->ip, bmap.br_startoff,
				extlen, bmap.br_startblock, flags);
		if (error)
			goto out;

		bmap.br_startblock += extlen;
		bmap.br_startoff += extlen;
		bmap.br_blockcount -= extlen;

		error = xfs_defer_finish(&x->sc->tp);
		if (error)
			goto out;
		/* Make sure we roll the transaction. */
		error = xfs_trans_roll_inode(&x->sc->tp, x->sc->ip);
		if (error)
			goto out;
	}

out:
	return error;
}

/*
 * Check for garbage inputs before attempting a fork rebuild.
 *
 * Returns 0 if the fork can be rebuilt, otherwise:
 *   -EOPNOTSUPP  fork is not in extents or btree format, or the inode is a
 *                realtime inode (data fork only)
 *   -ENOENT      attr fork requested but the inode has no attr fork area
 *   -EINVAL      data fork requested on something other than a regular file,
 *                directory, or symlink
 *   -EBUSY       data fork has delayed allocation blocks
 */
STATIC int
xrep_bmap_check_inputs(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	unsigned int		ftype;

	ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);

	/* Don't know how to repair the other fork formats. */
	switch (XFS_IFORK_FORMAT(sc->ip, whichfork)) {
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		break;
	default:
		return -EOPNOTSUPP;
	}

	/*
	 * For the attr fork the only remaining question is whether the inode
	 * has an attr fork area at all; without one there is nothing to
	 * rebuild.
	 */
	if (whichfork == XFS_ATTR_FORK)
		return XFS_IFORK_Q(sc->ip) ? 0 : -ENOENT;

	/* Only files, symlinks, and directories get to have data forks. */
	ftype = VFS_I(sc->ip)->i_mode & S_IFMT;
	if (ftype != S_IFREG && ftype != S_IFDIR && ftype != S_IFLNK)
		return -EINVAL;

	/* If we somehow have delalloc extents, forget it. */
	if (sc->ip->i_delayed_blks)
		return -EBUSY;

	/* Don't know how to rebuild realtime data forks. */
	if (XFS_IS_REALTIME_INODE(sc->ip))
		return -EOPNOTSUPP;

	return 0;
}

/*
 * Collect block mappings for this fork of this inode and decide if we have
 * enough space to rebuild.
 *
 * Every AG's rmap btree is scanned.  Mappings for @whichfork are appended to
 * @bmap_records and old bmbt blocks are set in @old_bmbt_blocks.  The
 * transaction's block reservation is then grown by the bmbt size estimated
 * from the number of records found.
 *
 * @old_bmbt_block_count and @otherfork_blocks are written only on success.
 * Caller is responsible for cleaning up the list and bitmap if anything goes
 * wrong.
 */
STATIC int
xrep_bmap_find_mappings(
	struct xfs_scrub	*sc,
	int			whichfork,
	struct xfbma		*bmap_records,
	struct xfs_bitmap	*old_bmbt_blocks,
	xfs_rfsblock_t		*old_bmbt_block_count,
	xfs_rfsblock_t		*otherfork_blocks)
{
	struct xrep_bmap	rb = {
		.bmap_records		= bmap_records,
		.btlist			= old_bmbt_blocks,
		.sc			= sc,
		.ino			= sc->ip->i_ino,
		.otherfork_blocks	= 0,
		.bmbt_blocks		= 0,
		.whichfork		= whichfork,
	};
	xfs_agnumber_t		agno = 0;
	unsigned int		resblks;
	int			error;

	/* Iterate the rmaps for extents, one AG at a time. */
	while (agno < sc->mp->m_sb.sb_agcount) {
		error = xrep_bmap_scan_ag(&rb, agno);
		if (error)
			return error;
		agno++;
	}

	/*
	 * Guess how many blocks we're going to need to rebuild an entire bmap
	 * from the number of extents we found, and pump up our transaction to
	 * have sufficient block reservation.
	 */
	resblks = xfs_bmbt_calc_size(sc->mp, xfbma_length(bmap_records));
	error = xfs_trans_reserve_more(sc->tp, resblks, 0);
	if (error)
		return error;

	/* Hand the tallies back only once everything has succeeded. */
	*old_bmbt_block_count = rb.bmbt_blocks;
	*otherfork_blocks = rb.otherfork_blocks;
	return 0;
}

/*
 * Update the inode counters ahead of the rebuild.
 *
 * Joins the inode to the transaction, resets di_nblocks to
 * @otherfork_blocks, ORs XFS_ILOG_CORE into @log_flags and, if any old bmbt
 * blocks were found, attaches dquots and subtracts @old_bmbt_block_count from
 * the inode's quota block count.  Returns 0 or the error from
 * xrep_ino_dqattach().
 */
STATIC int
xrep_bmap_reset_counters(
	struct xfs_scrub	*sc,
	xfs_rfsblock_t		old_bmbt_block_count,
	xfs_rfsblock_t		otherfork_blocks,
	int			*log_flags)
{
	int			error;

	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/*
	 * We're going to use the bmap routines to reconstruct a fork from rmap
	 * records.  Those functions increment di_nblocks for us, so we need to
	 * subtract out all the data and bmbt blocks from the fork we're about
	 * to rebuild.  otherfork_blocks reflects all the data and bmbt blocks
	 * for the other fork, so this assignment effectively performs the
	 * subtraction for us.
	 */
	sc->ip->i_d.di_nblocks = otherfork_blocks;
	*log_flags |= XFS_ILOG_CORE;

	/* No old bmbt blocks means no quota adjustment is needed. */
	if (old_bmbt_block_count == 0)
		return 0;

	/* Release quota counts for the old bmbt blocks. */
	error = xrep_ino_dqattach(sc);
	if (!error)
		xfs_trans_mod_dquot_byino(sc->tp, sc->ip, XFS_TRANS_DQ_BCOUNT,
				-(int64_t)old_bmbt_block_count);
	return error;
}

/*
 * Initialize a new fork and implant it in the inode.
 *
 * The fork is set back to extents format with zero records, any existing
 * in-core fork is destroyed, and XFS_ILOG_CORE is ORed into @log_flags.
 *
 * @no_mappings is true when the rebuild found no mappings for this fork (the
 * caller passes xfbma_length(bmap_records) == 0).  It only affects the attr
 * fork: with no mappings the in-core attr fork pointer is left NULL,
 * otherwise a fresh, empty extents-format in-core fork is allocated.  The
 * data fork is always reinitialized in place.
 */
STATIC void
xrep_bmap_reset_fork(
	struct xfs_scrub	*sc,
	int			whichfork,
	bool			no_mappings,
	int			*log_flags)
{
	/* Set us back to extents format with zero records. */
	XFS_IFORK_FMT_SET(sc->ip, whichfork, XFS_DINODE_FMT_EXTENTS);
	XFS_IFORK_NEXT_SET(sc->ip, whichfork, 0);

	/* Reinitialize the in-core fork. */
	if (XFS_IFORK_PTR(sc->ip, whichfork) != NULL)
		xfs_idestroy_fork(sc->ip, whichfork);
	if (whichfork == XFS_DATA_FORK) {
		memset(&sc->ip->i_df, 0, sizeof(struct xfs_ifork));
		sc->ip->i_df.if_flags |= XFS_IFEXTENTS;
	} else if (whichfork == XFS_ATTR_FORK) {
		if (no_mappings) {
			/* Nothing to map back in, so no in-core attr fork. */
			sc->ip->i_afp = NULL;
		} else {
			sc->ip->i_afp = kmem_zone_zalloc(xfs_ifork_zone,
					KM_SLEEP);
			sc->ip->i_afp->if_flags |= XFS_IFEXTENTS;
		}
	}

	/*
	 * Now that we've reinitialized the in-memory fork and set the inode
	 * back to extents format with zero extents, any extents that we
	 * subsequently map into the file will reinitialize the on-disk fork
	 * area for us.  All we have to do is log the inode core to preserve
	 * the format and extent count fields.
	 */
	*log_flags |= XFS_ILOG_CORE;
}

/*
 * Make our changes permanent so that we can start rebuilding the fork: log
 * the inode with @log_flags, then roll the transaction with the inode.
 * Returns the result of the roll.
 */
STATIC int
xrep_bmap_commit_new(
	struct xfs_scrub	*sc,
	int			log_flags)
{
	struct xfs_inode	*ip = sc->ip;

	xfs_trans_log_inode(sc->tp, ip, log_flags);
	return xfs_trans_roll_inode(&sc->tp, ip);
}

/* Build new fork mappings and dispose of the old bmbt blocks. */
STATIC int
xrep_bmap_rebuild_tree(
	struct xfs_scrub	*sc,
	int			whichfork,
	struct xfbma		*bmap_records,
	struct xfs_bitmap	*old_bmbt_blocks)
{
	struct xfs_owner_info	oinfo;
	struct xrep_add_bmap	x = {
		.sc		= sc,
		.baseflags	= XFS_BMAPI_NORMAP,
	};
	int			error;

	if (whichfork == XFS_ATTR_FORK)
		x.baseflags |= XFS_BMAPI_ATTRFORK;

	/*
	 * Sort the bmap extents by startblock to avoid btree splits when we
	 * rebuild the bmbt btree.
	 */
	error = xfbma_sort(bmap_records, xrep_bmap_extent_cmp);
	if (error)
		return error;

	/* Dispose of all the old bmbt blocks. */
	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, whichfork);
	error = xrep_reap_extents(sc, old_bmbt_blocks, &oinfo,
			XFS_AG_RESV_NONE);
	if (error)
		return error;

	/* "Remap" the extents into the fork. */
	return xfbma_iter_del(bmap_records, xrep_bmap_insert_rec, &x);
}

/* Repair an inode fork. */
STATIC int
xrep_bmap(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_bitmap	old_bmbt_blocks;
	struct xfbma		*bmap_records;
	xfs_rfsblock_t		old_bmbt_block_count;
	xfs_rfsblock_t		otherfork_blocks;
	int			log_flags = 0;
	int			error = 0;

	error = xrep_bmap_check_inputs(sc, whichfork);
	if (error)
		return error;

	/* Set up some storage */
	bmap_records = xfbma_init(sizeof(struct xrep_bmap_extent));
	if (IS_ERR(bmap_records))
		return PTR_ERR(bmap_records);

	/* Collect all reverse mappings for this fork's extents. */
	xfs_bitmap_init(&old_bmbt_blocks);
	error = xrep_bmap_find_mappings(sc, whichfork, bmap_records,
			&old_bmbt_blocks, &old_bmbt_block_count,
			&otherfork_blocks);
	if (error)
		goto out;

	/*
	 * Blow out the in-core fork and zero the on-disk fork.  This is the
	 * point at which we are no longer able to bail out gracefully.
	 */
	error = xrep_bmap_reset_counters(sc, old_bmbt_block_count,
			otherfork_blocks, &log_flags);
	if (error)
		goto out;
	xrep_bmap_reset_fork(sc, whichfork, xfbma_length(bmap_records) == 0,
			&log_flags);
	error = xrep_bmap_commit_new(sc, log_flags);
	if (error)
		goto out;

	/* Now rebuild the fork extent map information. */
	error = xrep_bmap_rebuild_tree(sc, whichfork, bmap_records,
			&old_bmbt_blocks);
out:
	xfs_bitmap_destroy(&old_bmbt_blocks);
	xfbma_destroy(bmap_records);
	return error;
}

/* Repair an inode's data fork; thin wrapper around xrep_bmap(). */
int
xrep_bmap_data(
	struct xfs_scrub	*sc)
{
	const int		whichfork = XFS_DATA_FORK;

	return xrep_bmap(sc, whichfork);
}

/* Repair an inode's attr fork; thin wrapper around xrep_bmap(). */
int
xrep_bmap_attr(
	struct xfs_scrub	*sc)
{
	const int		whichfork = XFS_ATTR_FORK;

	return xrep_bmap(sc, whichfork);
}