summaryrefslogtreecommitdiff
path: root/fs/xfs/scrub/tempfile.c
blob: 09dbefa6204999849d41d07c4cf816c072b6d9b7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2021 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
#include "xfs_dir2.h"
#include "xfs_xchgrange.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
#include "scrub/trace.h"
#include "scrub/tempfile.h"
#include "scrub/xfile.h"

/*
 * Create a temporary file for reconstructing metadata, with the intention of
 * atomically swapping the temporary file's contents with the file that's
 * being repaired.
 *
 * @mode is the file mode for the new inode.  A directory mode selects the
 * mkdir transaction reservation and causes the new inode to be initialized
 * as a directory (the root directory inode is handed to xfs_dir_init).
 *
 * On success, returns zero with the new inode stored in sc->tempip and
 * sc->temp_ilock_flags reset to zero.  On failure, returns a negative errno;
 * any inode that was created has been released and sc->tempip is reset to
 * NULL so that a later xrep_tempfile_rele() does not release it again.
 * Returns -EIO if the filesystem is shut down and -EROFS if it is read-only.
 */
int
xrep_tempfile_create(
	struct xfs_scrub	*sc,
	uint16_t		mode)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_trans	*tp = NULL;
	struct xfs_dquot	*udqp = NULL;
	struct xfs_dquot	*gdqp = NULL;
	struct xfs_dquot	*pdqp = NULL;
	struct xfs_trans_res	*tres;
	struct xfs_inode	*dp = mp->m_rootip;
	xfs_ino_t		ino;
	unsigned int		resblks;
	bool			is_dir = S_ISDIR(mode);
	bool			use_log = false;
	int			error;

	if (xfs_is_shutdown(mp))
		return -EIO;
	if (xfs_is_readonly(mp))
		return -EROFS;

	ASSERT(sc->tp == NULL);
	ASSERT(sc->tempip == NULL);

	/* Enable atomic extent swapping. */
	error = xfs_xchg_range_grab_log_assist(mp, true, &use_log);
	if (error)
		return error;
	ASSERT(use_log);
	sc->flags |= XREP_ATOMIC_EXCHANGE;

	/*
	 * Make sure that we have allocated dquot(s) on disk.  The temporary
	 * inode should be completely root owned so that we don't fail due to
	 * quota limits.
	 */
	error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
			XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
	if (error)
		return error;

	/* Pick the block and log reservations that match the file type. */
	if (is_dir) {
		resblks = XFS_MKDIR_SPACE_RES(mp, 0);
		tres = &M_RES(mp)->tr_mkdir;
	} else {
		resblks = XFS_IALLOC_SPACE_RES(mp);
		tres = &M_RES(mp)->tr_create_tmpfile;
	}

	error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
			&tp);
	if (error)
		goto out_release_dquots;

	/* Allocate inode, set up directory. */
	error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
	if (error)
		goto out_trans_cancel;
	error = xfs_init_new_inode(&init_user_ns, tp, dp, ino, mode, 0, 0,
			0, false, &sc->tempip);
	if (error)
		goto out_trans_cancel;

	/* Change the ownership of the inode to root. */
	VFS_I(sc->tempip)->i_uid = GLOBAL_ROOT_UID;
	VFS_I(sc->tempip)->i_gid = GLOBAL_ROOT_GID;
	xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE);

	/*
	 * Mark our temporary file as private so that LSMs and the ACL code
	 * don't try to add their own metadata or reason about these files.
	 * The file should never be exposed to userspace.
	 */
	VFS_I(sc->tempip)->i_flags |= S_PRIVATE;
	VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR;

	if (is_dir) {
		error = xfs_dir_init(tp, sc->tempip, dp);
		if (error)
			goto out_trans_cancel;
	}

	/*
	 * Attach the dquot(s) to the inodes and modify them incore.
	 * These ids of the inode couldn't have changed since the new
	 * inode has been locked ever since it was created.
	 */
	xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp);

	/*
	 * Put our temp file on the unlinked list so it's purged automatically.
	 * Anything being reconstructed using this file must be atomically
	 * swapped with the original file because the contents here will be
	 * purged when the inode is dropped or log recovery cleans out the
	 * unlinked list.
	 */
	error = xfs_iunlink(tp, sc->tempip);
	if (error)
		goto out_trans_cancel;

	error = xfs_trans_commit(tp);
	if (error)
		goto out_release_inode;

	trace_xrep_tempfile_create(sc);

	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);
	xfs_qm_dqrele(pdqp);

	/* Finish setting up the incore / vfs context. */
	xfs_setup_iops(sc->tempip);
	xfs_finish_inode_setup(sc->tempip);

	sc->temp_ilock_flags = 0;
	return error;

out_trans_cancel:
	xfs_trans_cancel(tp);
out_release_inode:
	/*
	 * Wait until after the current transaction is aborted to finish the
	 * setup of the inode and release the inode.  This prevents recursive
	 * transactions and deadlocks from xfs_inactive.
	 */
	if (sc->tempip) {
		xfs_finish_inode_setup(sc->tempip);
		xfs_irele(sc->tempip);

		/*
		 * Our reference is gone, so forget the pointer.  Leaving it
		 * set would make xrep_tempfile_rele() release the inode a
		 * second time.
		 */
		sc->tempip = NULL;
	}
out_release_dquots:
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);
	xfs_qm_dqrele(pdqp);

	return error;
}

/*
 * Take the requested inode locks on the temporary file.  The flags are
 * recorded in sc->temp_ilock_flags before the (blocking) lock call is made.
 */
void
xrep_tempfile_ilock(
	struct xfs_scrub	*sc,
	unsigned int		ilock_flags)
{
	/* Remember which locks we are about to hold. */
	sc->temp_ilock_flags = sc->temp_ilock_flags | ilock_flags;

	xfs_ilock(sc->tempip, ilock_flags);
}

/*
 * Try to take the requested inode locks on the temporary file without
 * blocking.  Returns true and records the flags in sc->temp_ilock_flags if
 * the locks were obtained; returns false and records nothing otherwise.
 */
bool
xrep_tempfile_ilock_nowait(
	struct xfs_scrub	*sc,
	unsigned int		ilock_flags)
{
	/* Could not get the locks; leave the bookkeeping untouched. */
	if (!xfs_ilock_nowait(sc->tempip, ilock_flags))
		return false;

	sc->temp_ilock_flags = sc->temp_ilock_flags | ilock_flags;
	return true;
}

/*
 * Drop the given inode locks on the temporary file, then clear those flags
 * from sc->temp_ilock_flags.
 */
void
xrep_tempfile_iunlock(
	struct xfs_scrub	*sc,
	unsigned int		ilock_flags)
{
	xfs_iunlock(sc->tempip, ilock_flags);

	/* These locks are no longer held. */
	sc->temp_ilock_flags = sc->temp_ilock_flags & ~ilock_flags;
}

/*
 * Release the temporary file, if there is one.  Any inode locks still
 * recorded in sc->temp_ilock_flags are dropped first, and sc->tempip is
 * cleared afterwards.
 */
void
xrep_tempfile_rele(
	struct xfs_scrub	*sc)
{
	if (sc->tempip != NULL) {
		/* Unlock whatever we still hold before dropping the inode. */
		if (sc->temp_ilock_flags != 0)
			xrep_tempfile_iunlock(sc, sc->temp_ilock_flags);

		xfs_irele(sc->tempip);
		sc->tempip = NULL;
	}
}

/*
 * Make sure that the given range of the data fork of the temporary file is
 * mapped to written blocks.  The caller must ensure that both inodes are
 * joined to the transaction.
 *
 * The range covered is [@off, @off + @len), in the units that
 * xfs_bmapi_read/xfs_bmapi_write take for file offset and length.  Mappings
 * that are already written are skipped; anything else is converted/allocated
 * with zeroing and the scrub transaction is rolled after each allocation.
 *
 * Returns zero on success or a negative errno.  -EFSCORRUPTED is returned
 * if a delalloc reservation is found or if a mapping call reports no
 * mapping for the range.
 */
int
xrep_tempfile_prealloc(
	struct xfs_scrub	*sc,
	xfs_fileoff_t		off,
	xfs_filblks_t		len)
{
	struct xfs_bmbt_irec	map;
	xfs_fileoff_t		end = off + len;
	int			nmaps;
	int			error = 0;

	ASSERT(sc->tempip != NULL);
	ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip));

	/* Walk the whole range up to @end, not merely up to @len. */
	while (off < end) {
		/*
		 * If we have a real extent mapping this block then we're
		 * in ok shape.
		 */
		nmaps = 1;
		error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps,
				XFS_DATA_FORK);
		if (error)
			break;

		/*
		 * Without a mapping, @map was never filled in.  Bail out
		 * rather than making decisions based on stack garbage.
		 */
		if (nmaps != 1) {
			ASSERT(nmaps == 1);
			return -EFSCORRUPTED;
		}

		if (xfs_bmap_is_written_extent(&map)) {
			/* Step past this mapping by its length. */
			off = map.br_startoff + map.br_blockcount;
			continue;
		}

		/*
		 * If we find a delalloc reservation then something is very
		 * very wrong.  Bail out.
		 */
		if (map.br_startblock == DELAYSTARTBLOCK)
			return -EFSCORRUPTED;

		/*
		 * Make sure this block has a real zeroed extent allocated to
		 * it.
		 */
		nmaps = 1;
		error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off,
				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map,
				&nmaps);
		if (error)
			break;

		/*
		 * If no mapping came back then @map still describes the old
		 * state; advancing past it would silently leave the range
		 * unallocated.
		 */
		if (nmaps != 1)
			return -EFSCORRUPTED;

		error = xrep_roll_trans(sc);
		if (error)
			break;

		/*
		 * Advance by the length of the new mapping; br_startblock is
		 * a disk address, not a distance in the file.
		 */
		off = map.br_startoff + map.br_blockcount;
	}

	return error;
}

/*
 * Write @isize bytes from the scrub context's xfile (sc->xfile) into the
 * data fork of the temp file, one filesystem block at a time, then set the
 * temp file's size to @isize.  The caller must join both inodes to the
 * transaction.
 *
 * Every file block in the range must already be mapped to a written extent;
 * otherwise -EFSCORRUPTED is returned.  Each block's buffer is given @ops as
 * its buffer ops and @type as its buffer log item type, loaded from the
 * xfile, and queued on a delayed-write list that is flushed once more than
 * 256K of buffers have accumulated and again at the end.
 *
 * Returns zero on success or a negative errno.  On any failure, buffers
 * still sitting on the delayed-write list are cancelled.
 */
int
xrep_tempfile_copyin(
	struct xfs_scrub	*sc,
	const struct xfs_buf_ops *ops,
	enum xfs_blft		type,
	xfs_fileoff_t		isize)
{
	LIST_HEAD(buffers_list);
	struct xfs_bmbt_irec	map;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_buf		*bp;
	xfs_fileoff_t		off = 0;	/* file block being copied */
	loff_t			pos = 0;	/* byte position in the xfile */
	unsigned int		nr_buffers = 0;
	int			nmaps;
	int			error = 0;

	ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode));

	for (; pos < isize; pos += mp->m_sb.sb_blocksize, off++) {
		size_t		count;

		/* Read block mapping for this file block. */
		nmaps = 1;
		error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0);
		if (error)
			goto out;
		if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) {
			error = -EFSCORRUPTED;
			goto out;
		}

		/* Get the metadata buffer for this offset in the file. */
		error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp,
				XFS_FSB_TO_DADDR(mp, map.br_startblock),
				mp->m_bsize, 0, &bp);
		if (error)
			goto out;
		bp->b_ops = ops;
		xfs_trans_buf_set_type(sc->tp, bp, type);

		/*
		 * Read in a block's worth of data from the xfile.  The last
		 * block may be short if @isize is not block aligned.
		 */
		count = min_t(loff_t, isize - pos, mp->m_sb.sb_blocksize);
		error = xfile_obj_load(sc->xfile, bp->b_addr, count, pos);
		if (error) {
			xfs_trans_brelse(sc->tp, bp);
			goto out;
		}

		/*
		 * Put this buffer on the delwri list so we can write them all
		 * out in batches.
		 */
		xfs_buf_delwri_queue(bp, &buffers_list);
		xfs_trans_brelse(sc->tp, bp);
		nr_buffers++;

		/*
		 * If we have more than 256K of data to write out, flush it to
		 * disk so we don't use up too much memory.
		 */
		if (XFS_FSB_TO_B(mp, nr_buffers) > 262144) {
			error = xfs_buf_delwri_submit(&buffers_list);
			if (error)
				goto out;
			nr_buffers = 0;
		}
	}

	/*
	 * Write the new blocks to disk.  If the delwri list isn't empty after
	 * that, then something went wrong and we have to fail.  This should
	 * never happen, but we'll check anyway.
	 */
	error = xfs_buf_delwri_submit(&buffers_list);
	if (error)
		goto out;
	if (!list_empty(&buffers_list)) {
		ASSERT(list_empty(&buffers_list));
		/*
		 * Go through the common exit so the leftover buffers get
		 * cancelled instead of being left on our on-stack list head.
		 */
		error = -EIO;
		goto out;
	}

	/* Set the new inode size, if needed. */
	if (sc->tempip->i_disk_size != isize) {
		sc->tempip->i_disk_size = isize;
		i_size_write(VFS_I(sc->tempip), isize);
		xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);
		return xrep_roll_trans(sc);
	}

	return 0;
out:
	xfs_buf_delwri_cancel(&buffers_list);
	return error;
}