summaryrefslogtreecommitdiff
path: root/fs/xfs/scrub/tempfile.c
blob: d45ed229b8cb8dec06a9a6f69dd997ac24b15af2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
#include "xfs_dir2.h"
#include "xfs_xchgrange.h"
#include "xfs_defer.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
#include "scrub/trace.h"
#include "scrub/tempfile.h"
#include "scrub/xfile.h"

/*
 * Create a temporary file for reconstructing metadata, with the intention of
 * atomically swapping the temporary file's contents with the file that's
 * being repaired.
 */
int
xrep_tempfile_create(
	struct xfs_scrub	*sc,
	uint16_t		mode)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_trans	*tp = NULL;
	struct xfs_dquot	*udqp = NULL;
	struct xfs_dquot	*gdqp = NULL;
	struct xfs_dquot	*pdqp = NULL;
	struct xfs_trans_res	*tres;
	struct xfs_inode	*dp = mp->m_rootip;
	xfs_ino_t		ino;
	unsigned int		resblks;
	bool			is_dir = S_ISDIR(mode);
	int			error;

	if (xfs_is_shutdown(mp))
		return -EIO;
	if (xfs_is_readonly(mp))
		return -EROFS;

	ASSERT(sc->tp == NULL);
	ASSERT(sc->tempip == NULL);

	/*
	 * Make sure that we have allocated dquot(s) on disk.  The temporary
	 * inode should be completely root owned so that we don't fail due to
	 * quota limits.
	 */
	error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0,
			XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp);
	if (error)
		return error;

	if (is_dir) {
		resblks = XFS_MKDIR_SPACE_RES(mp, 0);
		tres = &M_RES(mp)->tr_mkdir;
	} else {
		resblks = XFS_IALLOC_SPACE_RES(mp);
		tres = &M_RES(mp)->tr_create_tmpfile;
	}

	error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
			&tp);
	if (error)
		goto out_release_dquots;

	/* Allocate inode, set up directory. */
	error = xfs_dialloc(&tp, dp->i_ino, mode, &ino);
	if (error)
		goto out_trans_cancel;
	error = xfs_init_new_inode(&init_user_ns, tp, dp, ino, mode, 0, 0,
			0, false, &sc->tempip);
	if (error)
		goto out_trans_cancel;

	/* Change the ownership of the inode to root. */
	VFS_I(sc->tempip)->i_uid = GLOBAL_ROOT_UID;
	VFS_I(sc->tempip)->i_gid = GLOBAL_ROOT_GID;
	sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
	xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE);

	/*
	 * Mark our temporary file as private so that LSMs and the ACL code
	 * don't try to add their own metadata or reason about these files.
	 * The file should never be exposed to userspace.
	 */
	VFS_I(sc->tempip)->i_flags |= S_PRIVATE;
	VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR;

	if (is_dir) {
		error = xfs_dir_init(tp, sc->tempip, dp);
		if (error)
			goto out_trans_cancel;
	}

	/*
	 * Attach the dquot(s) to the inodes and modify them incore.
	 * These ids of the inode couldn't have changed since the new
	 * inode has been locked ever since it was created.
	 */
	xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp);

	/*
	 * Put our temp file on the unlinked list so it's purged automatically.
	 * Anything being reconstructed using this file must be atomically
	 * swapped with the original file because the contents here will be
	 * purged when the inode is dropped or log recovery cleans out the
	 * unlinked list.
	 */
	error = xfs_iunlink(tp, sc->tempip);
	if (error)
		goto out_trans_cancel;

	error = xfs_trans_commit(tp);
	if (error)
		goto out_release_inode;

	trace_xrep_tempfile_create(sc);

	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);
	xfs_qm_dqrele(pdqp);

	/* Finish setting up the incore / vfs context. */
	xfs_setup_iops(sc->tempip);
	xfs_finish_inode_setup(sc->tempip);

	sc->temp_ilock_flags = 0;
	return error;

out_trans_cancel:
	xfs_trans_cancel(tp);
out_release_inode:
	/*
	 * Wait until after the current transaction is aborted to finish the
	 * setup of the inode and release the inode.  This prevents recursive
	 * transactions and deadlocks from xfs_inactive.
	 */
	if (sc->tempip) {
		xfs_finish_inode_setup(sc->tempip);
		xchk_irele(sc, sc->tempip);
	}
out_release_dquots:
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);
	xfs_qm_dqrele(pdqp);

	return error;
}

/* Take IOLOCK_EXCL on the temporary file, maybe. */
bool
xrep_tempfile_iolock_nowait(
	struct xfs_scrub	*sc)
{
	if (xfs_ilock_nowait(sc->tempip, XFS_IOLOCK_EXCL)) {
		sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
		return true;
	}

	return false;
}

/* Release IOLOCK_EXCL on the temporary file. */
void
xrep_tempfile_iounlock(
	struct xfs_scrub	*sc)
{
	xfs_iunlock(sc->tempip, XFS_IOLOCK_EXCL);
	sc->temp_ilock_flags &= ~XFS_IOLOCK_EXCL;
}

/* Prepare the temporary file for metadata updates by grabbing ILOCK_EXCL. */
void
xrep_tempfile_ilock(
	struct xfs_scrub	*sc)
{
	sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
	xfs_ilock(sc->tempip, XFS_ILOCK_EXCL);
}

/* Try to grab ILOCK_EXCL on the temporary file. */
bool
xrep_tempfile_ilock_nowait(
	struct xfs_scrub	*sc)
{
	if (xfs_ilock_nowait(sc->tempip, XFS_ILOCK_EXCL)) {
		sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
		return true;
	}

	return false;
}

/* Unlock ILOCK_EXCL on the temporary file after an update. */
void
xrep_tempfile_iunlock(
	struct xfs_scrub	*sc)
{
	xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
	sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL;
}

/* Release the temporary file. */
void
xrep_tempfile_rele(
	struct xfs_scrub	*sc)
{
	if (!sc->tempip)
		return;

	if (sc->temp_ilock_flags) {
		xfs_iunlock(sc->tempip, sc->temp_ilock_flags);
		sc->temp_ilock_flags = 0;
	}

	xchk_irele(sc, sc->tempip);
	sc->tempip = NULL;
}

/*
 * Make sure that the given range of the data fork of the temporary file is
 * mapped to written blocks.  The caller must ensure that both inodes are
 * joined to the transaction.
 */
int
xrep_tempfile_prealloc(
	struct xfs_scrub	*sc,
	xfs_fileoff_t		off,
	xfs_filblks_t		len)
{
	struct xfs_bmbt_irec	map;
	xfs_fileoff_t		end = off + len;
	int			error;

	ASSERT(sc->tempip != NULL);
	ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip));

	for (; off < end; off = map.br_startoff + map.br_blockcount) {
		int		nmaps = 1;

		/*
		 * If we have a real extent mapping this block then we're
		 * in ok shape.
		 */
		error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps,
				XFS_DATA_FORK);
		if (error)
			return error;
		if (nmaps == 0) {
			ASSERT(nmaps != 0);
			return -EFSCORRUPTED;
		}

		if (xfs_bmap_is_written_extent(&map))
			continue;

		/*
		 * If we find a delalloc reservation then something is very
		 * very wrong.  Bail out.
		 */
		if (map.br_startblock == DELAYSTARTBLOCK)
			return -EFSCORRUPTED;

		/*
		 * Make sure this block has a real zeroed extent allocated to
		 * it.
		 */
		nmaps = 1;
		error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off,
				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map,
				&nmaps);
		if (error)
			return error;

		trace_xrep_tempfile_prealloc(sc, XFS_DATA_FORK, &map);

		/* Commit new extent and all deferred work. */
		error = xfs_defer_finish(&sc->tp);
		if (error)
			return error;
	}

	return 0;
}

/*
 * Write data to each block of a file.  The given range of the tempfile's data
 * fork must already be populated with written extents.
 */
int
xrep_tempfile_copyin(
	struct xfs_scrub	*sc,
	xfs_fileoff_t		off,
	xfs_filblks_t		len,
	xrep_tempfile_copyin_fn	prep_fn,
	void			*data)
{
	LIST_HEAD(buffers_list);
	struct xfs_mount	*mp = sc->mp;
	struct xfs_buf		*bp;
	xfs_fileoff_t		flush_mask;
	xfs_fileoff_t		end = off + len;
	loff_t			pos = XFS_FSB_TO_B(mp, off);
	int			error = 0;

	ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode));

	/* Flush buffers to disk every 512K */
	flush_mask = XFS_B_TO_FSBT(mp, (1U << 19)) - 1;

	for (; off < end; off++, pos += mp->m_sb.sb_blocksize) {
		struct xfs_bmbt_irec	map;
		int			nmaps = 1;

		/* Read block mapping for this file block. */
		error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0);
		if (error)
			goto out_err;
		if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) {
			error = -EFSCORRUPTED;
			goto out_err;
		}

		/* Get the metadata buffer for this offset in the file. */
		error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp,
				XFS_FSB_TO_DADDR(mp, map.br_startblock),
				mp->m_bsize, 0, &bp);
		if (error)
			goto out_err;

		trace_xrep_tempfile_copyin(sc, XFS_DATA_FORK, &map);

		/* Read in a block's worth of data from the xfile. */
		error = prep_fn(sc, bp, data);
		if (error) {
			xfs_trans_brelse(sc->tp, bp);
			goto out_err;
		}

		/* Queue buffer, and flush if we have too much dirty data. */
		xfs_buf_delwri_queue_here(bp, &buffers_list);
		xfs_trans_brelse(sc->tp, bp);

		if (!(off & flush_mask)) {
			error = xfs_buf_delwri_submit(&buffers_list);
			if (error)
				goto out_err;
		}
	}

	/*
	 * Write the new blocks to disk.  If the ordered list isn't empty after
	 * that, then something went wrong and we have to fail.  This should
	 * never happen, but we'll check anyway.
	 */
	error = xfs_buf_delwri_submit(&buffers_list);
	if (error)
		goto out_err;

	if (!list_empty(&buffers_list)) {
		ASSERT(list_empty(&buffers_list));
		error = -EIO;
		goto out_err;
	}

	return 0;

out_err:
	xfs_buf_delwri_cancel(&buffers_list);
	return error;
}

/*
 * Set the temporary file's size.  Caller must join the tempfile to the scrub
 * transaction and is responsible for adjusting block mappings as needed.
 */
int
xrep_tempfile_set_isize(
	struct xfs_scrub	*sc,
	unsigned long long	isize)
{
	if (sc->tempip->i_disk_size == isize)
		return 0;

	sc->tempip->i_disk_size = isize;
	i_size_write(VFS_I(sc->tempip), isize);
	return xrep_tempfile_roll_trans(sc);
}

/*
 * Roll a repair transaction involving the temporary file.  Caller must join
 * both the temporary file and the file being scrubbed to the transaction.
 * This function return with both inodes joined to a new scrub transaction,
 * or the usual negative errno.
 */
int
xrep_tempfile_roll_trans(
	struct xfs_scrub	*sc)
{
	int			error;

	xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);
	error = xrep_roll_trans(sc);
	if (error)
		return error;

	xfs_trans_ijoin(sc->tp, sc->tempip, 0);
	return 0;
}