summaryrefslogtreecommitdiff
path: root/fs/xfs/xfs_exchrange.c
blob: 4cd824e47f751d32e830cd31a8c223f2ec7c4fed (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2020-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_exchrange.h"
#include <linux/fsnotify.h>

/*
 * Generic code for exchanging ranges of two files via XFS_IOC_EXCHANGE_RANGE.
 * This part deals with struct file objects and byte ranges and does not deal
 * with XFS-specific data structures such as xfs_inodes and block ranges.  This
 * separation may some day facilitate porting to another filesystem.
 *
 * The goal is to exchange fxr.length bytes starting at fxr.file1_offset in
 * file1 with the same number of bytes starting at fxr.file2_offset in file2.
 * Implementations must call xfs_exchange_range_prep to prepare the two
 * files prior to taking locks; and they must update the inode change and mod
 * times of both files as part of the metadata update.  The timestamp update
 * and freshness checks must be done atomically as part of the data exchange
 * operation to ensure correctness of the freshness check.
 * xfs_exchange_range_finish must be called after the operation completes
 * successfully but before locks are dropped.
 */

/* Verify that we have security clearance to perform this operation. */
static int
xfs_exchange_range_verify_area(
	struct xfs_exchrange	*fxr)
{
	int			ret;

	ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length,
			true);
	if (ret)
		return ret;

	return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length,
			true);
}

/*
 * Performs necessary checks before doing a range exchange, having stabilized
 * mutable inode attributes via i_rwsem.
 */
static inline int
xfs_exchange_range_checks(
	struct xfs_exchrange	*fxr,
	unsigned int		alloc_unit)
{
	struct inode		*inode1 = file_inode(fxr->file1);
	struct inode		*inode2 = file_inode(fxr->file2);
	uint64_t		allocmask = alloc_unit - 1;
	int64_t			test_len;
	uint64_t		blen;
	loff_t			size1, size2, tmp;
	int			error;

	/* Don't touch certain kinds of inodes */
	if (IS_IMMUTABLE(inode1) || IS_IMMUTABLE(inode2))
		return -EPERM;
	if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2))
		return -ETXTBSY;

	size1 = i_size_read(inode1);
	size2 = i_size_read(inode2);

	/* Ranges cannot start after EOF. */
	if (fxr->file1_offset > size1 || fxr->file2_offset > size2)
		return -EINVAL;

	/*
	 * If the caller said to exchange to EOF, we set the length of the
	 * request large enough to cover everything to the end of both files.
	 */
	if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) {
		fxr->length = max_t(int64_t, size1 - fxr->file1_offset,
					     size2 - fxr->file2_offset);

		error = xfs_exchange_range_verify_area(fxr);
		if (error)
			return error;
	}

	/*
	 * The start of both ranges must be aligned to the file allocation
	 * unit.
	 */
	if (!IS_ALIGNED(fxr->file1_offset, alloc_unit) ||
	    !IS_ALIGNED(fxr->file2_offset, alloc_unit))
		return -EINVAL;

	/* Ensure offsets don't wrap. */
	if (check_add_overflow(fxr->file1_offset, fxr->length, &tmp) ||
	    check_add_overflow(fxr->file2_offset, fxr->length, &tmp))
		return -EINVAL;

	/*
	 * We require both ranges to end within EOF, unless we're exchanging
	 * to EOF.
	 */
	if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) &&
	    (fxr->file1_offset + fxr->length > size1 ||
	     fxr->file2_offset + fxr->length > size2))
		return -EINVAL;

	/*
	 * Make sure we don't hit any file size limits.  If we hit any size
	 * limits such that test_length was adjusted, we abort the whole
	 * operation.
	 */
	test_len = fxr->length;
	error = generic_write_check_limits(fxr->file2, fxr->file2_offset,
			&test_len);
	if (error)
		return error;
	error = generic_write_check_limits(fxr->file1, fxr->file1_offset,
			&test_len);
	if (error)
		return error;
	if (test_len != fxr->length)
		return -EINVAL;

	/*
	 * If the user wanted us to exchange up to the infile's EOF, round up
	 * to the next allocation unit boundary for this check.  Do the same
	 * for the outfile.
	 *
	 * Otherwise, reject the range length if it's not aligned to an
	 * allocation unit.
	 */
	if (fxr->file1_offset + fxr->length == size1)
		blen = ALIGN(size1, alloc_unit) - fxr->file1_offset;
	else if (fxr->file2_offset + fxr->length == size2)
		blen = ALIGN(size2, alloc_unit) - fxr->file2_offset;
	else if (!IS_ALIGNED(fxr->length, alloc_unit))
		return -EINVAL;
	else
		blen = fxr->length;

	/* Don't allow overlapped exchanges within the same file. */
	if (inode1 == inode2 &&
	    fxr->file2_offset + blen > fxr->file1_offset &&
	    fxr->file1_offset + blen > fxr->file2_offset)
		return -EINVAL;

	/*
	 * Ensure that we don't exchange a partial EOF block into the middle of
	 * another file.
	 */
	if ((fxr->length & allocmask) == 0)
		return 0;

	blen = fxr->length;
	if (fxr->file2_offset + blen < size2)
		blen &= ~allocmask;

	if (fxr->file1_offset + blen < size1)
		blen &= ~allocmask;

	return blen == fxr->length ? 0 : -EINVAL;
}

/*
 * Check that the two inodes are eligible for range exchanges, the ranges make
 * sense, and then flush all dirty data.  Caller must ensure that the inodes
 * have been locked against any other modifications.
 */
static inline int
xfs_exchange_range_prep(
	struct xfs_exchrange	*fxr,
	unsigned int		alloc_unit)
{
	struct inode		*inode1 = file_inode(fxr->file1);
	struct inode		*inode2 = file_inode(fxr->file2);
	bool			same_inode = (inode1 == inode2);
	int			error;

	/* Check that we don't violate system file offset limits. */
	error = xfs_exchange_range_checks(fxr, alloc_unit);
	if (error || fxr->length == 0)
		return error;

	/* Wait for the completion of any pending IOs on both files */
	inode_dio_wait(inode1);
	if (!same_inode)
		inode_dio_wait(inode2);

	error = filemap_write_and_wait_range(inode1->i_mapping,
			fxr->file1_offset,
			fxr->file1_offset + fxr->length - 1);
	if (error)
		return error;

	error = filemap_write_and_wait_range(inode2->i_mapping,
			fxr->file2_offset,
			fxr->file2_offset + fxr->length - 1);
	if (error)
		return error;

	/*
	 * If the files or inodes involved require synchronous writes, amend
	 * the request to force the filesystem to flush all data and metadata
	 * to disk after the operation completes.
	 */
	if (((fxr->file1->f_flags | fxr->file2->f_flags) & O_SYNC) ||
	    IS_SYNC(inode1) || IS_SYNC(inode2))
		fxr->flags |= XFS_EXCHANGE_RANGE_DSYNC;

	return 0;
}

/*
 * Finish a range exchange operation, if it was successful.  Caller must ensure
 * that the inodes are still locked against any other modifications.
 */
static inline int
xfs_exchange_range_finish(
	struct xfs_exchrange	*fxr)
{
	int			error;

	error = file_remove_privs(fxr->file1);
	if (error)
		return error;
	if (file_inode(fxr->file1) == file_inode(fxr->file2))
		return 0;

	return file_remove_privs(fxr->file2);
}

/* Exchange parts of two files. */
static int
xfs_exchange_range(
	struct xfs_exchrange	*fxr)
{
	struct inode		*inode1 = file_inode(fxr->file1);
	struct inode		*inode2 = file_inode(fxr->file2);
	int			ret;

	BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS &
		     XFS_EXCHANGE_RANGE_PRIV_FLAGS);

	/* Both files must be on the same mount/filesystem. */
	if (fxr->file1->f_path.mnt != fxr->file2->f_path.mnt)
		return -EXDEV;

	if (fxr->flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS)
		return -EINVAL;

	/* Userspace requests only honored for regular files. */
	if (S_ISDIR(inode1->i_mode) || S_ISDIR(inode2->i_mode))
		return -EISDIR;
	if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode))
		return -EINVAL;

	/* Both files must be opened for read and write. */
	if (!(fxr->file1->f_mode & FMODE_READ) ||
	    !(fxr->file1->f_mode & FMODE_WRITE) ||
	    !(fxr->file2->f_mode & FMODE_READ) ||
	    !(fxr->file2->f_mode & FMODE_WRITE))
		return -EBADF;

	/* Neither file can be opened append-only. */
	if ((fxr->file1->f_flags & O_APPEND) ||
	    (fxr->file2->f_flags & O_APPEND))
		return -EBADF;

	/*
	 * If we're not exchanging to EOF, we can check the areas before
	 * stabilizing both files' i_size.
	 */
	if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) {
		ret = xfs_exchange_range_verify_area(fxr);
		if (ret)
			return ret;
	}

	/* Update cmtime if the fd/inode don't forbid it. */
	if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1))
		fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME1;
	if (!(fxr->file2->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode2))
		fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME2;

	file_start_write(fxr->file2);
	ret = -EOPNOTSUPP; /* XXX call out to lower level code */
	file_end_write(fxr->file2);
	if (ret)
		return ret;

	fsnotify_modify(fxr->file1);
	if (fxr->file2 != fxr->file1)
		fsnotify_modify(fxr->file2);
	return 0;
}

/* Collect exchange-range arguments from userspace. */
long
xfs_ioc_exchange_range(
	struct file			*file,
	struct xfs_exchange_range __user *argp)
{
	struct xfs_exchrange		fxr = {
		.file2			= file,
	};
	struct xfs_exchange_range	args;
	struct fd			file1;
	int				error;

	if (copy_from_user(&args, argp, sizeof(args)))
		return -EFAULT;
	if (memchr_inv(&args.pad, 0, sizeof(args.pad)))
		return -EINVAL;
	if (args.flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS)
		return -EINVAL;

	fxr.file1_offset	= args.file1_offset;
	fxr.file2_offset	= args.file2_offset;
	fxr.length		= args.length;
	fxr.flags		= args.flags;

	file1 = fdget(args.file1_fd);
	if (!file1.file)
		return -EBADF;
	fxr.file1 = file1.file;

	error = xfs_exchange_range(&fxr);
	fdput(file1);
	return error;
}