xfs: convert big array and blob array to use memfd backendrepair-part-one_2019-02-04

There are several problems with the initial implementations of the big array and the blob array data structures. First, using linked lists imposes a two-pointer overhead on every record stored. For blobs this isn't serious, but for fixed-size records this increases memory requirements by 40-60%. Second, we're using kernel memory to store the intermediate records. Kernel memory cannot be paged out, which means we run the risk of OOMing the machine when we run out of physical memory. Therefore, replace the linked lists in both structures with memfd files. Random access becomes much easier, memory overhead drops to a negligible amount, and because memfd pages can be swapped, we have considerably more flexibility for memory use. Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
author: Darrick J. Wong <darrick.wong@oracle.com> 2019-01-16 10:12:05 -0800
committer: Darrick J. Wong <darrick.wong@oracle.com> 2019-02-04 09:31:13 -0800
commit: 54eb14450a9d85e44f324264cfda39673ff685f8 (patch)
tree: e44dfb08a50213dc46c578f48ee1b61750280509 /fs/xfs/scrub/xfile.c
parent: bfae316eb34c7c0e5b223ee49ad01fe35f72d1f6 (diff)
1 files changed, 121 insertions, 0 deletions
diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c
new file mode 100644
index 000000000000..92a6aea71cfa
--- /dev/null
+++ b/fs/xfs/scrub/xfile.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2018 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "scrub/array.h"
+#include "scrub/scrub.h"
+#include "scrub/trace.h"
+#include "scrub/xfile.h"
+#include <linux/shmem_fs.h>
+
+/*
+ * Create a memfd to our specifications and return a file pointer.  The file
+ * is not installed in the file description table (because userspace has no
+ * business accessing our internal data), which means that the caller /must/
+ * fput the file when finished.
+ */
+struct file *
+xfile_create(
+	const char	*description)
+{
+	struct file	*filp;
+
+	filp = shmem_file_setup(description, 0, 0);
+	if (IS_ERR_OR_NULL(filp))
+		return filp;
+
+	filp->f_mode |= FMODE_PREAD | FMODE_PWRITE;
+	filp->f_flags |= O_RDWR | O_LARGEFILE;
+	return filp;
+}
+
+void
+xfile_destroy(
+	struct file	*filp)
+{
+	fput(filp);
+}
+
+struct xfile_io_args {
+	struct work_struct	work;
+	struct completion	*done;
+
+	struct file		*filp;
+	void			*ptr;
+	loff_t			*pos;
+	size_t			count;
+	ssize_t			ret;
+	bool			is_read;
+};
+
+static void
+xfile_io_worker(
+	struct work_struct	*work)
+{
+	struct xfile_io_args	*args;
+	unsigned int		pflags;
+
+	args = container_of(work, struct xfile_io_args, work);
+	pflags = memalloc_nofs_save();
+
+	if (args->is_read)
+		args->ret = kernel_read(args->filp, args->ptr, args->count,
+				args->pos);
+	else
+		args->ret = kernel_write(args->filp, args->ptr, args->count,
+				args->pos);
+	complete(args->done);
+
+	memalloc_nofs_restore(pflags);
+}
+
+/*
+ * Perform a read or write IO to the file backing the array.  We can defer
+ * the work to a workqueue if the caller so desires, either to reduce stack
+ * usage or because the xfs is frozen and we want to avoid deadlocking on the
+ * page fault that might be about to happen.
+ */
+int
+xfile_io(
+	struct file	*filp,
+	unsigned int	cmd_flags,
+	loff_t		*pos,
+	void		*ptr,
+	size_t		count)
+{
+	DECLARE_COMPLETION_ONSTACK(done);
+	struct xfile_io_args	args = {
+		.filp = filp,
+		.ptr = ptr,
+		.pos = pos,
+		.count = count,
+		.done = &done,
+		.is_read = (cmd_flags & XFILE_IO_MASK) == XFILE_IO_READ,
+	};
+
+	INIT_WORK_ONSTACK(&args.work, xfile_io_worker);
+	schedule_work(&args.work);
+	wait_for_completion(&done);
+	destroy_work_on_stack(&args.work);
+
+	/*
+	 * Since we're treating this file as "memory", any IO error should be
+	 * treated as a failure to find any memory.
+	 */
+	return args.ret == count ? 0 : -ENOMEM;
+}
+
+/* Discard pages backing a range of the file. */
+void
+xfile_discard(
+	struct file	*filp,
+	loff_t		start,
+	loff_t		end)
+{
+	shmem_truncate_range(file_inode(filp), start, end);
+}
author	Darrick J. Wong <darrick.wong@oracle.com>	2019-01-16 10:12:05 -0800
committer	Darrick J. Wong <darrick.wong@oracle.com>	2019-02-04 09:31:13 -0800
commit	54eb14450a9d85e44f324264cfda39673ff685f8 (patch)
tree	e44dfb08a50213dc46c578f48ee1b61750280509 /fs/xfs/scrub/xfile.c
parent	bfae316eb34c7c0e5b223ee49ad01fe35f72d1f6 (diff)