summaryrefslogtreecommitdiff
path: root/fs/xfs/scrub/xfile.c
diff options
context:
space:
mode:
authorDarrick J. Wong <darrick.wong@oracle.com>2019-01-16 10:12:05 -0800
committerDarrick J. Wong <darrick.wong@oracle.com>2019-02-04 09:31:13 -0800
commit54eb14450a9d85e44f324264cfda39673ff685f8 (patch)
treee44dfb08a50213dc46c578f48ee1b61750280509 /fs/xfs/scrub/xfile.c
parentbfae316eb34c7c0e5b223ee49ad01fe35f72d1f6 (diff)
xfs: convert big array and blob array to use memfd backend (tag: repair-part-one_2019-02-04)
There are several problems with the initial implementations of the big array and the blob array data structures. First, using linked lists imposes a two-pointer overhead on every record stored. For blobs this isn't serious, but for fixed-size records this increases memory requirements by 40-60%. Second, we're using kernel memory to store the intermediate records. Kernel memory cannot be paged out, which means we run the risk of OOMing the machine when we run out of physical memory.

Therefore, replace the linked lists in both structures with memfd files. Random access becomes much easier, memory overhead drops to a negligible amount, and because memfd pages can be swapped, we have considerably more flexibility for memory use.

Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Diffstat (limited to 'fs/xfs/scrub/xfile.c')
-rw-r--r--fs/xfs/scrub/xfile.c121
1 files changed, 121 insertions, 0 deletions
diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c
new file mode 100644
index 000000000000..92a6aea71cfa
--- /dev/null
+++ b/fs/xfs/scrub/xfile.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2018 Oracle. All Rights Reserved.
+ * Author: Darrick J. Wong <darrick.wong@oracle.com>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_shared.h"
+#include "xfs_format.h"
+#include "scrub/array.h"
+#include "scrub/scrub.h"
+#include "scrub/trace.h"
+#include "scrub/xfile.h"
+#include <linux/shmem_fs.h>
+
+/*
+ * Set up a shmem-backed file named @description for internal use and hand
+ * back its file pointer.  The file is never installed in a file descriptor
+ * table, so userspace cannot reach it; the caller owns the only reference
+ * and /must/ drop it (fput) when finished.
+ *
+ * Whatever shmem_file_setup() reports on failure (an error pointer, or
+ * NULL) is passed straight back to the caller without modification.
+ */
+struct file *
+xfile_create(
+	const char		*description)
+{
+	struct file		*memfd = shmem_file_setup(description, 0, 0);
+
+	/* Only a usable file gets the positional read/write capabilities. */
+	if (!IS_ERR_OR_NULL(memfd)) {
+		memfd->f_mode |= FMODE_PREAD | FMODE_PWRITE;
+		memfd->f_flags |= O_RDWR | O_LARGEFILE;
+	}
+
+	return memfd;
+}
+
+/*
+ * Drop the caller's reference to @filp by calling fput() on it.  No NULL or
+ * error-pointer check is made here, so @filp must be a valid file pointer.
+ */
+void
+xfile_destroy(
+ struct file *filp)
+{
+ fput(filp);
+}
+
+/*
+ * Parameters and result of one read or write request that is handed to
+ * xfile_io_worker() through a work item.
+ */
+struct xfile_io_args {
+ struct work_struct work; /* work item; the worker recovers this struct from it */
+ struct completion *done; /* completed by the worker once the IO call returns */
+
+ struct file *filp; /* file to read from or write to */
+ void *ptr; /* buffer passed to kernel_read()/kernel_write() */
+ loff_t *pos; /* file position pointer passed to the IO call */
+ size_t count; /* number of bytes requested */
+ ssize_t ret; /* return value of kernel_read()/kernel_write() */
+ bool is_read; /* true: kernel_read(); false: kernel_write() */
+};
+
+/*
+ * Workqueue callback that carries out one request described by the
+ * xfile_io_args embedding @work.  The IO call runs inside a
+ * memalloc_nofs_save()/memalloc_nofs_restore() section; its return value is
+ * stored in ->ret and the submitter is then woken through ->done.
+ */
+static void
+xfile_io_worker(
+	struct work_struct	*work)
+{
+	struct xfile_io_args	*io = container_of(work, struct xfile_io_args,
+						   work);
+	unsigned int		nofs_flags = memalloc_nofs_save();
+
+	io->ret = io->is_read ?
+			kernel_read(io->filp, io->ptr, io->count, io->pos) :
+			kernel_write(io->filp, io->ptr, io->count, io->pos);
+
+	/* The result must be stored before the waiter is released. */
+	complete(io->done);
+
+	memalloc_nofs_restore(nofs_flags);
+}
+
+/*
+ * Perform a read or write IO to the file backing the array.
+ *
+ * The IO is always handed to the system workqueue (schedule_work) and this
+ * function sleeps until the worker signals completion.  Running the IO from
+ * a worker reduces stack usage in the caller and avoids deadlocking on a
+ * page fault that might happen while the xfs is frozen.
+ *
+ * @filp:      file to operate on.
+ * @cmd_flags: only the bits covered by XFILE_IO_MASK are examined; a value
+ *             of XFILE_IO_READ selects a read, anything else a write.
+ * @pos:       file position pointer, forwarded to kernel_read() or
+ *             kernel_write().
+ * @ptr:       buffer to read into or write from.
+ * @count:     number of bytes to transfer.
+ *
+ * Returns 0 if exactly @count bytes were transferred.  Any error or short
+ * transfer is reported as -ENOMEM.
+ */
+int
+xfile_io(
+	struct file		*filp,
+	unsigned int		cmd_flags,
+	loff_t			*pos,
+	void			*ptr,
+	size_t			count)
+{
+	DECLARE_COMPLETION_ONSTACK(done);
+	struct xfile_io_args	args = {
+		.filp		= filp,
+		.ptr		= ptr,
+		.pos		= pos,
+		.count		= count,
+		.done		= &done,
+		.is_read	= (cmd_flags & XFILE_IO_MASK) == XFILE_IO_READ,
+	};
+
+	INIT_WORK_ONSTACK(&args.work, xfile_io_worker);
+	schedule_work(&args.work);
+	wait_for_completion(&done);
+	destroy_work_on_stack(&args.work);
+
+	/*
+	 * Since we're treating this file as "memory", any IO error should be
+	 * treated as a failure to find any memory.  Test for a negative errno
+	 * explicitly before comparing against the unsigned byte count, so the
+	 * result never depends on signed-to-unsigned conversion.
+	 */
+	if (args.ret < 0 || (size_t)args.ret != count)
+		return -ENOMEM;
+	return 0;
+}
+
+/*
+ * Discard the pages backing a byte range of the file.  @start and @end are
+ * forwarded unchanged to shmem_truncate_range() on the file's inode.
+ * NOTE(review): that helper appears to treat @end as an inclusive offset;
+ * confirm against mm/shmem.c before relying on it.
+ */
+void
+xfile_discard(
+	struct file		*filp,
+	loff_t			start,
+	loff_t			end)
+{
+	struct inode		*inode = file_inode(filp);
+
+	shmem_truncate_range(inode, start, end);
+}