diff options
author | Darrick J. Wong <darrick.wong@oracle.com> | 2019-01-16 10:12:05 -0800 |
---|---|---|
committer | Darrick J. Wong <darrick.wong@oracle.com> | 2019-04-15 17:02:45 -0700 |
commit | d059a892c058f19f7bdd681ca99d2441b7d7a3ac (patch) | |
tree | 7f9a8ed29d73c9f2cac867eb14a70a34994867d0 /fs/xfs/scrub/xfile.c | |
parent | e33e4050b1b4a15ca20300fc32deb5075ab8ff52 (diff) |
xfs: convert big array and blob array to use memfd backend [repair-part-one_2019-04-15]
There are several problems with the initial implementations of the big
array and the blob array data structures. First, using linked lists
imposes a two-pointer overhead on every record stored. For blobs this
isn't serious, but for fixed-size records this increases memory
requirements by 40-60%. Second, we're using kernel memory to store the
intermediate records. Kernel memory cannot be paged out, which means we
run the risk of OOMing the machine when we run out of physical memory.
Therefore, replace the linked lists in both structures with memfd files.
Random access becomes much easier, memory overhead drops to a negligible
amount, and because memfd pages can be swapped, we have considerably
more flexibility for memory use.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
Diffstat (limited to 'fs/xfs/scrub/xfile.c')
-rw-r--r-- | fs/xfs/scrub/xfile.c | 121 |
1 files changed, 121 insertions, 0 deletions
diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c new file mode 100644 index 000000000000..232b9ebbdf84 --- /dev/null +++ b/fs/xfs/scrub/xfile.c @@ -0,0 +1,121 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright (C) 2019 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <darrick.wong@oracle.com> + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "scrub/array.h" +#include "scrub/scrub.h" +#include "scrub/trace.h" +#include "scrub/xfile.h" +#include <linux/shmem_fs.h> + +/* + * Create a memfd to our specifications and return a file pointer. The file + * is not installed in the file description table (because userspace has no + * business accessing our internal data), which means that the caller /must/ + * fput the file when finished. + */ +struct file * +xfile_create( + const char *description) +{ + struct file *filp; + + filp = shmem_file_setup(description, 0, 0); + if (IS_ERR_OR_NULL(filp)) + return filp; + + filp->f_mode |= FMODE_PREAD | FMODE_PWRITE; + filp->f_flags |= O_RDWR | O_LARGEFILE; + return filp; +} + +void +xfile_destroy( + struct file *filp) +{ + fput(filp); +} + +struct xfile_io_args { + struct work_struct work; + struct completion *done; + + struct file *filp; + void *ptr; + loff_t *pos; + size_t count; + ssize_t ret; + bool is_read; +}; + +static void +xfile_io_worker( + struct work_struct *work) +{ + struct xfile_io_args *args; + unsigned int pflags; + + args = container_of(work, struct xfile_io_args, work); + pflags = memalloc_nofs_save(); + + if (args->is_read) + args->ret = kernel_read(args->filp, args->ptr, args->count, + args->pos); + else + args->ret = kernel_write(args->filp, args->ptr, args->count, + args->pos); + complete(args->done); + + memalloc_nofs_restore(pflags); +} + +/* + * Perform a read or write IO to the file backing the array. 
We can defer + * the work to a workqueue if the caller so desires, either to reduce stack + * usage or because the xfs is frozen and we want to avoid deadlocking on the + * page fault that might be about to happen. + */ +int +xfile_io( + struct file *filp, + unsigned int cmd_flags, + loff_t *pos, + void *ptr, + size_t count) +{ + DECLARE_COMPLETION_ONSTACK(done); + struct xfile_io_args args = { + .filp = filp, + .ptr = ptr, + .pos = pos, + .count = count, + .done = &done, + .is_read = (cmd_flags & XFILE_IO_MASK) == XFILE_IO_READ, + }; + + INIT_WORK_ONSTACK(&args.work, xfile_io_worker); + schedule_work(&args.work); + wait_for_completion(&done); + destroy_work_on_stack(&args.work); + + /* + * Since we're treating this file as "memory", any IO error should be + * treated as a failure to find any memory. + */ + return args.ret == count ? 0 : -ENOMEM; +} + +/* Discard pages backing a range of the file. */ +void +xfile_discard( + struct file *filp, + loff_t start, + loff_t end) +{ + shmem_truncate_range(file_inode(filp), start, end); +} |