summaryrefslogtreecommitdiff
path: root/fs/xfs
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2017-12-22 12:37:45 -0800
committerDan Williams <dan.j.williams@intel.com>2018-04-03 05:58:58 -0700
commit1cd49ac7a6bc97988b1e1863058d43a0d5f841a6 (patch)
tree46f566e0511e8990fd9e60683f461787cb434d88 /fs/xfs
parent6df8aac50d349baa8ae01fb92fb61618e8bdd7bb (diff)
xfs, dax: introduce xfs_break_dax_layouts()
xfs_break_dax_layouts(), similar to xfs_break_leased_layouts(), scans for busy / pinned dax pages and waits for those pages to go idle before any potential extent unmap operation. dax_layout_busy_page() handles synchronizing against new page-busy events (get_user_pages). It invalidates all mappings to trigger the get_user_pages slow path which will eventually block on the xfs inode lock held in XFS_MMAPLOCK_EXCL mode. If dax_layout_busy_page() finds a busy page it returns it for xfs to wait for the page-idle event that will fire when the page reference count reaches 1 (recall ZONE_DEVICE pages are idle at count 1, see generic_dax_pagefree()). While waiting, the XFS_MMAPLOCK_EXCL lock is dropped in order to not deadlock the process that might be trying to elevate the page count of more pages before arranging for any of them to go idle. I.e. the typical case of submitting I/O is that iov_iter_get_pages() elevates the reference count of all pages in the I/O before starting I/O on the first page. The process of elevating the reference count of all pages involved in an I/O may cause faults that need to take XFS_MMAPLOCK_EXCL. Cc: Jan Kara <jack@suse.cz> Cc: Dave Chinner <david@fromorbit.com> Cc: "Darrick J. Wong" <darrick.wong@oracle.com> Cc: Ross Zwisler <ross.zwisler@linux.intel.com> Reviewed-by: Christoph Hellwig <hch@lst.de> Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Diffstat (limited to 'fs/xfs')
-rw-r--r--fs/xfs/xfs_file.c60
1 files changed, 49 insertions, 11 deletions
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 51e6506bdcb1..0342f6fb782f 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -752,6 +752,38 @@ buffered:
return ret;
}
+static void
+xfs_wait_var_event(
+ struct inode *inode,
+ uint iolock,
+ bool *did_unlock)
+{
+ struct xfs_inode *ip = XFS_I(inode);
+
+ *did_unlock = true;
+ xfs_iunlock(ip, iolock);
+ schedule();
+ xfs_ilock(ip, iolock);
+}
+
+static int
+xfs_break_dax_layouts(
+ struct inode *inode,
+ uint iolock,
+ bool *did_unlock)
+{
+ struct page *page;
+
+ *did_unlock = false;
+ page = dax_layout_busy_page(inode->i_mapping);
+ if (!page)
+ return 0;
+
+ return ___wait_var_event(&page->_refcount,
+ atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
+ 0, 0, xfs_wait_var_event(inode, iolock, did_unlock));
+}
+
int
xfs_break_layouts(
struct inode *inode,
@@ -763,17 +795,23 @@ xfs_break_layouts(
ASSERT(xfs_isilocked(XFS_I(inode), XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL));
- switch (reason) {
- case BREAK_UNMAP:
- ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL));
- /* fall through */
- case BREAK_WRITE:
- error = xfs_break_leased_layouts(inode, iolock, &retry);
- break;
- default:
- WARN_ON_ONCE(1);
- return -EINVAL;
- }
+ do {
+ switch (reason) {
+ case BREAK_UNMAP:
+ ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL));
+
+ error = xfs_break_dax_layouts(inode, *iolock, &retry);
+ /* fall through */
+ case BREAK_WRITE:
+ if (error || retry)
+ break;
+ error = xfs_break_leased_layouts(inode, iolock, &retry);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ return -EINVAL;
+ }
+ } while (error == 0 && retry);
return error;
}