author	Alistair Popple <apopple@nvidia.com>	2025-02-28 14:31:00 +1100
committer	Andrew Morton <akpm@linux-foundation.org>	2025-03-17 22:06:37 -0700
commit	d5b3afea22a52517e6bc835432e6dfd079b8bf7c (patch)
tree	235abb54692a302f93b99a3fc0ff05802200746d
parent	e6fa3963a30d53427d33a577c5ba4bfd3373bc93 (diff)
fs/dax: create a common implementation to break DAX layouts
Prior to freeing a block, file systems supporting FS DAX must check that
the associated pages are both unmapped from user-space and not undergoing
DMA or other access from, for example, get_user_pages(). This is achieved
by unmapping the file range and scanning the FS DAX page cache to see if
any pages within the mapping have an elevated refcount.
This is done with two functions: dax_layout_busy_page_range(), which
returns a busy page within the range, and dax_wait_page_idle(), which
waits for that page's refcount to become idle. Rather than have every
file system open-code this loop, introduce a common implementation that
both unmaps the range and waits for its pages to become idle.
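
For illustration, the caller-side pattern the new helpers enable looks as
follows. This is a sketch, not part of the patch: the myfs_* names are
hypothetical, and the unlock/schedule/relock shape of the wait callback
mirrors the ext4, fuse and XFS callbacks converted below.

	/*
	 * Hypothetical adopter of the new helpers. dax_break_layout()
	 * invokes the callback each time it must wait for a busy page,
	 * so the callback releases the lock the reference holder may
	 * need, yields, and retakes the lock before the refcount is
	 * rechecked.
	 */
	static void myfs_wait_dax_page(struct inode *inode)
	{
		filemap_invalidate_unlock(inode->i_mapping);
		schedule();
		filemap_invalidate_lock(inode->i_mapping);
	}

	/* Whole-inode variant, e.g. ahead of a truncate. */
	static int myfs_break_layouts(struct inode *inode)
	{
		return dax_break_layout_inode(inode, myfs_wait_dax_page);
	}

	/* Sub-range variant, e.g. ahead of a hole punch. */
	static int myfs_punch_prepare(struct inode *inode, loff_t start,
				      loff_t end)
	{
		return dax_break_layout(inode, start, end,
					myfs_wait_dax_page);
	}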
Link: https://lkml.kernel.org/r/c4d381e41fc618296cee2820403c166d80599d5c.1740713401.git-series.apopple@nvidia.com
Signed-off-by: Alistair Popple <apopple@nvidia.com>
Reviewed-by: Dan Williams <dan.j.williams@intel.com>
Tested-by: Alison Schofield <alison.schofield@intel.com>
Cc: Alexander Gordeev <agordeev@linux.ibm.com>
Cc: Asahi Lina <lina@asahilina.net>
Cc: Balbir Singh <balbirs@nvidia.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Christian Borntraeger <borntraeger@linux.ibm.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Chunyan Zhang <zhang.lyra@gmail.com>
Cc: "Darrick J. Wong" <djwong@kernel.org>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Gerald Schaefer <gerald.schaefer@linux.ibm.com>
Cc: Heiko Carstens <hca@linux.ibm.com>
Cc: Huacai Chen <chenhuacai@kernel.org>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Jason Gunthorpe <jgg@nvidia.com>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: John Hubbard <jhubbard@nvidia.com>
Cc: linmiaohe <linmiaohe@huawei.com>
Cc: Logan Gunthorpe <logang@deltatee.com>
Cc: Matthew Wilcox (Oracle) <willy@infradead.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Nicholas Piggin <npiggin@gmail.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: Sven Schnelle <svens@linux.ibm.com>
Cc: Ted Ts'o <tytso@mit.edu>
Cc: Vasily Gorbik <gor@linux.ibm.com>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: WANG Xuerui <kernel@xen0n.name>
Cc: Will Deacon <will@kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
 fs/dax.c            | 33
 fs/ext4/inode.c     | 13
 fs/fuse/dax.c       | 27
 fs/xfs/xfs_inode.c  | 26
 fs/xfs/xfs_inode.h  |  2
 include/linux/dax.h | 23
 6 files changed, 63 insertions(+), 61 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -846,6 +846,39 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
 	return ret;
 }
 
+static int wait_page_idle(struct page *page,
+			  void (cb)(struct inode *),
+			  struct inode *inode)
+{
+	return ___wait_var_event(page, dax_page_is_idle(page),
+				 TASK_INTERRUPTIBLE, 0, 0, cb(inode));
+}
+
+/*
+ * Unmaps the inode and waits for any DMA to complete prior to deleting the
+ * DAX mapping entries for the range.
+ */
+int dax_break_layout(struct inode *inode, loff_t start, loff_t end,
+		void (cb)(struct inode *))
+{
+	struct page *page;
+	int error = 0;
+
+	if (!dax_mapping(inode->i_mapping))
+		return 0;
+
+	do {
+		page = dax_layout_busy_page_range(inode->i_mapping, start, end);
+		if (!page)
+			break;
+
+		error = wait_page_idle(page, cb, inode);
+	} while (error == 0);
+
+	return error;
+}
+EXPORT_SYMBOL_GPL(dax_break_layout);
+
 /*
  * Invalidate DAX entry if it is clean.
  */
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index cc1acb1fdec6..2342bac14a9e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -3911,21 +3911,10 @@ static void ext4_wait_dax_page(struct inode *inode)
 
 int ext4_break_layouts(struct inode *inode)
 {
-	struct page *page;
-	int error;
-
 	if (WARN_ON_ONCE(!rwsem_is_locked(&inode->i_mapping->invalidate_lock)))
 		return -EINVAL;
 
-	do {
-		page = dax_layout_busy_page(inode->i_mapping);
-		if (!page)
-			return 0;
-
-		error = dax_wait_page_idle(page, ext4_wait_dax_page, inode);
-	} while (error == 0);
-
-	return error;
+	return dax_break_layout_inode(inode, ext4_wait_dax_page);
 }
 
 /*
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index bf6faa3536a4..0502bf3cdf6a 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -666,33 +666,12 @@ static void fuse_wait_dax_page(struct inode *inode)
 	filemap_invalidate_lock(inode->i_mapping);
 }
 
-/* Should be called with mapping->invalidate_lock held exclusively */
-static int __fuse_dax_break_layouts(struct inode *inode, bool *retry,
-				    loff_t start, loff_t end)
-{
-	struct page *page;
-
-	page = dax_layout_busy_page_range(inode->i_mapping, start, end);
-	if (!page)
-		return 0;
-
-	*retry = true;
-	return dax_wait_page_idle(page, fuse_wait_dax_page, inode);
-}
-
+/* Should be called with mapping->invalidate_lock held exclusively. */
 int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start,
 			   u64 dmap_end)
 {
-	bool	retry;
-	int	ret;
-
-	do {
-		retry = false;
-		ret = __fuse_dax_break_layouts(inode, &retry, dmap_start,
-					       dmap_end);
-	} while (ret == 0 && retry);
-
-	return ret;
+	return dax_break_layout(inode, dmap_start, dmap_end,
+				fuse_wait_dax_page);
 }
 
 ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 1b5613dfed7f..d4f07e02b28b 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2735,21 +2735,17 @@ xfs_mmaplock_two_inodes_and_break_dax_layout(
 	struct xfs_inode	*ip2)
 {
 	int			error;
-	bool			retry;
 	struct page		*page;
 
 	if (ip1->i_ino > ip2->i_ino)
 		swap(ip1, ip2);
 
 again:
-	retry = false;
 	/* Lock the first inode */
 	xfs_ilock(ip1, XFS_MMAPLOCK_EXCL);
-	error = xfs_break_dax_layouts(VFS_I(ip1), &retry);
-	if (error || retry) {
+	error = xfs_break_dax_layouts(VFS_I(ip1));
+	if (error) {
 		xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
-		if (error == 0 && retry)
-			goto again;
 		return error;
 	}
 
@@ -2764,7 +2760,7 @@ again:
 	 * for this nested lock case.
 	 */
 	page = dax_layout_busy_page(VFS_I(ip2)->i_mapping);
-	if (page && page_ref_count(page) != 1) {
+	if (!dax_page_is_idle(page)) {
 		xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
 		xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
 		goto again;
@@ -3008,19 +3004,11 @@ xfs_wait_dax_page(
 
 int
 xfs_break_dax_layouts(
-	struct inode		*inode,
-	bool			*retry)
+	struct inode		*inode)
 {
-	struct page		*page;
-
 	xfs_assert_ilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL);
 
-	page = dax_layout_busy_page(inode->i_mapping);
-	if (!page)
-		return 0;
-
-	*retry = true;
-	return dax_wait_page_idle(page, xfs_wait_dax_page, inode);
+	return dax_break_layout_inode(inode, xfs_wait_dax_page);
 }
 
 int
@@ -3038,8 +3026,8 @@ xfs_break_layouts(
 		retry = false;
 		switch (reason) {
 		case BREAK_UNMAP:
-			error = xfs_break_dax_layouts(inode, &retry);
-			if (error || retry)
+			error = xfs_break_dax_layouts(inode);
+			if (error)
 				break;
 			fallthrough;
 		case BREAK_WRITE:
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index c08093a65352..123dfa965c6e 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -603,7 +603,7 @@ xfs_itruncate_extents(
 	return xfs_itruncate_extents_flags(tpp, ip, whichfork, new_size, 0);
 }
 
-int	xfs_break_dax_layouts(struct inode *inode, bool *retry);
+int	xfs_break_dax_layouts(struct inode *inode);
 int	xfs_break_layouts(struct inode *inode, uint *iolock,
 		enum layout_break_reason reason);
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 9b1ce984d410..a6b277f1e13a 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -207,12 +207,9 @@ int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero,
 int dax_truncate_page(struct inode *inode, loff_t pos, bool *did_zero,
 		const struct iomap_ops *ops);
 
-static inline int dax_wait_page_idle(struct page *page,
-				void (cb)(struct inode *),
-				struct inode *inode)
+static inline bool dax_page_is_idle(struct page *page)
 {
-	return ___wait_var_event(page, page_ref_count(page) == 1,
-				TASK_INTERRUPTIBLE, 0, 0, cb(inode));
+	return page && page_ref_count(page) == 1;
 }
 
 #if IS_ENABLED(CONFIG_DAX)
@@ -228,6 +225,15 @@ static inline void dax_read_unlock(int id)
 {
 }
 #endif /* CONFIG_DAX */
+
+#if !IS_ENABLED(CONFIG_FS_DAX)
+static inline int __must_check dax_break_layout(struct inode *inode,
+		loff_t start, loff_t end, void (cb)(struct inode *))
+{
+	return 0;
+}
+#endif
+
 bool dax_alive(struct dax_device *dax_dev);
 void *dax_get_private(struct dax_device *dax_dev);
 long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
@@ -251,6 +257,13 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf,
 int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index);
 int dax_invalidate_mapping_entry_sync(struct address_space *mapping,
 				      pgoff_t index);
+int __must_check dax_break_layout(struct inode *inode, loff_t start,
+				loff_t end, void (cb)(struct inode *));
+static inline int __must_check dax_break_layout_inode(struct inode *inode,
+		void (cb)(struct inode *))
+{
+	return dax_break_layout(inode, 0, LLONG_MAX, cb);
+}
 int dax_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
 				  struct inode *dest, loff_t destoff,
 				  loff_t len, bool *is_same,
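
A note on the wait mechanics for readers unfamiliar with
___wait_var_event(): its final argument is an expression run on each pass
through the wait loop in place of a bare schedule(), which is why each
file system's callback is expected both to yield and to cycle its lock.
Below is an annotated restatement of the wait_page_idle() helper added
above; the comment is editorial, not part of the patch.

	static int wait_page_idle(struct page *page,
				  void (cb)(struct inode *),
				  struct inode *inode)
	{
		/*
		 * Sleep until dax_page_is_idle(page), i.e. until the
		 * page's refcount drops to one and only the FS DAX page
		 * cache still holds it. cb(inode) runs instead of
		 * schedule() on every iteration, letting the file system
		 * drop and retake the lock that the reference holder may
		 * need in order to finish.
		 */
		return ___wait_var_event(page, dax_page_is_idle(page),
					 TASK_INTERRUPTIBLE, 0, 0, cb(inode));
	}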