From c9c4ff12df110feb1b91951010f673f4b16e49e8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 27 Nov 2023 13:58:07 +0000 Subject: netfs: Move pinning-for-writeback from fscache to netfs Move the resource pinning-for-writeback from fscache code to netfslib code. This is used to keep a cache backing object pinned whilst we have dirty pages on the netfs inode in the pagecache such that VM writeback will be able to reach it. Whilst we're at it, switch the parameters of netfs_unpin_writeback() to match ->write_inode() so that it can be used for that directly. Note that this mechanism could be more generically useful than that for network filesystems. Quite often they have to keep around other resources (e.g. authentication tokens or network connections) until the writeback is complete. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org --- fs/netfs/misc.c | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) create mode 100644 fs/netfs/misc.c (limited to 'fs/netfs/misc.c') diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c new file mode 100644 index 000000000000..68baf55c47a4 --- /dev/null +++ b/fs/netfs/misc.c @@ -0,0 +1,86 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Miscellaneous routines. + * + * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +#include +#include "internal.h" + +/** + * netfs_dirty_folio - Mark folio dirty and pin a cache object for writeback + * @mapping: The mapping the folio belongs to. + * @folio: The folio being dirtied. + * + * Set the dirty flag on a folio and pin an in-use cache object in memory so + * that writeback can later write to it. This is intended to be called from + * the filesystem's ->dirty_folio() method. + * + * Return: true if the dirty flag was set on the folio, false otherwise. 
+ */ +bool netfs_dirty_folio(struct address_space *mapping, struct folio *folio) +{ + struct inode *inode = mapping->host; + struct netfs_inode *ictx = netfs_inode(inode); + struct fscache_cookie *cookie = netfs_i_cookie(ictx); + bool need_use = false; + + _enter(""); + + if (!filemap_dirty_folio(mapping, folio)) + return false; + if (!fscache_cookie_valid(cookie)) + return true; + + if (!(inode->i_state & I_PINNING_NETFS_WB)) { + spin_lock(&inode->i_lock); + if (!(inode->i_state & I_PINNING_NETFS_WB)) { + inode->i_state |= I_PINNING_NETFS_WB; + need_use = true; + } + spin_unlock(&inode->i_lock); + + if (need_use) + fscache_use_cookie(cookie, true); + } + return true; +} +EXPORT_SYMBOL(netfs_dirty_folio); + +/** + * netfs_unpin_writeback - Unpin writeback resources + * @inode: The inode on which the cookie resides + * @wbc: The writeback control + * + * Unpin the writeback resources pinned by netfs_dirty_folio(). This is + * intended to be called as/by the netfs's ->write_inode() method. + */ +int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc) +{ + struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(inode)); + + if (wbc->unpinned_netfs_wb) + fscache_unuse_cookie(cookie, NULL, NULL); + return 0; +} +EXPORT_SYMBOL(netfs_unpin_writeback); + +/** + * netfs_clear_inode_writeback - Clear writeback resources pinned by an inode + * @inode: The inode to clean up + * @aux: Auxiliary data to apply to the inode + * + * Clear any writeback resources held by an inode when the inode is evicted. + * This must be called before clear_inode() is called. 
+ */ +void netfs_clear_inode_writeback(struct inode *inode, const void *aux) +{ + struct fscache_cookie *cookie = netfs_i_cookie(netfs_inode(inode)); + + if (inode->i_state & I_PINNING_NETFS_WB) { + loff_t i_size = i_size_read(inode); + fscache_unuse_cookie(cookie, aux, &i_size); + } +} +EXPORT_SYMBOL(netfs_clear_inode_writeback); -- cgit v1.2.3 From c1ec4d7c2e13471558cfea302b7583856284f94c Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 20 Aug 2021 17:08:30 +0100 Subject: netfs: Provide invalidate_folio and release_folio calls Provide default invalidate_folio and release_folio calls. These will need to interact with invalidation correctly at some point. They will be needed if netfslib is to make use of folio->private for its own purposes. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org --- fs/9p/vfs_addr.c | 33 ++------------------------------ fs/afs/file.c | 53 ++++----------------------------------------------- fs/ceph/addr.c | 24 ++--------------------- fs/ceph/cache.h | 10 ---------- fs/netfs/misc.c | 42 ++++++++++++++++++++++++++++++++++++++++ include/linux/netfs.h | 6 ++++-- 6 files changed, 54 insertions(+), 114 deletions(-) (limited to 'fs/netfs/misc.c') diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index 131b83c31f85..055b672a247d 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -88,35 +88,6 @@ const struct netfs_request_ops v9fs_req_ops = { .issue_read = v9fs_issue_read, }; -/** - * v9fs_release_folio - release the private state associated with a folio - * @folio: The folio to be released - * @gfp: The caller's allocation restrictions - * - * Returns true if the page can be released, false otherwise. 
- */ - -static bool v9fs_release_folio(struct folio *folio, gfp_t gfp) -{ - if (folio_test_private(folio)) - return false; -#ifdef CONFIG_9P_FSCACHE - if (folio_test_fscache(folio)) { - if (current_is_kswapd() || !(gfp & __GFP_FS)) - return false; - folio_wait_fscache(folio); - } - fscache_note_page_release(v9fs_inode_cookie(V9FS_I(folio_inode(folio)))); -#endif - return true; -} - -static void v9fs_invalidate_folio(struct folio *folio, size_t offset, - size_t length) -{ - folio_wait_fscache(folio); -} - #ifdef CONFIG_9P_FSCACHE static void v9fs_write_to_cache_done(void *priv, ssize_t transferred_or_error, bool was_async) @@ -324,8 +295,8 @@ const struct address_space_operations v9fs_addr_operations = { .writepage = v9fs_vfs_writepage, .write_begin = v9fs_write_begin, .write_end = v9fs_write_end, - .release_folio = v9fs_release_folio, - .invalidate_folio = v9fs_invalidate_folio, + .release_folio = netfs_release_folio, + .invalidate_folio = netfs_invalidate_folio, .launder_folio = v9fs_launder_folio, .direct_IO = v9fs_direct_IO, }; diff --git a/fs/afs/file.c b/fs/afs/file.c index 0d783e5b2147..d152ba451f0e 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -20,9 +20,6 @@ static int afs_file_mmap(struct file *file, struct vm_area_struct *vma); static int afs_symlink_read_folio(struct file *file, struct folio *folio); -static void afs_invalidate_folio(struct folio *folio, size_t offset, - size_t length); -static bool afs_release_folio(struct folio *folio, gfp_t gfp_flags); static ssize_t afs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter); static ssize_t afs_file_splice_read(struct file *in, loff_t *ppos, @@ -57,8 +54,8 @@ const struct address_space_operations afs_file_aops = { .readahead = netfs_readahead, .dirty_folio = netfs_dirty_folio, .launder_folio = afs_launder_folio, - .release_folio = afs_release_folio, - .invalidate_folio = afs_invalidate_folio, + .release_folio = netfs_release_folio, + .invalidate_folio = netfs_invalidate_folio, .write_begin = 
afs_write_begin, .write_end = afs_write_end, .writepages = afs_writepages, @@ -67,8 +64,8 @@ const struct address_space_operations afs_file_aops = { const struct address_space_operations afs_symlink_aops = { .read_folio = afs_symlink_read_folio, - .release_folio = afs_release_folio, - .invalidate_folio = afs_invalidate_folio, + .release_folio = netfs_release_folio, + .invalidate_folio = netfs_invalidate_folio, .migrate_folio = filemap_migrate_folio, }; @@ -386,48 +383,6 @@ const struct netfs_request_ops afs_req_ops = { .issue_read = afs_issue_read, }; -/* - * invalidate part or all of a page - * - release a page and clean up its private data if offset is 0 (indicating - * the entire page) - */ -static void afs_invalidate_folio(struct folio *folio, size_t offset, - size_t length) -{ - _enter("{%lu},%zu,%zu", folio->index, offset, length); - - folio_wait_fscache(folio); - _leave(""); -} - -/* - * release a page and clean up its private state if it's not busy - * - return true if the page can now be released, false if not - */ -static bool afs_release_folio(struct folio *folio, gfp_t gfp) -{ - struct afs_vnode *vnode = AFS_FS_I(folio_inode(folio)); - - _enter("{{%llx:%llu}[%lu],%lx},%x", - vnode->fid.vid, vnode->fid.vnode, folio_index(folio), folio->flags, - gfp); - - /* deny if folio is being written to the cache and the caller hasn't - * elected to wait */ -#ifdef CONFIG_AFS_FSCACHE - if (folio_test_fscache(folio)) { - if (current_is_kswapd() || !(gfp & __GFP_FS)) - return false; - folio_wait_fscache(folio); - } - fscache_note_page_release(afs_vnode_cache(vnode)); -#endif - - /* Indicate that the folio can be released */ - _leave(" = T"); - return true; -} - static void afs_add_open_mmap(struct afs_vnode *vnode) { if (atomic_inc_return(&vnode->cb_nr_mmap) == 1) { diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 3b8641febeac..8eedc62e7ac4 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -159,27 +159,7 @@ static void ceph_invalidate_folio(struct folio *folio, 
size_t offset, ceph_put_snap_context(snapc); } - folio_wait_fscache(folio); -} - -static bool ceph_release_folio(struct folio *folio, gfp_t gfp) -{ - struct inode *inode = folio->mapping->host; - struct ceph_client *cl = ceph_inode_to_client(inode); - - doutc(cl, "%llx.%llx idx %lu (%sdirty)\n", ceph_vinop(inode), - folio->index, folio_test_dirty(folio) ? "" : "not "); - - if (folio_test_private(folio)) - return false; - - if (folio_test_fscache(folio)) { - if (current_is_kswapd() || !(gfp & __GFP_FS)) - return false; - folio_wait_fscache(folio); - } - ceph_fscache_note_page_release(inode); - return true; + netfs_invalidate_folio(folio, offset, length); } static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq) @@ -1585,7 +1565,7 @@ const struct address_space_operations ceph_aops = { .write_end = ceph_write_end, .dirty_folio = ceph_dirty_folio, .invalidate_folio = ceph_invalidate_folio, - .release_folio = ceph_release_folio, + .release_folio = netfs_release_folio, .direct_IO = noop_direct_IO, }; diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h index 8fc7d828d990..20efac020394 100644 --- a/fs/ceph/cache.h +++ b/fs/ceph/cache.h @@ -56,12 +56,6 @@ static inline bool ceph_is_cache_enabled(struct inode *inode) return fscache_cookie_enabled(ceph_fscache_cookie(ceph_inode(inode))); } -static inline void ceph_fscache_note_page_release(struct inode *inode) -{ - struct ceph_inode_info *ci = ceph_inode(inode); - - fscache_note_page_release(ceph_fscache_cookie(ci)); -} #else /* CONFIG_CEPH_FSCACHE */ static inline int ceph_fscache_register_fs(struct ceph_fs_client* fsc, struct fs_context *fc) @@ -118,10 +112,6 @@ static inline bool ceph_is_cache_enabled(struct inode *inode) { return false; } - -static inline void ceph_fscache_note_page_release(struct inode *inode) -{ -} #endif /* CONFIG_CEPH_FSCACHE */ #endif diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 68baf55c47a4..45bb19ec9a63 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -84,3 +84,45 @@ void 
netfs_clear_inode_writeback(struct inode *inode, const void *aux) } } EXPORT_SYMBOL(netfs_clear_inode_writeback); + +/** + * netfs_invalidate_folio - Invalidate or partially invalidate a folio + * @folio: Folio proposed for release + * @offset: Offset of the invalidated region + * @length: Length of the invalidated region + * + * Invalidate part or all of a folio for a network filesystem. The folio will + * be removed afterwards if the invalidated region covers the entire folio. + */ +void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) +{ + _enter("{%lx},%zx,%zx", folio_index(folio), offset, length); + + folio_wait_fscache(folio); +} +EXPORT_SYMBOL(netfs_invalidate_folio); + +/** + * netfs_release_folio - Try to release a folio + * @folio: Folio proposed for release + * @gfp: Flags qualifying the release + * + * Request release of a folio and clean up its private state if it's not busy. + * Returns true if the folio can now be released, false if not + */ +bool netfs_release_folio(struct folio *folio, gfp_t gfp) +{ + struct netfs_inode *ctx = netfs_inode(folio_inode(folio)); + + if (folio_test_private(folio)) + return false; + if (folio_test_fscache(folio)) { + if (current_is_kswapd() || !(gfp & __GFP_FS)) + return false; + folio_wait_fscache(folio); + } + + fscache_note_page_release(netfs_i_cookie(ctx)); + return true; +} +EXPORT_SYMBOL(netfs_release_folio); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 06f57d9d09f6..8efbfd3b2820 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -293,11 +293,13 @@ struct readahead_control; void netfs_readahead(struct readahead_control *); int netfs_read_folio(struct file *, struct folio *); int netfs_write_begin(struct netfs_inode *, struct file *, - struct address_space *, loff_t pos, unsigned int len, - struct folio **, void **fsdata); + struct address_space *, loff_t pos, unsigned int len, + struct folio **, void **fsdata); bool netfs_dirty_folio(struct address_space 
*mapping, struct folio *folio); int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc); void netfs_clear_inode_writeback(struct inode *inode, const void *aux); +void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length); +bool netfs_release_folio(struct folio *folio, gfp_t gfp); void netfs_subreq_terminated(struct netfs_io_subrequest *, ssize_t, bool); void netfs_get_subrequest(struct netfs_io_subrequest *subreq, -- cgit v1.2.3 From 7d828a06634799aba0fa392913c7fe2953eb64a6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 22 Sep 2023 13:25:22 +0100 Subject: netfs: Provide tools to create a buffer in an xarray Provide tools to create a buffer in an xarray, with a function to add new folios with a mark. This will be used to create a bounce buffer and can be used more easily to create a list of folios the span of which would require more than a page's worth of bio_vec structs. Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org --- fs/netfs/internal.h | 13 +++++++++ fs/netfs/misc.c | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++ include/linux/netfs.h | 4 +++ 3 files changed, 98 insertions(+) (limited to 'fs/netfs/misc.c') diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index 4708fb15446b..b908c7e0a901 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -56,6 +56,19 @@ static inline void netfs_proc_add_rreq(struct netfs_io_request *rreq) {} static inline void netfs_proc_del_rreq(struct netfs_io_request *rreq) {} #endif +/* + * misc.c + */ +#define NETFS_FLAG_PUT_MARK BIT(0) +#define NETFS_FLAG_PAGECACHE_MARK BIT(1) +int netfs_xa_store_and_mark(struct xarray *xa, unsigned long index, + struct folio *folio, unsigned int flags, + gfp_t gfp_mask); +int netfs_add_folios_to_buffer(struct xarray *buffer, + struct address_space *mapping, + pgoff_t index, pgoff_t to, gfp_t gfp_mask); +void netfs_clear_buffer(struct 
xarray *buffer); + /* * objects.c */ diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 45bb19ec9a63..5d545073fe03 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -8,6 +8,87 @@ #include #include "internal.h" +/* + * Attach a folio to the buffer and maybe set marks on it to say that we need + * to put the folio later and twiddle the pagecache flags. + */ +int netfs_xa_store_and_mark(struct xarray *xa, unsigned long index, + struct folio *folio, unsigned int flags, + gfp_t gfp_mask) +{ + XA_STATE_ORDER(xas, xa, index, folio_order(folio)); + +retry: + xas_lock(&xas); + for (;;) { + xas_store(&xas, folio); + if (!xas_error(&xas)) + break; + xas_unlock(&xas); + if (!xas_nomem(&xas, gfp_mask)) + return xas_error(&xas); + goto retry; + } + + if (flags & NETFS_FLAG_PUT_MARK) + xas_set_mark(&xas, NETFS_BUF_PUT_MARK); + if (flags & NETFS_FLAG_PAGECACHE_MARK) + xas_set_mark(&xas, NETFS_BUF_PAGECACHE_MARK); + xas_unlock(&xas); + return xas_error(&xas); +} + +/* + * Create the specified range of folios in the buffer attached to the read + * request. The folios are marked with NETFS_BUF_PUT_MARK so that we know that + * these need freeing later. + */ +int netfs_add_folios_to_buffer(struct xarray *buffer, + struct address_space *mapping, + pgoff_t index, pgoff_t to, gfp_t gfp_mask) +{ + struct folio *folio; + int ret; + + if (to + 1 == index) /* Page range is inclusive */ + return 0; + + do { + /* TODO: Figure out what order folio can be allocated here */ + folio = filemap_alloc_folio(readahead_gfp_mask(mapping), 0); + if (!folio) + return -ENOMEM; + folio->index = index; + ret = netfs_xa_store_and_mark(buffer, index, folio, + NETFS_FLAG_PUT_MARK, gfp_mask); + if (ret < 0) { + folio_put(folio); + return ret; + } + + index += folio_nr_pages(folio); + } while (index <= to && index != 0); + + return 0; +} + +/* + * Clear an xarray buffer, putting a ref on the folios that have + * NETFS_BUF_PUT_MARK set. 
+ */ +void netfs_clear_buffer(struct xarray *buffer) +{ + struct folio *folio; + XA_STATE(xas, buffer, 0); + + rcu_read_lock(); + xas_for_each_marked(&xas, folio, ULONG_MAX, NETFS_BUF_PUT_MARK) { + folio_put(folio); + } + rcu_read_unlock(); + xa_destroy(buffer); +} + /** * netfs_dirty_folio - Mark folio dirty and pin a cache object for writeback * @mapping: The mapping the folio belongs to. diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 2bb1273b38f4..c05365e3f428 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -109,6 +109,10 @@ static inline int wait_on_page_fscache_killable(struct page *page) return folio_wait_private_2_killable(page_folio(page)); } +/* Marks used on xarray-based buffers */ +#define NETFS_BUF_PUT_MARK XA_MARK_0 /* - Page needs putting */ +#define NETFS_BUF_PAGECACHE_MARK XA_MARK_1 /* - Page needs wb/dirty flag wrangling */ + enum netfs_io_source { NETFS_FILL_WITH_ZEROES, NETFS_DOWNLOAD_FROM_SERVER, -- cgit v1.2.3 From 9ebff83e648148b9ece97d4e4890dd84ca54d6ce Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 29 Sep 2023 17:28:25 +0100 Subject: netfs: Prep to use folio->private for write grouping and streaming write Prepare to use folio->private to hold information for write grouping and streaming write. These are implemented in the same commit as they both make use of folio->private and will both be checked at the same time in several places. "Write grouping" involves ordering the writeback of groups of writes, such as is needed for ceph snaps. A group is represented by a filesystem-supplied object which must contain a netfs_group struct. This contains just a refcount and a pointer to a destructor. "Streaming write" is the storage of data in folios that are marked dirty, but not uptodate, to avoid unnecessary reads of data. This is represented by a netfs_folio struct. This contains the offset and length of the modified region plus the otherwise displaced write grouping pointer. 
The way folio->private is multiplexed is: (1) If private is NULL then neither is in operation on a dirty folio. (2) If private is set, with bit 0 clear, then this points to a group. (3) If private is set, with bit 0 set, then this points to a netfs_folio struct (with bit 0 AND'ed out). Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org --- fs/netfs/internal.h | 28 ++++++++++++++++++++++++++++ fs/netfs/misc.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/netfs.h | 41 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 115 insertions(+) (limited to 'fs/netfs/misc.c') diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h index 2bf2e82b2ad7..d72292e40f9b 100644 --- a/fs/netfs/internal.h +++ b/fs/netfs/internal.h @@ -149,6 +149,34 @@ static inline bool netfs_is_cache_enabled(struct netfs_inode *ctx) #endif } +/* + * Get a ref on a netfs group attached to a dirty page (e.g. a ceph snap). + */ +static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_group) +{ + if (netfs_group) + refcount_inc(&netfs_group->ref); + return netfs_group; +} + +/* + * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap). + */ +static inline void netfs_put_group(struct netfs_group *netfs_group) +{ + if (netfs_group && refcount_dec_and_test(&netfs_group->ref)) + netfs_group->free(netfs_group); +} + +/* + * Dispose of a netfs group attached to a dirty page (e.g. a ceph snap). 
+ */ +static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr) +{ + if (netfs_group && refcount_sub_and_test(nr, &netfs_group->ref)) + netfs_group->free(netfs_group); +} + /* * fscache-cache.c */ diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 5d545073fe03..eeb44abe59c5 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -177,9 +177,55 @@ EXPORT_SYMBOL(netfs_clear_inode_writeback); */ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) { + struct netfs_folio *finfo = NULL; + size_t flen = folio_size(folio); + _enter("{%lx},%zx,%zx", folio_index(folio), offset, length); folio_wait_fscache(folio); + + if (!folio_test_private(folio)) + return; + + finfo = netfs_folio_info(folio); + + if (offset == 0 && length >= flen) + goto erase_completely; + + if (finfo) { + /* We have a partially uptodate page from a streaming write. */ + unsigned int fstart = finfo->dirty_offset; + unsigned int fend = fstart + finfo->dirty_len; + unsigned int end = offset + length; + + if (offset >= fend) + return; + if (end <= fstart) + return; + if (offset <= fstart && end >= fend) + goto erase_completely; + if (offset <= fstart && end > fstart) + goto reduce_len; + if (offset > fstart && end >= fend) + goto move_start; + /* A partial write was split. The caller has already zeroed + * it, so just absorb the hole. 
+ */ + } + return; + +erase_completely: + netfs_put_group(netfs_folio_group(folio)); + folio_detach_private(folio); + folio_clear_uptodate(folio); + kfree(finfo); + return; +reduce_len: + finfo->dirty_len = offset + length - finfo->dirty_offset; + return; +move_start: + finfo->dirty_len -= offset - finfo->dirty_offset; + finfo->dirty_offset = offset; } EXPORT_SYMBOL(netfs_invalidate_folio); diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 3fc41f616621..cfba83e3e3d2 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -140,6 +140,47 @@ struct netfs_inode { #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ }; +/* + * A netfs group - for instance a ceph snap. This is marked on dirty pages and + * pages marked with a group must be flushed before they can be written under + * the domain of another group. + */ +struct netfs_group { + refcount_t ref; + void (*free)(struct netfs_group *netfs_group); +}; + +/* + * Information about a dirty page (attached only if necessary). + * folio->private + */ +struct netfs_folio { + struct netfs_group *netfs_group; /* Filesystem's grouping marker (or NULL). */ + unsigned int dirty_offset; /* Write-streaming dirty data offset */ + unsigned int dirty_len; /* Write-streaming dirty data length */ +}; +#define NETFS_FOLIO_INFO 0x1UL /* OR'd with folio->private. */ + +static inline struct netfs_folio *netfs_folio_info(struct folio *folio) +{ + void *priv = folio_get_private(folio); + + if ((unsigned long)priv & NETFS_FOLIO_INFO) + return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO); + return NULL; +} + +static inline struct netfs_group *netfs_folio_group(struct folio *folio) +{ + struct netfs_folio *finfo; + void *priv = folio_get_private(folio); + + finfo = netfs_folio_info(folio); + if (finfo) + return finfo->netfs_group; + return priv; +} + /* * Resources required to do operations on a cache. 
*/ -- cgit v1.2.3 From 100ccd18bb41ea7abb4fbb419202c06079559501 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 24 Nov 2023 13:39:02 +0000 Subject: netfs: Optimise away reads above the point at which there can be no data Track the file position above which the server is not expected to have any data (the "zero point") and preemptively assume that we can satisfy requests by filling them with zeroes locally rather than attempting to download them if they're over that line - even if we've written data back to the server. Assume that any data that was written back above that position is held in the local cache. Note that we have to split requests that straddle the line. Make use of this to optimise away some reads from the server. We need to set the zero point in the following circumstances: (1) When we see an extant remote inode and have no cache for it, we set the zero_point to i_size. (2) On local inode creation, we set zero_point to 0. (3) On local truncation down, we reduce zero_point to the new i_size if the new i_size is lower. (4) On local truncation up, we don't change zero_point. (5) On local modification, we don't change zero_point. (6) On remote invalidation, we set zero_point to the new i_size. (7) If stored data is discarded from the pagecache or culled from fscache, we must set zero_point above that if the data also got written to the server. (8) If dirty data is written back to the server, but not fscache, we must set zero_point above that. (9) If a direct I/O write is made, set zero_point above that. Assuming the above, any read from the server at or above the zero_point position will return all zeroes. The zero_point value can be stored in the cache, provided the above rules are applied to it by any code that culls part of the local cache. 
Signed-off-by: David Howells cc: Jeff Layton cc: linux-cachefs@redhat.com cc: linux-fsdevel@vger.kernel.org cc: linux-mm@kvack.org --- fs/9p/vfs_inode.c | 2 +- fs/afs/dynroot.c | 2 +- fs/afs/inode.c | 24 ++++++++++++++---------- fs/ceph/inode.c | 2 +- fs/netfs/buffered_write.c | 2 +- fs/netfs/direct_write.c | 4 ++++ fs/netfs/io.c | 10 ++++++++++ fs/netfs/misc.c | 5 +++++ fs/nfs/fscache.h | 2 +- fs/smb/client/cifsfs.c | 4 ++-- include/linux/netfs.h | 21 ++++++++++++++++++--- 11 files changed, 58 insertions(+), 20 deletions(-) (limited to 'fs/netfs/misc.c') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 74122540e00f..df7ae381a708 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -249,7 +249,7 @@ void v9fs_free_inode(struct inode *inode) static void v9fs_set_netfs_context(struct inode *inode) { struct v9fs_inode *v9inode = V9FS_I(inode); - netfs_inode_init(&v9inode->netfs, &v9fs_req_ops); + netfs_inode_init(&v9inode->netfs, &v9fs_req_ops, true); } int v9fs_init_inode(struct v9fs_session_info *v9ses, diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 1f656005018e..9c517269ff95 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -76,7 +76,7 @@ struct inode *afs_iget_pseudo_dir(struct super_block *sb, bool root) /* there shouldn't be an existing inode */ BUG_ON(!(inode->i_state & I_NEW)); - netfs_inode_init(&vnode->netfs, NULL); + netfs_inode_init(&vnode->netfs, NULL, false); inode->i_size = 0; inode->i_mode = S_IFDIR | S_IRUGO | S_IXUGO; if (root) { diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 37485ae31471..381521e9e118 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -58,7 +58,7 @@ static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *paren */ static void afs_set_netfs_context(struct afs_vnode *vnode) { - netfs_inode_init(&vnode->netfs, &afs_req_ops); + netfs_inode_init(&vnode->netfs, &afs_req_ops, true); } /* @@ -168,6 +168,7 @@ static void afs_apply_status(struct afs_operation *op, struct inode *inode = 
&vnode->netfs.inode; struct timespec64 t; umode_t mode; + bool unexpected_jump = false; bool data_changed = false; bool change_size = vp->set_size; @@ -231,6 +232,7 @@ static void afs_apply_status(struct afs_operation *op, } change_size = true; data_changed = true; + unexpected_jump = true; } else if (vnode->status.type == AFS_FTYPE_DIR) { /* Expected directory change is handled elsewhere so * that we can locally edit the directory and save on a @@ -252,6 +254,8 @@ static void afs_apply_status(struct afs_operation *op, vnode->netfs.remote_i_size = status->size; if (change_size || status->size > i_size_read(inode)) { afs_set_i_size(vnode, status->size); + if (unexpected_jump) + vnode->netfs.zero_point = status->size; inode_set_ctime_to_ts(inode, t); inode_set_atime_to_ts(inode, t); } @@ -865,17 +869,17 @@ static void afs_setattr_success(struct afs_operation *op) static void afs_setattr_edit_file(struct afs_operation *op) { struct afs_vnode_param *vp = &op->file[0]; - struct inode *inode = &vp->vnode->netfs.inode; + struct afs_vnode *vnode = vp->vnode; if (op->setattr.attr->ia_valid & ATTR_SIZE) { loff_t size = op->setattr.attr->ia_size; loff_t i_size = op->setattr.old_i_size; - if (size < i_size) - truncate_pagecache(inode, size); - if (size != i_size) - fscache_resize_cookie(afs_vnode_cache(vp->vnode), - vp->scb.status.size); + if (size != i_size) { + truncate_setsize(&vnode->netfs.inode, size); + netfs_resize_file(&vnode->netfs, size, true); + fscache_resize_cookie(afs_vnode_cache(vnode), size); + } } } @@ -943,11 +947,11 @@ int afs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, */ if (!(attr->ia_valid & (supported & ~ATTR_SIZE & ~ATTR_MTIME)) && attr->ia_size < i_size && - attr->ia_size > vnode->status.size) { - truncate_pagecache(inode, attr->ia_size); + attr->ia_size > vnode->netfs.remote_i_size) { + truncate_setsize(inode, attr->ia_size); + netfs_resize_file(&vnode->netfs, size, false); fscache_resize_cookie(afs_vnode_cache(vnode), attr->ia_size); - 
i_size_write(inode, attr->ia_size); ret = 0; goto out_unlock; } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 3149d79a9dbe..0c25d326afc4 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -574,7 +574,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) doutc(fsc->client, "%p\n", &ci->netfs.inode); /* Set parameters for the netfs library */ - netfs_inode_init(&ci->netfs, &ceph_netfs_ops); + netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false); spin_lock_init(&ci->i_ceph_lock); diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 6ca6c4bde5eb..08f28800232c 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -73,7 +73,7 @@ static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx, if (folio_test_uptodate(folio)) return NETFS_FOLIO_IS_UPTODATE; - if (pos >= ctx->remote_i_size) + if (pos >= ctx->zero_point) return NETFS_MODIFY_AND_CLEAR; if (!maybe_trouble && offset == 0 && len >= flen) diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c index bb0c2718f57b..aad05f2349a4 100644 --- a/fs/netfs/direct_write.c +++ b/fs/netfs/direct_write.c @@ -134,6 +134,7 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from) struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; struct netfs_inode *ictx = netfs_inode(inode); + unsigned long long end; ssize_t ret; _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode)); @@ -155,6 +156,9 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from) ret = kiocb_invalidate_pages(iocb, iov_iter_count(from)); if (ret < 0) goto out; + end = iocb->ki_pos + iov_iter_count(from); + if (end > ictx->zero_point) + ictx->zero_point = end; fscache_invalidate(netfs_i_cookie(ictx), NULL, i_size_read(inode), FSCACHE_INVAL_DIO_WRITE); diff --git a/fs/netfs/io.c b/fs/netfs/io.c index 14c18be5aca0..5b5af96cd4b9 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -569,6 +569,7 @@ 
netfs_rreq_prepare_read(struct netfs_io_request *rreq, struct iov_iter *io_iter) { enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER; + struct netfs_inode *ictx = netfs_inode(rreq->inode); size_t lsize; _enter("%llx-%llx,%llx", subreq->start, subreq->start + subreq->len, rreq->i_size); @@ -586,6 +587,14 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq, * to make serial calls, it can indicate a short read and then * we will call it again. */ + if (rreq->origin != NETFS_DIO_READ) { + if (subreq->start >= ictx->zero_point) { + source = NETFS_FILL_WITH_ZEROES; + goto set; + } + if (subreq->len > ictx->zero_point - subreq->start) + subreq->len = ictx->zero_point - subreq->start; + } if (subreq->len > rreq->i_size - subreq->start) subreq->len = rreq->i_size - subreq->start; if (rreq->rsize && subreq->len > rreq->rsize) @@ -607,6 +616,7 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq, } } +set: if (subreq->len > rreq->len) pr_warn("R=%08x[%u] SREQ>RREQ %zx > %zx\n", rreq->debug_id, subreq->debug_index, diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index eeb44abe59c5..0e3af37fc924 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -240,6 +240,11 @@ EXPORT_SYMBOL(netfs_invalidate_folio); bool netfs_release_folio(struct folio *folio, gfp_t gfp) { struct netfs_inode *ctx = netfs_inode(folio_inode(folio)); + unsigned long long end; + + end = folio_pos(folio) + folio_size(folio); + if (end > ctx->zero_point) + ctx->zero_point = end; if (folio_test_private(folio)) return false; diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h index 5407ab8c8783..e3cb4923316b 100644 --- a/fs/nfs/fscache.h +++ b/fs/nfs/fscache.h @@ -80,7 +80,7 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs) } static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi) { - netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops); + netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops, false); } extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr); extern void 
nfs_netfs_read_completion(struct nfs_pgio_header *hdr); diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 96a65cf9b5ec..07cd88897c33 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -1220,7 +1220,7 @@ static int cifs_precopy_set_eof(struct inode *src_inode, struct cifsInodeInfo *s if (rc < 0) goto set_failed; - netfs_resize_file(&src_cifsi->netfs, src_end); + netfs_resize_file(&src_cifsi->netfs, src_end, true); fscache_resize_cookie(cifs_inode_cookie(src_inode), src_end); return 0; @@ -1351,7 +1351,7 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, smb_file_src, smb_file_target, off, len, destoff); if (rc == 0 && new_size > i_size_read(target_inode)) { truncate_setsize(target_inode, new_size); - netfs_resize_file(&target_cifsi->netfs, new_size); + netfs_resize_file(&target_cifsi->netfs, new_size, true); fscache_resize_cookie(cifs_inode_cookie(target_inode), new_size); } diff --git a/include/linux/netfs.h b/include/linux/netfs.h index 8a2dd882a781..852956aa3c4b 100644 --- a/include/linux/netfs.h +++ b/include/linux/netfs.h @@ -136,6 +136,8 @@ struct netfs_inode { struct fscache_cookie *cache; #endif loff_t remote_i_size; /* Size of the remote file */ + loff_t zero_point; /* Size after which we assume there's no data + * on the server */ unsigned long flags; #define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */ #define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */ @@ -453,31 +455,44 @@ static inline struct netfs_inode *netfs_inode(struct inode *inode) * netfs_inode_init - Initialise a netfslib inode context * @ctx: The netfs inode to initialise * @ops: The netfs's operations list + * @use_zero_point: True to use the zero_point read optimisation * * Initialise the netfs library context struct. This is expected to follow on * directly from the VFS inode struct. 
*/ static inline void netfs_inode_init(struct netfs_inode *ctx, - const struct netfs_request_ops *ops) + const struct netfs_request_ops *ops, + bool use_zero_point) { ctx->ops = ops; ctx->remote_i_size = i_size_read(&ctx->inode); + ctx->zero_point = LLONG_MAX; ctx->flags = 0; #if IS_ENABLED(CONFIG_FSCACHE) ctx->cache = NULL; #endif + /* ->releasepage() drives zero_point */ + if (use_zero_point) { + ctx->zero_point = ctx->remote_i_size; + mapping_set_release_always(ctx->inode.i_mapping); + } } /** * netfs_resize_file - Note that a file got resized * @ctx: The netfs inode being resized * @new_i_size: The new file size + * @changed_on_server: The change was applied to the server * * Inform the netfs lib that a file got resized so that it can adjust its state. */ -static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size) +static inline void netfs_resize_file(struct netfs_inode *ctx, loff_t new_i_size, + bool changed_on_server) { - ctx->remote_i_size = new_i_size; + if (changed_on_server) + ctx->remote_i_size = new_i_size; + if (new_i_size < ctx->zero_point) + ctx->zero_point = new_i_size; } /** -- cgit v1.2.3 From 202bc57b675601bc07b5942369ecc16af64d1b95 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 9 Jan 2024 17:17:36 +0000 Subject: netfs: Don't use certain unnecessary folio_*() functions Filesystems should use folio->index and folio->mapping, instead of folio_index(folio), folio_mapping() and folio_file_mapping() since they know that it's in the pagecache. 
Change this automagically with: perl -p -i -e 's/folio_mapping[(]([^)]*)[)]/\1->mapping/g' fs/netfs/*.c perl -p -i -e 's/folio_file_mapping[(]([^)]*)[)]/\1->mapping/g' fs/netfs/*.c perl -p -i -e 's/folio_index[(]([^)]*)[)]/\1->index/g' fs/netfs/*.c Reported-by: Matthew Wilcox Signed-off-by: David Howells cc: Jeff Layton cc: linux-afs@lists.infradead.org cc: linux-cachefs@redhat.com cc: linux-cifs@vger.kernel.org cc: linux-erofs@lists.ozlabs.org cc: linux-fsdevel@vger.kernel.org --- fs/netfs/buffered_read.c | 12 ++++++------ fs/netfs/buffered_write.c | 10 +++++----- fs/netfs/io.c | 2 +- fs/netfs/misc.c | 2 +- 4 files changed, 13 insertions(+), 13 deletions(-) (limited to 'fs/netfs/misc.c') diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c index a59e7b2edaac..3298c29b5548 100644 --- a/fs/netfs/buffered_read.c +++ b/fs/netfs/buffered_read.c @@ -101,7 +101,7 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq) } if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) { - if (folio_index(folio) == rreq->no_unlock_folio && + if (folio->index == rreq->no_unlock_folio && test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags)) _debug("no unlock"); else @@ -246,13 +246,13 @@ EXPORT_SYMBOL(netfs_readahead); */ int netfs_read_folio(struct file *file, struct folio *folio) { - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; struct netfs_io_request *rreq; struct netfs_inode *ctx = netfs_inode(mapping->host); struct folio *sink = NULL; int ret; - _enter("%lx", folio_index(folio)); + _enter("%lx", folio->index); rreq = netfs_alloc_request(mapping, file, folio_file_pos(folio), folio_size(folio), @@ -460,7 +460,7 @@ retry: ret = PTR_ERR(rreq); goto error; } - rreq->no_unlock_folio = folio_index(folio); + rreq->no_unlock_folio = folio->index; __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags); ret = netfs_begin_cache_read(rreq, ctx); @@ -518,7 +518,7 @@ int netfs_prefetch_for_write(struct file 
*file, struct folio *folio, size_t offset, size_t len) { struct netfs_io_request *rreq; - struct address_space *mapping = folio_file_mapping(folio); + struct address_space *mapping = folio->mapping; struct netfs_inode *ctx = netfs_inode(mapping->host); unsigned long long start = folio_pos(folio); size_t flen = folio_size(folio); @@ -535,7 +535,7 @@ int netfs_prefetch_for_write(struct file *file, struct folio *folio, goto error; } - rreq->no_unlock_folio = folio_index(folio); + rreq->no_unlock_folio = folio->index; __set_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags); ret = netfs_begin_cache_read(rreq, ctx); if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS) diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c index 93dc76f34e39..e7f9ba6fb16b 100644 --- a/fs/netfs/buffered_write.c +++ b/fs/netfs/buffered_write.c @@ -343,7 +343,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter, break; default: WARN(true, "Unexpected modify type %u ix=%lx\n", - howto, folio_index(folio)); + howto, folio->index); ret = -EIO; goto error_folio_unlock; } @@ -648,7 +648,7 @@ static void netfs_pages_written_back(struct netfs_io_request *wreq) xas_for_each(&xas, folio, last) { WARN(!folio_test_writeback(folio), "bad %zx @%llx page %lx %lx\n", - wreq->len, wreq->start, folio_index(folio), last); + wreq->len, wreq->start, folio->index, last); if ((finfo = netfs_folio_info(folio))) { /* Streaming writes cannot be redirtied whilst under @@ -795,7 +795,7 @@ static void netfs_extend_writeback(struct address_space *mapping, continue; if (xa_is_value(folio)) break; - if (folio_index(folio) != index) { + if (folio->index != index) { xas_reset(xas); break; } @@ -901,7 +901,7 @@ static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping, long count = wbc->nr_to_write; int ret; - _enter(",%lx,%llx-%llx,%u", folio_index(folio), start, end, caching); + _enter(",%lx,%llx-%llx,%u", folio->index, start, end, caching); wreq = 
netfs_alloc_request(mapping, NULL, start, folio_size(folio), NETFS_WRITEBACK); @@ -1047,7 +1047,7 @@ search_again: start = folio_pos(folio); /* May regress with THPs */ - _debug("wback %lx", folio_index(folio)); + _debug("wback %lx", folio->index); /* At this point we hold neither the i_pages lock nor the page lock: * the page may be truncated or invalidated (changing page->mapping to diff --git a/fs/netfs/io.c b/fs/netfs/io.c index 4309edf33862..e8ff1e61ce79 100644 --- a/fs/netfs/io.c +++ b/fs/netfs/io.c @@ -124,7 +124,7 @@ static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq, /* We might have multiple writes from the same huge * folio, but we mustn't unlock a folio more than once. */ - if (have_unlocked && folio_index(folio) <= unlocked) + if (have_unlocked && folio->index <= unlocked) continue; unlocked = folio_next_index(folio) - 1; trace_netfs_folio(folio, netfs_folio_trace_end_copy); diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c index 0e3af37fc924..90051ced8e2a 100644 --- a/fs/netfs/misc.c +++ b/fs/netfs/misc.c @@ -180,7 +180,7 @@ void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length) struct netfs_folio *finfo = NULL; size_t flen = folio_size(folio); - _enter("{%lx},%zx,%zx", folio_index(folio), offset, length); + _enter("{%lx},%zx,%zx", folio->index, offset, length); folio_wait_fscache(folio); -- cgit v1.2.3