diff options
author: Darrick J. Wong <djwong@kernel.org> | 2021-10-05 10:47:36 -0700
committer: Darrick J. Wong <djwong@kernel.org> | 2021-10-22 16:41:17 -0700
commit | 04e01af9318f50df5e1974d8e2ce7d54b6cfbe55 (patch) | |
tree | c9ecd431013084675617dda8097c2a07ca3f3156 | |
parent | 058725b83f3ab866a225aec3bfa204c0b7f4d3a8 (diff) |
xfs: map xfile pages directly into xfs_buf
Map the xfile pages directly into xfs_buf to reduce memory overhead.
It's silly to use memory to stage changes to shmem pages for ephemeral
btrees that don't care about transactionality.
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
-rw-r--r-- | fs/xfs/libxfs/xfs_btree_mem.h | 2 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_rmap_btree.c | 4 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_rtrmap_btree.c | 2 | ||||
-rw-r--r-- | fs/xfs/scrub/trace.h | 2 | ||||
-rw-r--r-- | fs/xfs/scrub/xfbtree.c | 11 | ||||
-rw-r--r-- | fs/xfs/scrub/xfile.c | 111 | ||||
-rw-r--r-- | fs/xfs/scrub/xfile.h | 22 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.c | 170 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.h | 5 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_buf.c | 7 |
10 files changed, 327 insertions, 9 deletions
diff --git a/fs/xfs/libxfs/xfs_btree_mem.h b/fs/xfs/libxfs/xfs_btree_mem.h index ddeb05ddba4a..91cf23ee1c44 100644 --- a/fs/xfs/libxfs/xfs_btree_mem.h +++ b/fs/xfs/libxfs/xfs_btree_mem.h @@ -88,5 +88,7 @@ xfbtree_free_block(struct xfs_btree_cur *cur, struct xfs_buf *bp) /* btree has long pointers */ #define XFBTREE_CREATE_LONG_PTRS (1U << 0) +/* buffers should be directly mapped from memory */ +#define XFBTREE_DIRECT_MAP (1U << 1) #endif /* __XFS_BTREE_MEM_H__ */ diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c index 0465da82e11f..d575295fbff9 100644 --- a/fs/xfs/libxfs/xfs_rmap_btree.c +++ b/fs/xfs/libxfs/xfs_rmap_btree.c @@ -551,8 +551,8 @@ xfs_rmapbt_mem_create( struct xfs_mount *mp, const char *name) { - return xfbtree_create(mp, XFS_BTNUM_RMAP, &xfs_rmapbt_buf_ops, 0, - name); + return xfbtree_create(mp, XFS_BTNUM_RMAP, &xfs_rmapbt_buf_ops, + XFBTREE_DIRECT_MAP, name); } /* diff --git a/fs/xfs/libxfs/xfs_rtrmap_btree.c b/fs/xfs/libxfs/xfs_rtrmap_btree.c index 29de9bfbde20..6bbabf12b0c9 100644 --- a/fs/xfs/libxfs/xfs_rtrmap_btree.c +++ b/fs/xfs/libxfs/xfs_rtrmap_btree.c @@ -566,7 +566,7 @@ xfs_rtrmapbt_mem_create( const char *name) { return xfbtree_create(mp, XFS_BTNUM_RTRMAP, &xfs_rtrmapbt_buf_ops, - XFBTREE_CREATE_LONG_PTRS, name); + XFBTREE_CREATE_LONG_PTRS | XFBTREE_DIRECT_MAP, name); } /* diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h index 25fa1eda4162..1342dd3653fc 100644 --- a/fs/xfs/scrub/trace.h +++ b/fs/xfs/scrub/trace.h @@ -921,6 +921,8 @@ DEFINE_XFILE_EVENT(xfile_pwrite); DEFINE_XFILE_EVENT(xfile_seek_data); DEFINE_XFILE_EVENT(xfile_discard); DEFINE_XFILE_EVENT(xfile_prealloc); +DEFINE_XFILE_EVENT(xfile_obj_get_page); +DEFINE_XFILE_EVENT(xfile_obj_put_page); TRACE_EVENT(xfarray_sort_stats, TP_PROTO(struct xfarray *xfa, unsigned int max_stack_depth, diff --git a/fs/xfs/scrub/xfbtree.c b/fs/xfs/scrub/xfbtree.c index c43c18136733..1098234eec3b 100644 --- a/fs/xfs/scrub/xfbtree.c +++ b/fs/xfs/scrub/xfbtree.c @@ 
-151,8 +151,9 @@ xfs_btree_mem_head_read_buf( { struct xfs_mount *mp = btp->bt_mount; - return xfs_trans_read_buf(mp, tp, btp, XFS_BTREE_MEM_HEAD_DADDR, 1, 0, - bpp, &xfs_btree_mem_head_buf_ops); + return xfs_trans_read_buf(mp, tp, btp, XFS_BTREE_MEM_HEAD_DADDR, + XFS_FSB_TO_BB(mp, 1), 0, bpp, + &xfs_btree_mem_head_buf_ops); } /* Return tree height from the in-memory btree head */ @@ -239,6 +240,9 @@ xfbtree_create( goto err_xfile; } + if (mp->m_bsize == PAGE_SIZE && (flags & XFBTREE_DIRECT_MAP)) + xfbt->target->bt_flags |= XFS_BUFTARG_DIRECT_MAP; + xfbt->freespace = kmem_alloc(sizeof(struct xbitmap), KM_NOFS | KM_MAYFAIL); if (!xfbt->freespace) { @@ -264,7 +268,8 @@ xfbtree_create( goto err_freesp; /* Initialize the in-memory btree header block. */ - error = xfs_buf_get(xfbt->target, XFS_BTREE_MEM_HEAD_DADDR, 1, &bp); + error = xfs_buf_get(xfbt->target, XFS_BTREE_MEM_HEAD_DADDR, + XFS_FSB_TO_BB(mp, 1), &bp); if (error) goto err_freesp; diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c index 310e96f3f1d4..764f09984c06 100644 --- a/fs/xfs/scrub/xfile.c +++ b/fs/xfs/scrub/xfile.c @@ -496,3 +496,114 @@ next_pgoff: out: return error; } + +/* + * Grab the (locked) page for a memory object. The object cannot span a page + * boundary. Returns 0 (and a locked page) if successful, -ENOTBLK if we + * cannot grab the page, or the usual negative errno. 
+ */ +int +xfile_obj_get_page( + struct xfile *xf, + loff_t pos, + unsigned int len, + struct page **pagep) +{ + struct inode *inode = file_inode(xf->file); + struct address_space *mapping = inode->i_mapping; + struct page *page = NULL; + void *fsdata = NULL; + unsigned int pflags; + int error; + + if (inode->i_sb->s_maxbytes - pos < len) + return -ENOMEM; + if (len > PAGE_SIZE - offset_in_page(pos)) + return -ENOTBLK; + + trace_xfile_obj_get_page(xf, pos, len); + + pflags = memalloc_nofs_save(); + + /* + * We call pagecache_write_begin directly here to avoid all the freezer + * protection lock-taking that happens in the normal path. shmem + * doesn't support fs freeze, but lockdep doesn't know that and will + * trip over that. + */ + error = pagecache_write_begin(NULL, mapping, pos, len, AOP_FLAG_NOFS, + &page, &fsdata); + if (error) + goto out_pflags; + + /* + * We don't support passing fsdata to the caller and back to + * xfile_put_page, so if we get a non-null pointer we just bail out. + */ + if (fsdata != NULL || PageHWPoison(page)) { + int ret; + + ASSERT(fsdata != NULL); + ret = pagecache_write_end(NULL, mapping, pos, len, 0, page, + fsdata); + if (ret < 0) + error = ret; + else + error = -ENOTBLK; + goto out_pflags; + } + + /* We got the page, so make sure we push out EOF. */ + if (i_size_read(inode) < pos + len) + i_size_write(inode, pos + len); + + /* + * If the page isn't up to date, fill it with zeroes before we hand it + * to the caller and make sure the backing store will hold on to them. + */ + if (!PageUptodate(page)) { + void *kaddr; + + kaddr = kmap_local_page(page); + memset(kaddr, 0, PAGE_SIZE); + kunmap_local(kaddr); + SetPageUptodate(page); + set_page_dirty(page); + } + + *pagep = page; +out_pflags: + memalloc_nofs_restore(pflags); + return error; +} + +/* + * Release the (locked) page for a memory object. The page must have been + * obtained by xfile_obj_get_page. Returns 0 or a negative errno. 
+ */ +int +xfile_obj_put_page( + struct xfile *xf, + loff_t pos, + unsigned int len, + struct page *page) +{ + struct inode *inode = file_inode(xf->file); + struct address_space *mapping = inode->i_mapping; + unsigned int pflags; + int ret; + + ASSERT(len <= PAGE_SIZE - offset_in_page(pos)); + + trace_xfile_obj_put_page(xf, pos, len); + + pflags = memalloc_nofs_save(); + ret = pagecache_write_end(NULL, mapping, pos, len, len, page, NULL); + memalloc_nofs_restore(pflags); + + if (ret < 0) + return ret; + if (ret != len) + return -EIO; + return 0; +} diff --git a/fs/xfs/scrub/xfile.h b/fs/xfs/scrub/xfile.h index c6f6f56b4d0f..e63d61a380fe 100644 --- a/fs/xfs/scrub/xfile.h +++ b/fs/xfs/scrub/xfile.h @@ -57,6 +57,10 @@ struct xfile_stat { int xfile_stat(struct xfile *xf, struct xfile_stat *statbuf); int xfile_dump(struct xfile *xf); +int xfile_obj_get_page(struct xfile *xf, loff_t offset, unsigned int len, + struct page **pagep); +int xfile_obj_put_page(struct xfile *xf, loff_t offset, unsigned int len, + struct page *page); #else static inline int xfile_obj_load(struct xfile *xf, void *buf, size_t count, loff_t offset) @@ -69,6 +73,24 @@ xfile_obj_store(struct xfile *xf, void *buf, size_t count, loff_t offset) { return -EIO; } +static inline int +xfile_obj_get_page( + struct xfile *xf, + loff_t offset, + unsigned int len, + struct page **pagep) +{ + return -EIO; +} +static inline int +xfile_obj_put_page( + struct xfile *xf, + loff_t offset, + unsigned int len, + struct page *page) +{ + return -EIO; +} #endif /* CONFIG_XFS_ONLINE_SCRUB */ #endif /* __XFS_SCRUB_XFILE_H__ */ diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index b8114a4d9761..5d1eebee2555 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -271,6 +271,45 @@ _xfs_buf_alloc( } static void +xfs_buf_free_direct_pages( + struct xfs_buf *bp) +{ + struct xfs_buf_map *map; + unsigned int m, p, n; + int error = 0, err2; + + ASSERT(bp->b_target->bt_flags & XFS_BUFTARG_DIRECT_MAP); + + if 
(xfs_buf_is_vmapped(bp)) + vm_unmap_ram(bp->b_addr, bp->b_page_count); + + for (m = 0, p = 0, map = bp->b_maps; m < bp->b_map_count; m++, map++) { + for (n = 0; n < map->bm_len; n += BTOBB(PAGE_SIZE)) { + struct page *page = bp->b_pages[p]; + unsigned int len; + + len = min_t(unsigned int, BBTOB(map->bm_len - n), + PAGE_SIZE); + + lock_page(page); + err2 = xfile_obj_put_page(bp->b_target->bt_xfile, + BBTOB(map->bm_bn + n), len, page); + if (!error && err2) + error = err2; + bp->b_pages[p++] = NULL; + } + } + + if (error) + xfs_err(bp->b_mount, "%s failed errno %d", __func__, error); + + if (bp->b_pages != bp->b_page_array) + kmem_free(bp->b_pages); + bp->b_pages = NULL; + bp->b_flags &= ~_XBF_DIRECT_MAP; +} + +static void xfs_buf_free_pages( struct xfs_buf *bp) { @@ -302,7 +341,9 @@ xfs_buf_free( ASSERT(list_empty(&bp->b_lru)); - if (bp->b_flags & _XBF_PAGES) + if (bp->b_flags & _XBF_DIRECT_MAP) + xfs_buf_free_direct_pages(bp); + else if (bp->b_flags & _XBF_PAGES) xfs_buf_free_pages(bp); else if (bp->b_flags & _XBF_KMEM) kmem_free(bp->b_addr); @@ -401,6 +442,93 @@ xfs_buf_alloc_pages( } /* + * Try to map storage directly, if the target supports it. Returns 0 for + * success, -ENOTBLK to mean "not supported", or the usual negative errno. + */ +static int +xfs_buf_alloc_direct_pages( + struct xfs_buf *bp, + xfs_buf_flags_t flags) +{ + struct xfs_buf_map *map; + gfp_t gfp_mask = __GFP_NOWARN; + const unsigned int page_align_mask = PAGE_SIZE - 1; + unsigned int m, p, n; + int error; + + ASSERT(bp->b_target->bt_flags & XFS_BUFTARG_IN_MEMORY); + + /* For direct-map buffers, each map has to be page aligned. 
*/ + for (m = 0, map = bp->b_maps; m < bp->b_map_count; m++, map++) + if (BBTOB(map->bm_bn | map->bm_len) & page_align_mask) + return -ENOTBLK; + + if (flags & XBF_READ_AHEAD) + gfp_mask |= __GFP_NORETRY; + else + gfp_mask |= GFP_NOFS; + + /* Make sure that we have a page list */ + bp->b_page_count = DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE); + if (bp->b_page_count <= XB_PAGES) { + bp->b_pages = bp->b_page_array; + } else { + bp->b_pages = kzalloc(sizeof(struct page *) * bp->b_page_count, + gfp_mask); + if (!bp->b_pages) + return -ENOMEM; + } + + /* Map in the xfile pages. */ + for (m = 0, p = 0, map = bp->b_maps; m < bp->b_map_count; m++, map++) { + for (n = 0; n < map->bm_len; n += BTOBB(PAGE_SIZE)) { + unsigned int len; + + len = min_t(unsigned int, BBTOB(map->bm_len - n), + PAGE_SIZE); + + error = xfile_obj_get_page(bp->b_target->bt_xfile, + BBTOB(map->bm_bn + n), len, + &bp->b_pages[p++]); + if (error) + goto fail; + } + } + + /* Unlock all the pages now that we've grabbed them all. */ + for (p = 0; p < bp->b_page_count; p++) { + ASSERT(PageUptodate(bp->b_pages[p])); + unlock_page(bp->b_pages[p]); + } + + bp->b_flags |= _XBF_DIRECT_MAP; + return 0; + +fail: + for (m = 0, p = 0, map = bp->b_maps; m < bp->b_map_count; m++, map++) { + for (n = 0; n < map->bm_len; n += BTOBB(PAGE_SIZE)) { + struct page *page = bp->b_pages[p++]; + unsigned int len; + + if (!page) + continue; + + len = min_t(unsigned int, BBTOB(map->bm_len - n), + PAGE_SIZE); + + xfile_obj_put_page(bp->b_target->bt_xfile, + BBTOB(map->bm_bn + n), len, page); + } + } + + if (bp->b_pages != bp->b_page_array) + kmem_free(bp->b_pages); + bp->b_pages = NULL; + bp->b_page_count = 0; + return error; +} + +/* * Map buffer into kernel address-space if necessary. 
*/ STATIC int @@ -408,7 +536,8 @@ _xfs_buf_map_pages( struct xfs_buf *bp, uint flags) { - ASSERT(bp->b_flags & _XBF_PAGES); + ASSERT(bp->b_flags & (_XBF_PAGES | _XBF_DIRECT_MAP)); + if (bp->b_page_count == 1) { /* A single page buffer is always mappable */ bp->b_addr = page_address(bp->b_pages[0]); @@ -625,7 +754,7 @@ found: */ if (bp->b_flags & XBF_STALE) { ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0); - bp->b_flags &= _XBF_KMEM | _XBF_PAGES; + bp->b_flags &= _XBF_KMEM | _XBF_PAGES | _XBF_DIRECT_MAP; bp->b_ops = NULL; } @@ -680,6 +809,13 @@ xfs_buf_get_map( if (error) return error; + /* Try to map pages directly, or fall back to memory. */ + if (target->bt_flags & XFS_BUFTARG_DIRECT_MAP) { + error = xfs_buf_alloc_direct_pages(new_bp, flags); + if (error && error != -ENOTBLK) + goto out_free_buf; + } + /* * For buffers that fit entirely within a single page, first attempt to * allocate the memory from the heap to minimise memory usage. If we @@ -1543,6 +1679,29 @@ xfs_buf_ioapply_in_memory( xfs_buf_ioend(bp); } +void +xfs_buf_ioapply_direct_pages( + struct xfs_buf *bp, + bool is_write) +{ + unsigned int i; + bool ioerr = false; + + for (i = 0; i < bp->b_page_count; i++) { + struct page *page = bp->b_pages[i]; + + lock_page(page); + if (is_write) + set_page_dirty(page); + ioerr |= PageHWPoison(page); + unlock_page(page); + } + + if (ioerr) + cmpxchg(&bp->b_io_error, 0, -EIO); +} + + STATIC void _xfs_buf_ioapply( struct xfs_buf *bp) @@ -1600,6 +1759,11 @@ _xfs_buf_ioapply( /* we only use the buffer cache for meta-data */ op |= REQ_META; + if (bp->b_target->bt_flags & XFS_BUFTARG_DIRECT_MAP) { + xfs_buf_ioapply_direct_pages(bp, bp->b_flags & XBF_WRITE); + return; + } + if (bp->b_target->bt_flags & XFS_BUFTARG_IN_MEMORY) { xfs_buf_ioapply_in_memory(bp); return; diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 5328db3af1e3..30eafc6d3775 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -41,6 +41,7 @@ struct xfile; #define _XBF_PAGES (1 << 20)/* backed by 
refcounted pages */ #define _XBF_KMEM (1 << 21)/* backed by heap memory */ #define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */ +#define _XBF_DIRECT_MAP (1 << 23)/* pages directly mapped to storage */ /* flags used only as arguments to access routines */ #define _XBF_IGNORE_STALE (1 << 29)/* ignore stale buffers */ @@ -64,6 +65,7 @@ typedef unsigned int xfs_buf_flags_t; { _XBF_PAGES, "PAGES" }, \ { _XBF_KMEM, "KMEM" }, \ { _XBF_DELWRI_Q, "DELWRI_Q" }, \ + { _XBF_DIRECT_MAP, "DIRECT_MAP" }, \ /* The following interface flags should never be set */ \ { _XBF_IGNORE_STALE, "IGNORE_STALE" }, \ { XBF_TRYLOCK, "TRYLOCK" }, \ @@ -119,6 +121,8 @@ typedef struct xfs_buftarg { #define XFS_BUFTARG_SELF_CACHED (1U << 0) /* in-memory buftarg */ #define XFS_BUFTARG_IN_MEMORY (1U << 1) +/* buffer pages are direct-mapped (implies IN_MEMORY) */ +#define XFS_BUFTARG_DIRECT_MAP (1U << 2) static inline bool xfs_buftarg_in_memory( @@ -424,5 +428,6 @@ xfs_buftarg_zeroout( int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops); bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic); bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic); +void xfs_buf_ioapply_direct_pages(struct xfs_buf *bp, bool is_write); #endif /* __XFS_BUF_H__ */ diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 6549e50d852c..9cbd19531eb0 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -461,6 +461,13 @@ xfs_trans_dirty_buf( ASSERT(atomic_read(&bip->bli_refcount) > 0); /* + * For buffers that are directly mapped to an in-memory file, mark the + * pages dirty so that they'll be persisted properly. + */ + if (bp->b_target->bt_flags & XFS_BUFTARG_DIRECT_MAP) + xfs_buf_ioapply_direct_pages(bp, true); + + /* * If we invalidated the buffer within this transaction, then * cancel the invalidation now that we're dirtying the buffer * again. There are no races with the code in xfs_buf_item_unpin(), |