author    Darrick J. Wong <djwong@kernel.org>  2021-10-05 10:47:36 -0700
committer Darrick J. Wong <djwong@kernel.org>  2021-10-22 16:41:17 -0700
commit    04e01af9318f50df5e1974d8e2ce7d54b6cfbe55 (patch)
tree      c9ecd431013084675617dda8097c2a07ca3f3156
parent    058725b83f3ab866a225aec3bfa204c0b7f4d3a8 (diff)
xfs: map xfile pages directly into xfs_buf
Map the xfile pages directly into xfs_buf to reduce memory overhead.
It's silly to use memory to stage changes to shmem pages for ephemeral
btrees that don't care about transactionality.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
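For illustration only (not part of the commit): a minimal sketch of the page
borrow/return pattern that the new xfile_obj_get_page()/xfile_obj_put_page()
helpers provide and that xfs_buf_alloc_direct_pages()/xfs_buf_free_direct_pages()
follow in the diff below. The function name is hypothetical, error handling is
abbreviated, and it assumes a page-aligned offset within an existing xfile.

/*
 * Sketch: borrow one page-aligned object's backing page from an xfile,
 * use it, and hand it back.  The page comes back locked from the get
 * helper and must be relocked before it is returned.
 */
static int
xfile_obj_page_example(
	struct xfile	*xf,
	loff_t		pos)	/* assumed page aligned */
{
	struct page	*page;
	int		error;

	/* Grab the locked shmem page backing [pos, pos + PAGE_SIZE). */
	error = xfile_obj_get_page(xf, pos, PAGE_SIZE, &page);
	if (error)
		return error;

	/* Unlock while the page is mapped into an xfs_buf and used... */
	unlock_page(page);

	/* ...then relock and return it; the put helper marks it dirty. */
	lock_page(page);
	return xfile_obj_put_page(xf, pos, PAGE_SIZE, page);
}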
-rw-r--r--  fs/xfs/libxfs/xfs_btree_mem.h    |   2
-rw-r--r--  fs/xfs/libxfs/xfs_rmap_btree.c   |   4
-rw-r--r--  fs/xfs/libxfs/xfs_rtrmap_btree.c |   2
-rw-r--r--  fs/xfs/scrub/trace.h             |   2
-rw-r--r--  fs/xfs/scrub/xfbtree.c           |  11
-rw-r--r--  fs/xfs/scrub/xfile.c             | 111
-rw-r--r--  fs/xfs/scrub/xfile.h             |  22
-rw-r--r--  fs/xfs/xfs_buf.c                 | 170
-rw-r--r--  fs/xfs/xfs_buf.h                 |   5
-rw-r--r--  fs/xfs/xfs_trans_buf.c           |   7
10 files changed, 327 insertions(+), 9 deletions(-)
diff --git a/fs/xfs/libxfs/xfs_btree_mem.h b/fs/xfs/libxfs/xfs_btree_mem.h
index ddeb05ddba4a..91cf23ee1c44 100644
--- a/fs/xfs/libxfs/xfs_btree_mem.h
+++ b/fs/xfs/libxfs/xfs_btree_mem.h
@@ -88,5 +88,7 @@ xfbtree_free_block(struct xfs_btree_cur *cur, struct xfs_buf *bp)
/* btree has long pointers */
#define XFBTREE_CREATE_LONG_PTRS (1U << 0)
+/* buffers should be directly mapped from memory */
+#define XFBTREE_DIRECT_MAP (1U << 1)
#endif /* __XFS_BTREE_MEM_H__ */
diff --git a/fs/xfs/libxfs/xfs_rmap_btree.c b/fs/xfs/libxfs/xfs_rmap_btree.c
index 0465da82e11f..d575295fbff9 100644
--- a/fs/xfs/libxfs/xfs_rmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rmap_btree.c
@@ -551,8 +551,8 @@ xfs_rmapbt_mem_create(
struct xfs_mount *mp,
const char *name)
{
- return xfbtree_create(mp, XFS_BTNUM_RMAP, &xfs_rmapbt_buf_ops, 0,
- name);
+ return xfbtree_create(mp, XFS_BTNUM_RMAP, &xfs_rmapbt_buf_ops,
+ XFBTREE_DIRECT_MAP, name);
}
/*
diff --git a/fs/xfs/libxfs/xfs_rtrmap_btree.c b/fs/xfs/libxfs/xfs_rtrmap_btree.c
index 29de9bfbde20..6bbabf12b0c9 100644
--- a/fs/xfs/libxfs/xfs_rtrmap_btree.c
+++ b/fs/xfs/libxfs/xfs_rtrmap_btree.c
@@ -566,7 +566,7 @@ xfs_rtrmapbt_mem_create(
const char *name)
{
return xfbtree_create(mp, XFS_BTNUM_RTRMAP, &xfs_rtrmapbt_buf_ops,
- XFBTREE_CREATE_LONG_PTRS, name);
+ XFBTREE_CREATE_LONG_PTRS | XFBTREE_DIRECT_MAP, name);
}
/*
diff --git a/fs/xfs/scrub/trace.h b/fs/xfs/scrub/trace.h
index 25fa1eda4162..1342dd3653fc 100644
--- a/fs/xfs/scrub/trace.h
+++ b/fs/xfs/scrub/trace.h
@@ -921,6 +921,8 @@ DEFINE_XFILE_EVENT(xfile_pwrite);
DEFINE_XFILE_EVENT(xfile_seek_data);
DEFINE_XFILE_EVENT(xfile_discard);
DEFINE_XFILE_EVENT(xfile_prealloc);
+DEFINE_XFILE_EVENT(xfile_obj_get_page);
+DEFINE_XFILE_EVENT(xfile_obj_put_page);
TRACE_EVENT(xfarray_sort_stats,
TP_PROTO(struct xfarray *xfa, unsigned int max_stack_depth,
diff --git a/fs/xfs/scrub/xfbtree.c b/fs/xfs/scrub/xfbtree.c
index c43c18136733..1098234eec3b 100644
--- a/fs/xfs/scrub/xfbtree.c
+++ b/fs/xfs/scrub/xfbtree.c
@@ -151,8 +151,9 @@ xfs_btree_mem_head_read_buf(
{
struct xfs_mount *mp = btp->bt_mount;
- return xfs_trans_read_buf(mp, tp, btp, XFS_BTREE_MEM_HEAD_DADDR, 1, 0,
- bpp, &xfs_btree_mem_head_buf_ops);
+ return xfs_trans_read_buf(mp, tp, btp, XFS_BTREE_MEM_HEAD_DADDR,
+ XFS_FSB_TO_BB(mp, 1), 0, bpp,
+ &xfs_btree_mem_head_buf_ops);
}
/* Return tree height from the in-memory btree head */
@@ -239,6 +240,9 @@ xfbtree_create(
goto err_xfile;
}
+ if (mp->m_bsize == PAGE_SIZE && (flags & XFBTREE_DIRECT_MAP))
+ xfbt->target->bt_flags |= XFS_BUFTARG_DIRECT_MAP;
+
xfbt->freespace = kmem_alloc(sizeof(struct xbitmap),
KM_NOFS | KM_MAYFAIL);
if (!xfbt->freespace) {
@@ -264,7 +268,8 @@ xfbtree_create(
goto err_freesp;
/* Initialize the in-memory btree header block. */
- error = xfs_buf_get(xfbt->target, XFS_BTREE_MEM_HEAD_DADDR, 1, &bp);
+ error = xfs_buf_get(xfbt->target, XFS_BTREE_MEM_HEAD_DADDR,
+ XFS_FSB_TO_BB(mp, 1), &bp);
if (error)
goto err_freesp;
diff --git a/fs/xfs/scrub/xfile.c b/fs/xfs/scrub/xfile.c
index 310e96f3f1d4..764f09984c06 100644
--- a/fs/xfs/scrub/xfile.c
+++ b/fs/xfs/scrub/xfile.c
@@ -496,3 +496,114 @@ next_pgoff:
out:
return error;
}
+
+/*
+ * Grab the (locked) page for a memory object. The object cannot span a page
+ * boundary. Returns 0 (and a locked page) if successful, -ENOTBLK if we
+ * cannot grab the page, or the usual negative errno.
+ */
+int
+xfile_obj_get_page(
+ struct xfile *xf,
+ loff_t pos,
+ unsigned int len,
+ struct page **pagep)
+{
+ struct inode *inode = file_inode(xf->file);
+ struct address_space *mapping = inode->i_mapping;
+ struct page *page = NULL;
+ void *fsdata = NULL;
+ unsigned int pflags;
+ int error;
+
+ if (inode->i_sb->s_maxbytes - pos < len)
+ return -ENOMEM;
+ if (len > PAGE_SIZE - offset_in_page(pos))
+ return -ENOTBLK;
+
+ trace_xfile_obj_get_page(xf, pos, len);
+
+ pflags = memalloc_nofs_save();
+
+ /*
+ * We call pagecache_write_begin directly here to avoid all the freezer
+ * protection lock-taking that happens in the normal path. shmem
+ * doesn't support fs freeze, but lockdep doesn't know that and will
+ * trip over that.
+ */
+ error = pagecache_write_begin(NULL, mapping, pos, len, AOP_FLAG_NOFS,
+ &page, &fsdata);
+ if (error)
+ goto out_pflags;
+
+ /*
+ * We don't support passing fsdata to the caller and back to
+ * xfile_obj_put_page, so if we get a non-null pointer we just bail out.
+ */
+ if (fsdata != NULL || PageHWPoison(page)) {
+ int ret;
+
+ ASSERT(fsdata == NULL);
+ ret = pagecache_write_end(NULL, mapping, pos, len, 0, page,
+ fsdata);
+ if (ret < 0)
+ error = ret;
+ else
+ error = -ENOTBLK;
+ goto out_pflags;
+ }
+
+ /* We got the page, so make sure we push out EOF. */
+ if (i_size_read(inode) < pos + len)
+ i_size_write(inode, pos + len);
+
+ /*
+ * If the page isn't up to date, fill it with zeroes before we hand it
+ * to the caller and make sure the backing store will hold on to them.
+ */
+ if (!PageUptodate(page)) {
+ void *kaddr;
+
+ kaddr = kmap_local_page(page);
+ memset(kaddr, 0, PAGE_SIZE);
+ kunmap_local(kaddr);
+ SetPageUptodate(page);
+ set_page_dirty(page);
+ }
+
+ *pagep = page;
+out_pflags:
+ memalloc_nofs_restore(pflags);
+ return error;
+}
+
+/*
+ * Release the (locked) page for a memory object. The page must have been
+ * obtained by xfile_obj_get_page. Returns 0 or a negative errno.
+ */
+int
+xfile_obj_put_page(
+ struct xfile *xf,
+ loff_t pos,
+ unsigned int len,
+ struct page *page)
+{
+ struct inode *inode = file_inode(xf->file);
+ struct address_space *mapping = inode->i_mapping;
+ unsigned int pflags;
+ int ret;
+
+ ASSERT(len <= PAGE_SIZE - offset_in_page(pos));
+
+ trace_xfile_obj_put_page(xf, pos, len);
+
+ pflags = memalloc_nofs_save();
+ ret = pagecache_write_end(NULL, mapping, pos, len, len, page, NULL);
+ memalloc_nofs_restore(pflags);
+
+ if (ret < 0)
+ return ret;
+ if (ret != len)
+ return -EIO;
+ return 0;
+}
diff --git a/fs/xfs/scrub/xfile.h b/fs/xfs/scrub/xfile.h
index c6f6f56b4d0f..e63d61a380fe 100644
--- a/fs/xfs/scrub/xfile.h
+++ b/fs/xfs/scrub/xfile.h
@@ -57,6 +57,10 @@ struct xfile_stat {
int xfile_stat(struct xfile *xf, struct xfile_stat *statbuf);
int xfile_dump(struct xfile *xf);
+int xfile_obj_get_page(struct xfile *xf, loff_t offset, unsigned int len,
+ struct page **pagep);
+int xfile_obj_put_page(struct xfile *xf, loff_t offset, unsigned int len,
+ struct page *page);
#else
static inline int
xfile_obj_load(struct xfile *xf, void *buf, size_t count, loff_t offset)
@@ -69,6 +73,24 @@ xfile_obj_store(struct xfile *xf, void *buf, size_t count, loff_t offset)
{
return -EIO;
}
+static inline int
+xfile_obj_get_page(
+ struct xfile *xf,
+ loff_t offset,
+ unsigned int len,
+ struct page **pagep)
+{
+ return -EIO;
+}
+static inline int
+xfile_obj_put_page(
+ struct xfile *xf,
+ loff_t offset,
+ unsigned int len,
+ struct page *page)
+{
+ return -EIO;
+}
#endif /* CONFIG_XFS_ONLINE_SCRUB */
#endif /* __XFS_SCRUB_XFILE_H__ */
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index b8114a4d9761..5d1eebee2555 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -271,6 +271,45 @@ _xfs_buf_alloc(
}
static void
+xfs_buf_free_direct_pages(
+ struct xfs_buf *bp)
+{
+ struct xfs_buf_map *map;
+ unsigned int m, p, n;
+ int error = 0, err2;
+
+ ASSERT(bp->b_target->bt_flags & XFS_BUFTARG_DIRECT_MAP);
+
+ if (xfs_buf_is_vmapped(bp))
+ vm_unmap_ram(bp->b_addr, bp->b_page_count);
+
+ for (m = 0, p = 0, map = bp->b_maps; m < bp->b_map_count; m++, map++) {
+ for (n = 0; n < map->bm_len; n += BTOBB(PAGE_SIZE)) {
+ struct page *page = bp->b_pages[p];
+ unsigned int len;
+
+ len = min_t(unsigned int, BBTOB(map->bm_len - n),
+ PAGE_SIZE);
+
+ lock_page(page);
+ err2 = xfile_obj_put_page(bp->b_target->bt_xfile,
+ BBTOB(map->bm_bn + n), len, page);
+ if (!error && err2)
+ error = err2;
+ bp->b_pages[p++] = NULL;
+ }
+ }
+
+ if (error)
+ xfs_err(bp->b_mount, "%s failed errno %d", __func__, error);
+
+ if (bp->b_pages != bp->b_page_array)
+ kmem_free(bp->b_pages);
+ bp->b_pages = NULL;
+ bp->b_flags &= ~_XBF_DIRECT_MAP;
+}
+
+static void
xfs_buf_free_pages(
struct xfs_buf *bp)
{
@@ -302,7 +341,9 @@ xfs_buf_free(
ASSERT(list_empty(&bp->b_lru));
- if (bp->b_flags & _XBF_PAGES)
+ if (bp->b_flags & _XBF_DIRECT_MAP)
+ xfs_buf_free_direct_pages(bp);
+ else if (bp->b_flags & _XBF_PAGES)
xfs_buf_free_pages(bp);
else if (bp->b_flags & _XBF_KMEM)
kmem_free(bp->b_addr);
@@ -401,6 +442,93 @@ xfs_buf_alloc_pages(
}
/*
+ * Try to map storage directly, if the target supports it. Returns 0 for
+ * success, -ENOTBLK to mean "not supported", or the usual negative errno.
+ */
+static int
+xfs_buf_alloc_direct_pages(
+ struct xfs_buf *bp,
+ xfs_buf_flags_t flags)
+{
+ struct xfs_buf_map *map;
+ gfp_t gfp_mask = __GFP_NOWARN;
+ const unsigned int page_align_mask = PAGE_SIZE - 1;
+ unsigned int m, p, n;
+ int error;
+
+ ASSERT(bp->b_target->bt_flags & XFS_BUFTARG_IN_MEMORY);
+
+ /* For direct-map buffers, each map has to be page aligned. */
+ for (m = 0, map = bp->b_maps; m < bp->b_map_count; m++, map++)
+ if (BBTOB(map->bm_bn | map->bm_len) & page_align_mask)
+ return -ENOTBLK;
+
+ if (flags & XBF_READ_AHEAD)
+ gfp_mask |= __GFP_NORETRY;
+ else
+ gfp_mask |= GFP_NOFS;
+
+ /* Make sure that we have a page list */
+ bp->b_page_count = DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE);
+ if (bp->b_page_count <= XB_PAGES) {
+ bp->b_pages = bp->b_page_array;
+ } else {
+ bp->b_pages = kzalloc(sizeof(struct page *) * bp->b_page_count,
+ gfp_mask);
+ if (!bp->b_pages)
+ return -ENOMEM;
+ }
+
+ /* Map in the xfile pages. */
+ for (m = 0, p = 0, map = bp->b_maps; m < bp->b_map_count; m++, map++) {
+ for (n = 0; n < map->bm_len; n += BTOBB(PAGE_SIZE)) {
+ unsigned int len;
+
+ len = min_t(unsigned int, BBTOB(map->bm_len - n),
+ PAGE_SIZE);
+
+ error = xfile_obj_get_page(bp->b_target->bt_xfile,
+ BBTOB(map->bm_bn + n), len,
+ &bp->b_pages[p++]);
+ if (error)
+ goto fail;
+ }
+ }
+
+ /* Unlock all the pages now that we've grabbed them all. */
+ for (p = 0; p < bp->b_page_count; p++) {
+ ASSERT(PageUptodate(bp->b_pages[p]));
+ unlock_page(bp->b_pages[p]);
+ }
+
+ bp->b_flags |= _XBF_DIRECT_MAP;
+ return 0;
+
+fail:
+ for (m = 0, p = 0, map = bp->b_maps; m < bp->b_map_count; m++, map++) {
+ for (n = 0; n < map->bm_len; n += BTOBB(PAGE_SIZE)) {
+ struct page *page = bp->b_pages[p++];
+ unsigned int len;
+
+ if (!page)
+ continue;
+
+ len = min_t(unsigned int, BBTOB(map->bm_len - n),
+ PAGE_SIZE);
+
+ xfile_obj_put_page(bp->b_target->bt_xfile,
+ BBTOB(map->bm_bn + n), len, page);
+ }
+ }
+
+ if (bp->b_pages != bp->b_page_array)
+ kmem_free(bp->b_pages);
+ bp->b_pages = NULL;
+ bp->b_page_count = 0;
+ return error;
+}
+
+/*
* Map buffer into kernel address-space if necessary.
*/
STATIC int
@@ -408,7 +536,8 @@ _xfs_buf_map_pages(
struct xfs_buf *bp,
uint flags)
{
- ASSERT(bp->b_flags & _XBF_PAGES);
+ ASSERT(bp->b_flags & (_XBF_PAGES | _XBF_DIRECT_MAP));
+
if (bp->b_page_count == 1) {
/* A single page buffer is always mappable */
bp->b_addr = page_address(bp->b_pages[0]);
@@ -625,7 +754,7 @@ found:
*/
if (bp->b_flags & XBF_STALE) {
ASSERT((bp->b_flags & _XBF_DELWRI_Q) == 0);
- bp->b_flags &= _XBF_KMEM | _XBF_PAGES;
+ bp->b_flags &= _XBF_KMEM | _XBF_PAGES | _XBF_DIRECT_MAP;
bp->b_ops = NULL;
}
@@ -680,6 +809,13 @@ xfs_buf_get_map(
if (error)
return error;
+ /* Try to map pages directly, or fall back to memory. */
+ if (target->bt_flags & XFS_BUFTARG_DIRECT_MAP) {
+ error = xfs_buf_alloc_direct_pages(new_bp, flags);
+ if (error && error != -ENOTBLK)
+ goto out_free_buf;
+ }
+
/*
* For buffers that fit entirely within a single page, first attempt to
* allocate the memory from the heap to minimise memory usage. If we
@@ -1543,6 +1679,29 @@ xfs_buf_ioapply_in_memory(
xfs_buf_ioend(bp);
}
+void
+xfs_buf_ioapply_direct_pages(
+ struct xfs_buf *bp,
+ bool is_write)
+{
+ unsigned int i;
+ bool ioerr = false;
+
+ for (i = 0; i < bp->b_page_count; i++) {
+ struct page *page = bp->b_pages[i];
+
+ lock_page(page);
+ if (is_write)
+ set_page_dirty(page);
+ ioerr |= PageHWPoison(page);
+ unlock_page(page);
+ }
+
+ if (ioerr)
+ cmpxchg(&bp->b_io_error, 0, -EIO);
+}
+
+
STATIC void
_xfs_buf_ioapply(
struct xfs_buf *bp)
@@ -1600,6 +1759,11 @@ _xfs_buf_ioapply(
/* we only use the buffer cache for meta-data */
op |= REQ_META;
+ if (bp->b_target->bt_flags & XFS_BUFTARG_DIRECT_MAP) {
+ xfs_buf_ioapply_direct_pages(bp, bp->b_flags & XBF_WRITE);
+ return;
+ }
+
if (bp->b_target->bt_flags & XFS_BUFTARG_IN_MEMORY) {
xfs_buf_ioapply_in_memory(bp);
return;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 5328db3af1e3..30eafc6d3775 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -41,6 +41,7 @@ struct xfile;
#define _XBF_PAGES (1 << 20)/* backed by refcounted pages */
#define _XBF_KMEM (1 << 21)/* backed by heap memory */
#define _XBF_DELWRI_Q (1 << 22)/* buffer on a delwri queue */
+#define _XBF_DIRECT_MAP (1 << 23)/* pages directly mapped to storage */
/* flags used only as arguments to access routines */
#define _XBF_IGNORE_STALE (1 << 29)/* ignore stale buffers */
@@ -64,6 +65,7 @@ typedef unsigned int xfs_buf_flags_t;
{ _XBF_PAGES, "PAGES" }, \
{ _XBF_KMEM, "KMEM" }, \
{ _XBF_DELWRI_Q, "DELWRI_Q" }, \
+ { _XBF_DIRECT_MAP, "DIRECT_MAP" }, \
/* The following interface flags should never be set */ \
{ _XBF_IGNORE_STALE, "IGNORE_STALE" }, \
{ XBF_TRYLOCK, "TRYLOCK" }, \
@@ -119,6 +121,8 @@ typedef struct xfs_buftarg {
#define XFS_BUFTARG_SELF_CACHED (1U << 0)
/* in-memory buftarg */
#define XFS_BUFTARG_IN_MEMORY (1U << 1)
+/* buffer pages are direct-mapped (implies IN_MEMORY) */
+#define XFS_BUFTARG_DIRECT_MAP (1U << 2)
static inline bool
xfs_buftarg_in_memory(
@@ -424,5 +428,6 @@ xfs_buftarg_zeroout(
int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic);
bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic);
+void xfs_buf_ioapply_direct_pages(struct xfs_buf *bp, bool is_write);
#endif /* __XFS_BUF_H__ */
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 6549e50d852c..9cbd19531eb0 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -461,6 +461,13 @@ xfs_trans_dirty_buf(
ASSERT(atomic_read(&bip->bli_refcount) > 0);
/*
+ * For buffers that are directly mapped to an in-memory file, mark the
+ * pages dirty so that they'll be persisted properly.
+ */
+ if (bp->b_target->bt_flags & XFS_BUFTARG_DIRECT_MAP)
+ xfs_buf_ioapply_direct_pages(bp, true);
+
+ /*
* If we invalidated the buffer within this transaction, then
* cancel the invalidation now that we're dirtying the buffer
* again. There are no races with the code in xfs_buf_item_unpin(),