From 10634530f7ba947d8eab52a580e0840778d4ef75 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 16 Jan 2024 09:59:39 +1100
Subject: xfs: convert kmem_zalloc() to kzalloc()

There's no reason to keep the kmem_zalloc() around anymore, it's
just a thin wrapper around kmalloc(), so lets get rid of it.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
---
 fs/xfs/xfs_buf.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs/xfs/xfs_buf.c')

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 8e5bd50d29fe..5672665150ea 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -189,8 +189,8 @@ xfs_buf_get_maps(
 		return 0;
 	}
 
-	bp->b_maps = kmem_zalloc(map_count * sizeof(struct xfs_buf_map),
-				KM_NOFS);
+	bp->b_maps = kzalloc(map_count * sizeof(struct xfs_buf_map),
+				GFP_NOFS | __GFP_NOFAIL);
 	if (!bp->b_maps)
 		return -ENOMEM;
 	return 0;
@@ -2002,7 +2002,7 @@ xfs_alloc_buftarg(
 #if defined(CONFIG_FS_DAX) && defined(CONFIG_MEMORY_FAILURE)
 	ops = &xfs_dax_holder_operations;
 #endif
-	btp = kmem_zalloc(sizeof(*btp), KM_NOFS);
+	btp = kzalloc(sizeof(*btp), GFP_NOFS | __GFP_NOFAIL);
 
 	btp->bt_mount = mp;
 	btp->bt_bdev_handle = bdev_handle;
-- 
cgit v1.2.3


From f078d4ea827607867d42fb3b2ef907caf86ce49d Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 16 Jan 2024 09:59:40 +1100
Subject: xfs: convert kmem_alloc() to kmalloc()

kmem_alloc() is just a thin wrapper around kmalloc() these days.
Convert everything to use kmalloc() so we can get rid of the
wrapper.

Note: the transaction region allocation in xlog_add_to_transaction()
can be a high order allocation. Converting it to use
kmalloc(__GFP_NOFAIL) results in warnings in the page allocation
code being triggered because the mm subsystem does not want us to
use __GFP_NOFAIL with high order allocations like we've been doing
with the kmem_alloc() wrapper for a couple of decades. Hence this
specific case gets converted to xlog_kvmalloc() rather than
kmalloc() to avoid this issue.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
---
 fs/xfs/Makefile                   |  3 +--
 fs/xfs/kmem.c                     | 30 ----------------------------
 fs/xfs/kmem.h                     | 42 ---------------------------------------
 fs/xfs/libxfs/xfs_attr_leaf.c     |  7 +++----
 fs/xfs/libxfs/xfs_btree_staging.c |  4 ++--
 fs/xfs/libxfs/xfs_da_btree.c      |  3 ++-
 fs/xfs/libxfs/xfs_dir2.c          |  2 +-
 fs/xfs/libxfs/xfs_dir2_block.c    |  2 +-
 fs/xfs/libxfs/xfs_dir2_sf.c       |  8 ++++----
 fs/xfs/libxfs/xfs_inode_fork.c    | 15 +++++++-------
 fs/xfs/xfs_attr_list.c            |  2 +-
 fs/xfs/xfs_buf.c                  |  6 +++---
 fs/xfs/xfs_buf_item_recover.c     |  2 +-
 fs/xfs/xfs_filestream.c           |  2 +-
 fs/xfs/xfs_inode_item_recover.c   |  3 ++-
 fs/xfs/xfs_iwalk.c                |  2 +-
 fs/xfs/xfs_log_recover.c          |  2 +-
 fs/xfs/xfs_qm.c                   |  3 ++-
 fs/xfs/xfs_rtalloc.c              |  2 +-
 fs/xfs/xfs_super.c                |  2 +-
 fs/xfs/xfs_trace.h                | 25 -----------------------
 21 files changed, 36 insertions(+), 131 deletions(-)
 delete mode 100644 fs/xfs/kmem.c

(limited to 'fs/xfs/xfs_buf.c')

diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index fbe3cdc79036..35a23427055b 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -92,8 +92,7 @@ xfs-y				+= xfs_aops.o \
 				   xfs_symlink.o \
 				   xfs_sysfs.o \
 				   xfs_trans.o \
-				   xfs_xattr.o \
-				   kmem.o
+				   xfs_xattr.o
 
 # low-level transaction/log code
 xfs-y				+= xfs_log.o \
diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c
deleted file mode 100644
index c557a030acfe..000000000000
--- a/fs/xfs/kmem.c
+++ /dev/null
@@ -1,30 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- */
-#include "xfs.h"
-#include "xfs_message.h"
-#include "xfs_trace.h"
-
-void *
-kmem_alloc(size_t size, xfs_km_flags_t flags)
-{
-	int	retries = 0;
-	gfp_t	lflags = kmem_flags_convert(flags);
-	void	*ptr;
-
-	trace_kmem_alloc(size, flags, _RET_IP_);
-
-	do {
-		ptr = kmalloc(size, lflags);
-		if (ptr || (flags & KM_MAYFAIL))
-			return ptr;
-		if (!(++retries % 100))
-			xfs_err(NULL,
-	"%s(%u) possible memory allocation deadlock size %u in %s (mode:0x%x)",
-				current->comm, current->pid,
-				(unsigned int)size, __func__, lflags);
-		memalloc_retry_wait(lflags);
-	} while (1);
-}
diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
index bce31182c9e8..1343f1a6f99b 100644
--- a/fs/xfs/kmem.h
+++ b/fs/xfs/kmem.h
@@ -15,48 +15,6 @@
  * General memory allocation interfaces
  */
 
-typedef unsigned __bitwise xfs_km_flags_t;
-#define KM_NOFS		((__force xfs_km_flags_t)0x0004u)
-#define KM_MAYFAIL	((__force xfs_km_flags_t)0x0008u)
-#define KM_ZERO		((__force xfs_km_flags_t)0x0010u)
-#define KM_NOLOCKDEP	((__force xfs_km_flags_t)0x0020u)
-
-/*
- * We use a special process flag to avoid recursive callbacks into
- * the filesystem during transactions.  We will also issue our own
- * warnings, so we explicitly skip any generic ones (silly of us).
- */
-static inline gfp_t
-kmem_flags_convert(xfs_km_flags_t flags)
-{
-	gfp_t	lflags;
-
-	BUG_ON(flags & ~(KM_NOFS | KM_MAYFAIL | KM_ZERO | KM_NOLOCKDEP));
-
-	lflags = GFP_KERNEL | __GFP_NOWARN;
-	if (flags & KM_NOFS)
-		lflags &= ~__GFP_FS;
-
-	/*
-	 * Default page/slab allocator behavior is to retry for ever
-	 * for small allocations. We can override this behavior by using
-	 * __GFP_RETRY_MAYFAIL which will tell the allocator to retry as long
-	 * as it is feasible but rather fail than retry forever for all
-	 * request sizes.
-	 */
-	if (flags & KM_MAYFAIL)
-		lflags |= __GFP_RETRY_MAYFAIL;
-
-	if (flags & KM_ZERO)
-		lflags |= __GFP_ZERO;
-
-	if (flags & KM_NOLOCKDEP)
-		lflags |= __GFP_NOLOCKDEP;
-
-	return lflags;
-}
-
-extern void *kmem_alloc(size_t, xfs_km_flags_t);
 static inline void  kmem_free(const void *ptr)
 {
 	kvfree(ptr);
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index ab4223bf51ee..033382cf514d 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -879,8 +879,7 @@ xfs_attr_shortform_to_leaf(
 
 	trace_xfs_attr_sf_to_leaf(args);
 
-	tmpbuffer = kmem_alloc(size, 0);
-	ASSERT(tmpbuffer != NULL);
+	tmpbuffer = kmalloc(size, GFP_KERNEL | __GFP_NOFAIL);
 	memcpy(tmpbuffer, ifp->if_data, size);
 	sf = (struct xfs_attr_sf_hdr *)tmpbuffer;
 
@@ -1059,7 +1058,7 @@ xfs_attr3_leaf_to_shortform(
 
 	trace_xfs_attr_leaf_to_sf(args);
 
-	tmpbuffer = kmem_alloc(args->geo->blksize, 0);
+	tmpbuffer = kmalloc(args->geo->blksize, GFP_KERNEL | __GFP_NOFAIL);
 	if (!tmpbuffer)
 		return -ENOMEM;
 
@@ -1533,7 +1532,7 @@ xfs_attr3_leaf_compact(
 
 	trace_xfs_attr_leaf_compact(args);
 
-	tmpbuffer = kmem_alloc(args->geo->blksize, 0);
+	tmpbuffer = kmalloc(args->geo->blksize, GFP_KERNEL | __GFP_NOFAIL);
 	memcpy(tmpbuffer, bp->b_addr, args->geo->blksize);
 	memset(bp->b_addr, 0, args->geo->blksize);
 	leaf_src = (xfs_attr_leafblock_t *)tmpbuffer;
diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c
index eff29425fd76..065e4a00a2f4 100644
--- a/fs/xfs/libxfs/xfs_btree_staging.c
+++ b/fs/xfs/libxfs/xfs_btree_staging.c
@@ -139,7 +139,7 @@ xfs_btree_stage_afakeroot(
 	ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE));
 	ASSERT(cur->bc_tp == NULL);
 
-	nops = kmem_alloc(sizeof(struct xfs_btree_ops), KM_NOFS);
+	nops = kmalloc(sizeof(struct xfs_btree_ops), GFP_NOFS | __GFP_NOFAIL);
 	memcpy(nops, cur->bc_ops, sizeof(struct xfs_btree_ops));
 	nops->alloc_block = xfs_btree_fakeroot_alloc_block;
 	nops->free_block = xfs_btree_fakeroot_free_block;
@@ -220,7 +220,7 @@ xfs_btree_stage_ifakeroot(
 	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
 	ASSERT(cur->bc_tp == NULL);
 
-	nops = kmem_alloc(sizeof(struct xfs_btree_ops), KM_NOFS);
+	nops = kmalloc(sizeof(struct xfs_btree_ops), GFP_NOFS | __GFP_NOFAIL);
 	memcpy(nops, cur->bc_ops, sizeof(struct xfs_btree_ops));
 	nops->alloc_block = xfs_btree_fakeroot_alloc_block;
 	nops->free_block = xfs_btree_fakeroot_free_block;
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 73aae6543906..331b9251b185 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2182,7 +2182,8 @@ xfs_da_grow_inode_int(
 		 * If we didn't get it and the block might work if fragmented,
 		 * try without the CONTIG flag.  Loop until we get it all.
 		 */
-		mapp = kmem_alloc(sizeof(*mapp) * count, 0);
+		mapp = kmalloc(sizeof(*mapp) * count,
+				GFP_KERNEL | __GFP_NOFAIL);
 		for (b = *bno, mapi = 0; b < *bno + count; ) {
 			c = (int)(*bno + count - b);
 			nmap = min(XFS_BMAP_MAX_NMAP, c);
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 54915a302e96..370d67300455 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -333,7 +333,7 @@ xfs_dir_cilookup_result(
 					!(args->op_flags & XFS_DA_OP_CILOOKUP))
 		return -EEXIST;
 
-	args->value = kmem_alloc(len, KM_NOFS | KM_MAYFAIL);
+	args->value = kmalloc(len, GFP_NOFS | __GFP_RETRY_MAYFAIL);
 	if (!args->value)
 		return -ENOMEM;
 
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 3c256d4cc40b..506c65caaec5 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -1108,7 +1108,7 @@ xfs_dir2_sf_to_block(
 	 * Copy the directory into a temporary buffer.
 	 * Then pitch the incore inode data so we can make extents.
 	 */
-	sfp = kmem_alloc(ifp->if_bytes, 0);
+	sfp = kmalloc(ifp->if_bytes, GFP_KERNEL | __GFP_NOFAIL);
 	memcpy(sfp, oldsfp, ifp->if_bytes);
 
 	xfs_idata_realloc(dp, -ifp->if_bytes, XFS_DATA_FORK);
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index e1f83fc7b6ad..7b1f41cff9e0 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -276,7 +276,7 @@ xfs_dir2_block_to_sf(
 	 * format the data into.  Once we have formatted the data, we can free
 	 * the block and copy the formatted data into the inode literal area.
 	 */
-	sfp = kmem_alloc(mp->m_sb.sb_inodesize, 0);
+	sfp = kmalloc(mp->m_sb.sb_inodesize, GFP_KERNEL | __GFP_NOFAIL);
 	memcpy(sfp, sfhp, xfs_dir2_sf_hdr_size(sfhp->i8count));
 
 	/*
@@ -524,7 +524,7 @@ xfs_dir2_sf_addname_hard(
 	 * Copy the old directory to the stack buffer.
 	 */
 	old_isize = (int)dp->i_disk_size;
-	buf = kmem_alloc(old_isize, 0);
+	buf = kmalloc(old_isize, GFP_KERNEL | __GFP_NOFAIL);
 	oldsfp = (xfs_dir2_sf_hdr_t *)buf;
 	memcpy(oldsfp, dp->i_df.if_data, old_isize);
 	/*
@@ -1151,7 +1151,7 @@ xfs_dir2_sf_toino4(
 	 * Don't want xfs_idata_realloc copying the data here.
 	 */
 	oldsize = dp->i_df.if_bytes;
-	buf = kmem_alloc(oldsize, 0);
+	buf = kmalloc(oldsize, GFP_KERNEL | __GFP_NOFAIL);
 	ASSERT(oldsfp->i8count == 1);
 	memcpy(buf, oldsfp, oldsize);
 	/*
@@ -1223,7 +1223,7 @@ xfs_dir2_sf_toino8(
 	 * Don't want xfs_idata_realloc copying the data here.
 	 */
 	oldsize = dp->i_df.if_bytes;
-	buf = kmem_alloc(oldsize, 0);
+	buf = kmalloc(oldsize, GFP_KERNEL | __GFP_NOFAIL);
 	ASSERT(oldsfp->i8count == 0);
 	memcpy(buf, oldsfp, oldsize);
 	/*
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index f4569e18a8d0..f3cf7f933e15 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -50,7 +50,7 @@ xfs_init_local_fork(
 		mem_size++;
 
 	if (size) {
-		char *new_data = kmem_alloc(mem_size, KM_NOFS);
+		char *new_data = kmalloc(mem_size, GFP_NOFS | __GFP_NOFAIL);
 
 		memcpy(new_data, data, size);
 		if (zero_terminate)
@@ -77,7 +77,7 @@ xfs_iformat_local(
 	/*
 	 * If the size is unreasonable, then something
 	 * is wrong and we just bail out rather than crash in
-	 * kmem_alloc() or memcpy() below.
+	 * kmalloc() or memcpy() below.
 	 */
 	if (unlikely(size > XFS_DFORK_SIZE(dip, ip->i_mount, whichfork))) {
 		xfs_warn(ip->i_mount,
@@ -116,7 +116,7 @@ xfs_iformat_extents(
 
 	/*
 	 * If the number of extents is unreasonable, then something is wrong and
-	 * we just bail out rather than crash in kmem_alloc() or memcpy() below.
+	 * we just bail out rather than crash in kmalloc() or memcpy() below.
 	 */
 	if (unlikely(size < 0 || size > XFS_DFORK_SIZE(dip, mp, whichfork))) {
 		xfs_warn(ip->i_mount, "corrupt inode %llu ((a)extents = %llu).",
@@ -205,7 +205,7 @@ xfs_iformat_btree(
 	}
 
 	ifp->if_broot_bytes = size;
-	ifp->if_broot = kmem_alloc(size, KM_NOFS);
+	ifp->if_broot = kmalloc(size, GFP_NOFS | __GFP_NOFAIL);
 	ASSERT(ifp->if_broot != NULL);
 	/*
 	 * Copy and convert from the on-disk structure
@@ -399,7 +399,8 @@ xfs_iroot_realloc(
 		 */
 		if (ifp->if_broot_bytes == 0) {
 			new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
-			ifp->if_broot = kmem_alloc(new_size, KM_NOFS);
+			ifp->if_broot = kmalloc(new_size,
+						GFP_NOFS | __GFP_NOFAIL);
 			ifp->if_broot_bytes = (int)new_size;
 			return;
 		}
@@ -440,7 +441,7 @@ xfs_iroot_realloc(
 	else
 		new_size = 0;
 	if (new_size > 0) {
-		new_broot = kmem_alloc(new_size, KM_NOFS);
+		new_broot = kmalloc(new_size, GFP_NOFS | __GFP_NOFAIL);
 		/*
 		 * First copy over the btree block header.
 		 */
@@ -488,7 +489,7 @@ xfs_iroot_realloc(
  *
  * If the amount of space needed has decreased below the size of the
  * inline buffer, then switch to using the inline buffer.  Otherwise,
- * use kmem_realloc() or kmem_alloc() to adjust the size of the buffer
+ * use krealloc() or kmalloc() to adjust the size of the buffer
  * to what is needed.
  *
  * ip -- the inode whose if_data area is changing
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index e368ad671e26..5f7a44d21cc9 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -109,7 +109,7 @@ xfs_attr_shortform_list(
 	 * It didn't all fit, so we have to sort everything on hashval.
 	 */
 	sbsize = sf->count * sizeof(*sbuf);
-	sbp = sbuf = kmem_alloc(sbsize, KM_NOFS);
+	sbp = sbuf = kmalloc(sbsize, GFP_NOFS | __GFP_NOFAIL);
 
 	/*
 	 * Scan the attribute list for the rest of the entries, storing
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 5672665150ea..6f53eb2d3de0 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -325,14 +325,14 @@ xfs_buf_alloc_kmem(
 	struct xfs_buf	*bp,
 	xfs_buf_flags_t	flags)
 {
-	xfs_km_flags_t	kmflag_mask = KM_NOFS;
+	gfp_t		gfp_mask = GFP_NOFS | __GFP_NOFAIL;
 	size_t		size = BBTOB(bp->b_length);
 
 	/* Assure zeroed buffer for non-read cases. */
 	if (!(flags & XBF_READ))
-		kmflag_mask |= KM_ZERO;
+		gfp_mask |= __GFP_ZERO;
 
-	bp->b_addr = kmem_alloc(size, kmflag_mask);
+	bp->b_addr = kmalloc(size, gfp_mask);
 	if (!bp->b_addr)
 		return -ENOMEM;
 
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index 43167f543afc..34776f4c05ac 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -85,7 +85,7 @@ xlog_add_buffer_cancelled(
 		return false;
 	}
 
-	bcp = kmem_alloc(sizeof(struct xfs_buf_cancel), 0);
+	bcp = kmalloc(sizeof(struct xfs_buf_cancel), GFP_KERNEL | __GFP_NOFAIL);
 	bcp->bc_blkno = blkno;
 	bcp->bc_len = len;
 	bcp->bc_refcount = 1;
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index 2fc98d313708..e2a3c8d3fe4f 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -313,7 +313,7 @@ xfs_filestream_create_association(
 	 * we return a referenced AG, the allocation can still go ahead just
 	 * fine.
 	 */
-	item = kmem_alloc(sizeof(*item), KM_MAYFAIL);
+	item = kmalloc(sizeof(*item), GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!item)
 		goto out_put_fstrms;
 
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
index 144198a6b270..5d7b937179a0 100644
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -291,7 +291,8 @@ xlog_recover_inode_commit_pass2(
 	if (item->ri_buf[0].i_len == sizeof(struct xfs_inode_log_format)) {
 		in_f = item->ri_buf[0].i_addr;
 	} else {
-		in_f = kmem_alloc(sizeof(struct xfs_inode_log_format), 0);
+		in_f = kmalloc(sizeof(struct xfs_inode_log_format),
+				GFP_KERNEL | __GFP_NOFAIL);
 		need_free = 1;
 		error = xfs_inode_item_format_convert(&item->ri_buf[0], in_f);
 		if (error)
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
index 8dbb7c054b28..5dd622aa54c5 100644
--- a/fs/xfs/xfs_iwalk.c
+++ b/fs/xfs/xfs_iwalk.c
@@ -160,7 +160,7 @@ xfs_iwalk_alloc(
 
 	/* Allocate a prefetch buffer for inobt records. */
 	size = iwag->sz_recs * sizeof(struct xfs_inobt_rec_incore);
-	iwag->recs = kmem_alloc(size, KM_MAYFAIL);
+	iwag->recs = kmalloc(size, GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (iwag->recs == NULL)
 		return -ENOMEM;
 
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 4a27ecdbb546..e3bd503edcab 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2161,7 +2161,7 @@ xlog_recover_add_to_trans(
 		return 0;
 	}
 
-	ptr = kmem_alloc(len, 0);
+	ptr = xlog_kvmalloc(len);
 	memcpy(ptr, dp, len);
 	in_f = (struct xfs_inode_log_format *)ptr;
 
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 9c2528418b18..6b6b96454449 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -997,7 +997,8 @@ xfs_qm_reset_dqcounts_buf(
 	if (qip->i_nblocks == 0)
 		return 0;
 
-	map = kmem_alloc(XFS_DQITER_MAP_SIZE * sizeof(*map), 0);
+	map = kmalloc(XFS_DQITER_MAP_SIZE * sizeof(*map),
+			GFP_KERNEL | __GFP_NOFAIL);
 
 	lblkno = 0;
 	maxlblkcnt = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 8649d981a097..8a8d6197203e 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -903,7 +903,7 @@ xfs_growfs_rt(
 	/*
 	 * Allocate a new (fake) mount/sb.
 	 */
-	nmp = kmem_alloc(sizeof(*nmp), 0);
+	nmp = kmalloc(sizeof(*nmp), GFP_KERNEL | __GFP_NOFAIL);
 	/*
 	 * Loop over the bitmap blocks.
 	 * We will do everything one bitmap block at a time.
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 5a2512d20bd0..d167ba00e7cf 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1987,7 +1987,7 @@ static int xfs_init_fs_context(
 {
 	struct xfs_mount	*mp;
 
-	mp = kmem_alloc(sizeof(struct xfs_mount), KM_ZERO);
+	mp = kzalloc(sizeof(struct xfs_mount), GFP_KERNEL | __GFP_NOFAIL);
 	if (!mp)
 		return -ENOMEM;
 
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 0984a1c884c7..c7e57efe0356 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -4040,31 +4040,6 @@ TRACE_EVENT(xfs_pwork_init,
 		  __entry->nr_threads, __entry->pid)
 )
 
-DECLARE_EVENT_CLASS(xfs_kmem_class,
-	TP_PROTO(ssize_t size, int flags, unsigned long caller_ip),
-	TP_ARGS(size, flags, caller_ip),
-	TP_STRUCT__entry(
-		__field(ssize_t, size)
-		__field(int, flags)
-		__field(unsigned long, caller_ip)
-	),
-	TP_fast_assign(
-		__entry->size = size;
-		__entry->flags = flags;
-		__entry->caller_ip = caller_ip;
-	),
-	TP_printk("size %zd flags 0x%x caller %pS",
-		  __entry->size,
-		  __entry->flags,
-		  (char *)__entry->caller_ip)
-)
-
-#define DEFINE_KMEM_EVENT(name) \
-DEFINE_EVENT(xfs_kmem_class, name, \
-	TP_PROTO(ssize_t size, int flags, unsigned long caller_ip), \
-	TP_ARGS(size, flags, caller_ip))
-DEFINE_KMEM_EVENT(kmem_alloc);
-
 TRACE_EVENT(xfs_check_new_dalign,
 	TP_PROTO(struct xfs_mount *mp, int new_dalign, xfs_ino_t calc_rootino),
 	TP_ARGS(mp, new_dalign, calc_rootino),
-- 
cgit v1.2.3


From d4c75a1b40cd036a84d98e2711db9cf30eaaaf5f Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 16 Jan 2024 09:59:43 +1100
Subject: xfs: convert remaining kmem_free() to kfree()

The remaining callers of kmem_free() are freeing heap memory, so
we can convert them directly to kfree() and get rid of kmem_free()
altogether.

This conversion was done with:

$ for f in `git grep -l kmem_free fs/xfs`; do
> sed -i s/kmem_free/kfree/ $f
> done
$

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
---
 fs/xfs/kmem.h                     | 23 -----------------------
 fs/xfs/libxfs/xfs_ag.c            |  6 +++---
 fs/xfs/libxfs/xfs_attr_leaf.c     |  8 ++++----
 fs/xfs/libxfs/xfs_btree.c         |  2 +-
 fs/xfs/libxfs/xfs_btree_staging.c |  4 ++--
 fs/xfs/libxfs/xfs_da_btree.c      | 10 +++++-----
 fs/xfs/libxfs/xfs_defer.c         |  4 ++--
 fs/xfs/libxfs/xfs_dir2.c          | 18 +++++++++---------
 fs/xfs/libxfs/xfs_dir2_block.c    |  4 ++--
 fs/xfs/libxfs/xfs_dir2_sf.c       |  8 ++++----
 fs/xfs/libxfs/xfs_iext_tree.c     |  8 ++++----
 fs/xfs/libxfs/xfs_inode_fork.c    |  6 +++---
 fs/xfs/scrub/cow_repair.c         |  2 +-
 fs/xfs/xfs_attr_item.c            |  2 +-
 fs/xfs/xfs_attr_list.c            |  4 ++--
 fs/xfs/xfs_buf.c                  | 12 ++++++------
 fs/xfs/xfs_buf_item.c             |  2 +-
 fs/xfs/xfs_buf_item_recover.c     |  6 +++---
 fs/xfs/xfs_discard.c              |  2 +-
 fs/xfs/xfs_error.c                |  4 ++--
 fs/xfs/xfs_extent_busy.c          |  2 +-
 fs/xfs/xfs_extfree_item.c         |  4 ++--
 fs/xfs/xfs_filestream.c           |  4 ++--
 fs/xfs/xfs_inode.c                |  4 ++--
 fs/xfs/xfs_inode_item_recover.c   |  2 +-
 fs/xfs/xfs_ioctl.c                |  6 +++---
 fs/xfs/xfs_iops.c                 |  2 +-
 fs/xfs/xfs_itable.c               |  4 ++--
 fs/xfs/xfs_iwalk.c                |  4 ++--
 fs/xfs/xfs_linux.h                |  3 +--
 fs/xfs/xfs_log.c                  |  8 ++++----
 fs/xfs/xfs_log_cil.c              | 14 +++++++-------
 fs/xfs/xfs_log_recover.c          |  6 +++---
 fs/xfs/xfs_mount.c                |  2 +-
 fs/xfs/xfs_mru_cache.c            |  8 ++++----
 fs/xfs/xfs_qm.c                   |  6 +++---
 fs/xfs/xfs_refcount_item.c        |  2 +-
 fs/xfs/xfs_rmap_item.c            |  2 +-
 fs/xfs/xfs_rtalloc.c              |  2 +-
 fs/xfs/xfs_super.c                |  2 +-
 fs/xfs/xfs_trans_ail.c            |  4 ++--
 41 files changed, 101 insertions(+), 125 deletions(-)
 delete mode 100644 fs/xfs/kmem.h

(limited to 'fs/xfs/xfs_buf.c')

diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h
deleted file mode 100644
index 48e43f29f2a0..000000000000
--- a/fs/xfs/kmem.h
+++ /dev/null
@@ -1,23 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (c) 2000-2005 Silicon Graphics, Inc.
- * All Rights Reserved.
- */
-#ifndef __XFS_SUPPORT_KMEM_H__
-#define __XFS_SUPPORT_KMEM_H__
-
-#include <linux/slab.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/vmalloc.h>
-
-/*
- * General memory allocation interfaces
- */
-
-static inline void  kmem_free(const void *ptr)
-{
-	kvfree(ptr);
-}
-
-#endif /* __XFS_SUPPORT_KMEM_H__ */
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 96a6bfd58931..937ea48d5cc0 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -241,7 +241,7 @@ __xfs_free_perag(
 	struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head);
 
 	ASSERT(!delayed_work_pending(&pag->pag_blockgc_work));
-	kmem_free(pag);
+	kfree(pag);
 }
 
 /*
@@ -353,7 +353,7 @@ xfs_free_unused_perag_range(
 			break;
 		xfs_buf_hash_destroy(pag);
 		xfs_defer_drain_free(&pag->pag_intents_drain);
-		kmem_free(pag);
+		kfree(pag);
 	}
 }
 
@@ -453,7 +453,7 @@ out_remove_pag:
 	radix_tree_delete(&mp->m_perag_tree, index);
 	spin_unlock(&mp->m_perag_lock);
 out_free_pag:
-	kmem_free(pag);
+	kfree(pag);
 out_unwind_new_pags:
 	/* unwind any prior newly initialized pags */
 	xfs_free_unused_perag_range(mp, first_initialised, agcount);
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 033382cf514d..192d9938a231 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -923,7 +923,7 @@ xfs_attr_shortform_to_leaf(
 	}
 	error = 0;
 out:
-	kmem_free(tmpbuffer);
+	kfree(tmpbuffer);
 	return error;
 }
 
@@ -1124,7 +1124,7 @@ xfs_attr3_leaf_to_shortform(
 	error = 0;
 
 out:
-	kmem_free(tmpbuffer);
+	kfree(tmpbuffer);
 	return error;
 }
 
@@ -1570,7 +1570,7 @@ xfs_attr3_leaf_compact(
 	 */
 	xfs_trans_log_buf(trans, bp, 0, args->geo->blksize - 1);
 
-	kmem_free(tmpbuffer);
+	kfree(tmpbuffer);
 }
 
 /*
@@ -2290,7 +2290,7 @@ xfs_attr3_leaf_unbalance(
 		}
 		memcpy(save_leaf, tmp_leaf, state->args->geo->blksize);
 		savehdr = tmphdr; /* struct copy */
-		kmem_free(tmp_leaf);
+		kfree(tmp_leaf);
 	}
 
 	xfs_attr3_leaf_hdr_to_disk(state->args->geo, save_leaf, &savehdr);
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index ea8d3659df20..1adfc35c99c9 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -451,7 +451,7 @@ xfs_btree_del_cursor(
 	ASSERT(cur->bc_btnum != XFS_BTNUM_BMAP || cur->bc_ino.allocated == 0 ||
 	       xfs_is_shutdown(cur->bc_mp) || error != 0);
 	if (unlikely(cur->bc_flags & XFS_BTREE_STAGING))
-		kmem_free(cur->bc_ops);
+		kfree(cur->bc_ops);
 	if (!(cur->bc_flags & XFS_BTREE_LONG_PTRS) && cur->bc_ag.pag)
 		xfs_perag_put(cur->bc_ag.pag);
 	kmem_cache_free(cur->bc_cache, cur);
diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c
index 065e4a00a2f4..961f6b898f4b 100644
--- a/fs/xfs/libxfs/xfs_btree_staging.c
+++ b/fs/xfs/libxfs/xfs_btree_staging.c
@@ -171,7 +171,7 @@ xfs_btree_commit_afakeroot(
 
 	trace_xfs_btree_commit_afakeroot(cur);
 
-	kmem_free((void *)cur->bc_ops);
+	kfree((void *)cur->bc_ops);
 	cur->bc_ag.agbp = agbp;
 	cur->bc_ops = ops;
 	cur->bc_flags &= ~XFS_BTREE_STAGING;
@@ -254,7 +254,7 @@ xfs_btree_commit_ifakeroot(
 
 	trace_xfs_btree_commit_ifakeroot(cur);
 
-	kmem_free((void *)cur->bc_ops);
+	kfree((void *)cur->bc_ops);
 	cur->bc_ino.ifake = NULL;
 	cur->bc_ino.whichfork = whichfork;
 	cur->bc_ops = ops;
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index 331b9251b185..3383b4525381 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -2220,7 +2220,7 @@ xfs_da_grow_inode_int(
 
 out_free_map:
 	if (mapp != &map)
-		kmem_free(mapp);
+		kfree(mapp);
 	return error;
 }
 
@@ -2559,7 +2559,7 @@ xfs_dabuf_map(
 	*nmaps = nirecs;
 out_free_irecs:
 	if (irecs != &irec)
-		kmem_free(irecs);
+		kfree(irecs);
 	return error;
 
 invalid_mapping:
@@ -2615,7 +2615,7 @@ xfs_da_get_buf(
 
 out_free:
 	if (mapp != &map)
-		kmem_free(mapp);
+		kfree(mapp);
 
 	return error;
 }
@@ -2656,7 +2656,7 @@ xfs_da_read_buf(
 	*bpp = bp;
 out_free:
 	if (mapp != &map)
-		kmem_free(mapp);
+		kfree(mapp);
 
 	return error;
 }
@@ -2687,7 +2687,7 @@ xfs_da_reada_buf(
 
 out_free:
 	if (mapp != &map)
-		kmem_free(mapp);
+		kfree(mapp);
 
 	return error;
 }
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 07d318b1f807..75689c151a54 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -1038,7 +1038,7 @@ xfs_defer_ops_capture_abort(
 	for (i = 0; i < dfc->dfc_held.dr_inos; i++)
 		xfs_irele(dfc->dfc_held.dr_ip[i]);
 
-	kmem_free(dfc);
+	kfree(dfc);
 }
 
 /*
@@ -1114,7 +1114,7 @@ xfs_defer_ops_continue(
 	list_splice_init(&dfc->dfc_dfops, &tp->t_dfops);
 	tp->t_flags |= dfc->dfc_tpflags;
 
-	kmem_free(dfc);
+	kfree(dfc);
 }
 
 /* Release the resources captured and continued during recovery. */
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 370d67300455..e60aa8f8d0a7 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -109,8 +109,8 @@ xfs_da_mount(
 	mp->m_attr_geo = kzalloc(sizeof(struct xfs_da_geometry),
 				GFP_KERNEL | __GFP_RETRY_MAYFAIL);
 	if (!mp->m_dir_geo || !mp->m_attr_geo) {
-		kmem_free(mp->m_dir_geo);
-		kmem_free(mp->m_attr_geo);
+		kfree(mp->m_dir_geo);
+		kfree(mp->m_attr_geo);
 		return -ENOMEM;
 	}
 
@@ -178,8 +178,8 @@ void
 xfs_da_unmount(
 	struct xfs_mount	*mp)
 {
-	kmem_free(mp->m_dir_geo);
-	kmem_free(mp->m_attr_geo);
+	kfree(mp->m_dir_geo);
+	kfree(mp->m_attr_geo);
 }
 
 /*
@@ -244,7 +244,7 @@ xfs_dir_init(
 	args->dp = dp;
 	args->trans = tp;
 	error = xfs_dir2_sf_create(args, pdp->i_ino);
-	kmem_free(args);
+	kfree(args);
 	return error;
 }
 
@@ -313,7 +313,7 @@ xfs_dir_createname(
 		rval = xfs_dir2_node_addname(args);
 
 out_free:
-	kmem_free(args);
+	kfree(args);
 	return rval;
 }
 
@@ -419,7 +419,7 @@ out_check_rval:
 	}
 out_free:
 	xfs_iunlock(dp, lock_mode);
-	kmem_free(args);
+	kfree(args);
 	return rval;
 }
 
@@ -477,7 +477,7 @@ xfs_dir_removename(
 	else
 		rval = xfs_dir2_node_removename(args);
 out_free:
-	kmem_free(args);
+	kfree(args);
 	return rval;
 }
 
@@ -538,7 +538,7 @@ xfs_dir_replace(
 	else
 		rval = xfs_dir2_node_replace(args);
 out_free:
-	kmem_free(args);
+	kfree(args);
 	return rval;
 }
 
diff --git a/fs/xfs/libxfs/xfs_dir2_block.c b/fs/xfs/libxfs/xfs_dir2_block.c
index 506c65caaec5..fde46081a824 100644
--- a/fs/xfs/libxfs/xfs_dir2_block.c
+++ b/fs/xfs/libxfs/xfs_dir2_block.c
@@ -1253,7 +1253,7 @@ xfs_dir2_sf_to_block(
 			sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
 	}
 	/* Done with the temporary buffer */
-	kmem_free(sfp);
+	kfree(sfp);
 	/*
 	 * Sort the leaf entries by hash value.
 	 */
@@ -1268,6 +1268,6 @@ xfs_dir2_sf_to_block(
 	xfs_dir3_data_check(dp, bp);
 	return 0;
 out_free:
-	kmem_free(sfp);
+	kfree(sfp);
 	return error;
 }
diff --git a/fs/xfs/libxfs/xfs_dir2_sf.c b/fs/xfs/libxfs/xfs_dir2_sf.c
index 7b1f41cff9e0..17a20384c8b7 100644
--- a/fs/xfs/libxfs/xfs_dir2_sf.c
+++ b/fs/xfs/libxfs/xfs_dir2_sf.c
@@ -350,7 +350,7 @@ xfs_dir2_block_to_sf(
 	xfs_dir2_sf_check(args);
 out:
 	xfs_trans_log_inode(args->trans, dp, logflags);
-	kmem_free(sfp);
+	kfree(sfp);
 	return error;
 }
 
@@ -576,7 +576,7 @@ xfs_dir2_sf_addname_hard(
 		sfep = xfs_dir2_sf_nextentry(mp, sfp, sfep);
 		memcpy(sfep, oldsfep, old_isize - nbytes);
 	}
-	kmem_free(buf);
+	kfree(buf);
 	dp->i_disk_size = new_isize;
 	xfs_dir2_sf_check(args);
 }
@@ -1190,7 +1190,7 @@ xfs_dir2_sf_toino4(
 	/*
 	 * Clean up the inode.
 	 */
-	kmem_free(buf);
+	kfree(buf);
 	dp->i_disk_size = newsize;
 	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
 }
@@ -1262,7 +1262,7 @@ xfs_dir2_sf_toino8(
 	/*
 	 * Clean up the inode.
 	 */
-	kmem_free(buf);
+	kfree(buf);
 	dp->i_disk_size = newsize;
 	xfs_trans_log_inode(args->trans, dp, XFS_ILOG_CORE | XFS_ILOG_DDATA);
 }
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index 4522f3c7a23f..16f18b08fe4c 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -747,7 +747,7 @@ xfs_iext_remove_node(
 again:
 	ASSERT(node->ptrs[pos]);
 	ASSERT(node->ptrs[pos] == victim);
-	kmem_free(victim);
+	kfree(victim);
 
 	nr_entries = xfs_iext_node_nr_entries(node, pos) - 1;
 	offset = node->keys[0];
@@ -793,7 +793,7 @@ again:
 		ASSERT(node == ifp->if_data);
 		ifp->if_data = node->ptrs[0];
 		ifp->if_height--;
-		kmem_free(node);
+		kfree(node);
 	}
 }
 
@@ -867,7 +867,7 @@ xfs_iext_free_last_leaf(
 	struct xfs_ifork	*ifp)
 {
 	ifp->if_height--;
-	kmem_free(ifp->if_data);
+	kfree(ifp->if_data);
 	ifp->if_data = NULL;
 }
 
@@ -1048,7 +1048,7 @@ xfs_iext_destroy_node(
 		}
 	}
 
-	kmem_free(node);
+	kfree(node);
 }
 
 void
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index f3cf7f933e15..f6d5b86b608d 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -471,7 +471,7 @@ xfs_iroot_realloc(
 						     (int)new_size);
 		memcpy(np, op, new_max * (uint)sizeof(xfs_fsblock_t));
 	}
-	kmem_free(ifp->if_broot);
+	kfree(ifp->if_broot);
 	ifp->if_broot = new_broot;
 	ifp->if_broot_bytes = (int)new_size;
 	if (ifp->if_broot)
@@ -525,13 +525,13 @@ xfs_idestroy_fork(
 	struct xfs_ifork	*ifp)
 {
 	if (ifp->if_broot != NULL) {
-		kmem_free(ifp->if_broot);
+		kfree(ifp->if_broot);
 		ifp->if_broot = NULL;
 	}
 
 	switch (ifp->if_format) {
 	case XFS_DINODE_FMT_LOCAL:
-		kmem_free(ifp->if_data);
+		kfree(ifp->if_data);
 		ifp->if_data = NULL;
 		break;
 	case XFS_DINODE_FMT_EXTENTS:
diff --git a/fs/xfs/scrub/cow_repair.c b/fs/xfs/scrub/cow_repair.c
index 1e82c727af8e..4de3f0f40f48 100644
--- a/fs/xfs/scrub/cow_repair.c
+++ b/fs/xfs/scrub/cow_repair.c
@@ -609,6 +609,6 @@ xrep_bmap_cow(
 out_bitmap:
 	xfsb_bitmap_destroy(&xc->old_cowfork_fsblocks);
 	xoff_bitmap_destroy(&xc->bad_fileoffs);
-	kmem_free(xc);
+	kfree(xc);
 	return error;
 }
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
index f7ba80d575d4..2a142cefdc3d 100644
--- a/fs/xfs/xfs_attr_item.c
+++ b/fs/xfs/xfs_attr_item.c
@@ -386,7 +386,7 @@ xfs_attr_free_item(
 		xfs_da_state_free(attr->xattri_da_state);
 	xfs_attri_log_nameval_put(attr->xattri_nameval);
 	if (attr->xattri_da_args->op_flags & XFS_DA_OP_RECOVERY)
-		kmem_free(attr);
+		kfree(attr);
 	else
 		kmem_cache_free(xfs_attr_intent_cache, attr);
 }
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 5f7a44d21cc9..0318d768520a 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -124,7 +124,7 @@ xfs_attr_shortform_list(
 					     XFS_ERRLEVEL_LOW,
 					     context->dp->i_mount, sfe,
 					     sizeof(*sfe));
-			kmem_free(sbuf);
+			kfree(sbuf);
 			return -EFSCORRUPTED;
 		}
 
@@ -188,7 +188,7 @@ xfs_attr_shortform_list(
 		cursor->offset++;
 	}
 out:
-	kmem_free(sbuf);
+	kfree(sbuf);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 6f53eb2d3de0..10bbde3ce7cc 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -204,7 +204,7 @@ xfs_buf_free_maps(
 	struct xfs_buf	*bp)
 {
 	if (bp->b_maps != &bp->__b_map) {
-		kmem_free(bp->b_maps);
+		kfree(bp->b_maps);
 		bp->b_maps = NULL;
 	}
 }
@@ -289,7 +289,7 @@ xfs_buf_free_pages(
 	mm_account_reclaimed_pages(bp->b_page_count);
 
 	if (bp->b_pages != bp->b_page_array)
-		kmem_free(bp->b_pages);
+		kfree(bp->b_pages);
 	bp->b_pages = NULL;
 	bp->b_flags &= ~_XBF_PAGES;
 }
@@ -315,7 +315,7 @@ xfs_buf_free(
 	if (bp->b_flags & _XBF_PAGES)
 		xfs_buf_free_pages(bp);
 	else if (bp->b_flags & _XBF_KMEM)
-		kmem_free(bp->b_addr);
+		kfree(bp->b_addr);
 
 	call_rcu(&bp->b_rcu, xfs_buf_free_callback);
 }
@@ -339,7 +339,7 @@ xfs_buf_alloc_kmem(
 	if (((unsigned long)(bp->b_addr + size - 1) & PAGE_MASK) !=
 	    ((unsigned long)bp->b_addr & PAGE_MASK)) {
 		/* b_addr spans two pages - use alloc_page instead */
-		kmem_free(bp->b_addr);
+		kfree(bp->b_addr);
 		bp->b_addr = NULL;
 		return -ENOMEM;
 	}
@@ -1953,7 +1953,7 @@ xfs_free_buftarg(
 	if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev)
 		bdev_release(btp->bt_bdev_handle);
 
-	kmem_free(btp);
+	kfree(btp);
 }
 
 int
@@ -2045,7 +2045,7 @@ error_pcpu:
 error_lru:
 	list_lru_destroy(&btp->bt_lru);
 error_free:
-	kmem_free(btp);
+	kfree(btp);
 	return NULL;
 }
 
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 545040c6ae87..43031842341a 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -814,7 +814,7 @@ xfs_buf_item_free_format(
 	struct xfs_buf_log_item	*bip)
 {
 	if (bip->bli_formats != &bip->__bli_format) {
-		kmem_free(bip->bli_formats);
+		kfree(bip->bli_formats);
 		bip->bli_formats = NULL;
 	}
 }
diff --git a/fs/xfs/xfs_buf_item_recover.c b/fs/xfs/xfs_buf_item_recover.c
index 34776f4c05ac..09e893cf563c 100644
--- a/fs/xfs/xfs_buf_item_recover.c
+++ b/fs/xfs/xfs_buf_item_recover.c
@@ -129,7 +129,7 @@ xlog_put_buffer_cancelled(
 
 	if (--bcp->bc_refcount == 0) {
 		list_del(&bcp->bc_list);
-		kmem_free(bcp);
+		kfree(bcp);
 	}
 	return true;
 }
@@ -1062,10 +1062,10 @@ xlog_free_buf_cancel_table(
 				&log->l_buf_cancel_table[i],
 				struct xfs_buf_cancel, bc_list))) {
 			list_del(&bc->bc_list);
-			kmem_free(bc);
+			kfree(bc);
 		}
 	}
 
-	kmem_free(log->l_buf_cancel_table);
+	kfree(log->l_buf_cancel_table);
 	log->l_buf_cancel_table = NULL;
 }
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index d5787991bb5b..8539f5c9a774 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -79,7 +79,7 @@ xfs_discard_endio_work(
 		container_of(work, struct xfs_busy_extents, endio_work);
 
 	xfs_extent_busy_clear(extents->mount, &extents->extent_list, false);
-	kmem_free(extents->owner);
+	kfree(extents->owner);
 }
 
 /*
diff --git a/fs/xfs/xfs_error.c b/fs/xfs/xfs_error.c
index 456520d60cd0..7ad0e92c6b5b 100644
--- a/fs/xfs/xfs_error.c
+++ b/fs/xfs/xfs_error.c
@@ -248,7 +248,7 @@ xfs_errortag_init(
 	ret = xfs_sysfs_init(&mp->m_errortag_kobj, &xfs_errortag_ktype,
 				&mp->m_kobj, "errortag");
 	if (ret)
-		kmem_free(mp->m_errortag);
+		kfree(mp->m_errortag);
 	return ret;
 }
 
@@ -257,7 +257,7 @@ xfs_errortag_del(
 	struct xfs_mount	*mp)
 {
 	xfs_sysfs_del(&mp->m_errortag_kobj);
-	kmem_free(mp->m_errortag);
+	kfree(mp->m_errortag);
 }
 
 static bool
diff --git a/fs/xfs/xfs_extent_busy.c b/fs/xfs/xfs_extent_busy.c
index b90c3dd43e03..56cfa1498571 100644
--- a/fs/xfs/xfs_extent_busy.c
+++ b/fs/xfs/xfs_extent_busy.c
@@ -531,7 +531,7 @@ xfs_extent_busy_clear_one(
 	}
 
 	list_del_init(&busyp->list);
-	kmem_free(busyp);
+	kfree(busyp);
 }
 
 static void
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 6062703a2723..8c382f092332 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -42,7 +42,7 @@ xfs_efi_item_free(
 {
 	kvfree(efip->efi_item.li_lv_shadow);
 	if (efip->efi_format.efi_nextents > XFS_EFI_MAX_FAST_EXTENTS)
-		kmem_free(efip);
+		kfree(efip);
 	else
 		kmem_cache_free(xfs_efi_cache, efip);
 }
@@ -231,7 +231,7 @@ xfs_efd_item_free(struct xfs_efd_log_item *efdp)
 {
 	kvfree(efdp->efd_item.li_lv_shadow);
 	if (efdp->efd_format.efd_nextents > XFS_EFD_MAX_FAST_EXTENTS)
-		kmem_free(efdp);
+		kfree(efdp);
 	else
 		kmem_cache_free(xfs_efd_cache, efdp);
 }
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index e2a3c8d3fe4f..e3aaa0555597 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -44,7 +44,7 @@ xfs_fstrm_free_func(
 	atomic_dec(&pag->pagf_fstrms);
 	xfs_perag_rele(pag);
 
-	kmem_free(item);
+	kfree(item);
 }
 
 /*
@@ -326,7 +326,7 @@ xfs_filestream_create_association(
 
 out_free_item:
 	xfs_perag_rele(item->pag);
-	kmem_free(item);
+	kfree(item);
 out_put_fstrms:
 	atomic_dec(&args->pag->pagf_fstrms);
 	return 0;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 1fd94958aa97..37ec247edc13 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -671,7 +671,7 @@ xfs_lookup(
 
 out_free_name:
 	if (ci_name)
-		kmem_free(ci_name->name);
+		kfree(ci_name->name);
 out_unlock:
 	*ipp = NULL;
 	return error;
@@ -2378,7 +2378,7 @@ xfs_ifree(
 	 * already been freed by xfs_attr_inactive.
 	 */
 	if (ip->i_df.if_format == XFS_DINODE_FMT_LOCAL) {
-		kmem_free(ip->i_df.if_data);
+		kfree(ip->i_df.if_data);
 		ip->i_df.if_data = NULL;
 		ip->i_df.if_bytes = 0;
 	}
diff --git a/fs/xfs/xfs_inode_item_recover.c b/fs/xfs/xfs_inode_item_recover.c
index 5d7b937179a0..dbdab4ce7c44 100644
--- a/fs/xfs/xfs_inode_item_recover.c
+++ b/fs/xfs/xfs_inode_item_recover.c
@@ -554,7 +554,7 @@ out_release:
 	xfs_buf_relse(bp);
 error:
 	if (need_free)
-		kmem_free(in_f);
+		kfree(in_f);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 45fb169bd819..7eeebcb6b925 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -435,7 +435,7 @@ xfs_ioc_attr_list(
 	    copy_to_user(ucursor, &context.cursor, sizeof(context.cursor)))
 		error = -EFAULT;
 out_free:
-	kmem_free(buffer);
+	kfree(buffer);
 	return error;
 }
 
@@ -1506,7 +1506,7 @@ xfs_ioc_getbmap(
 
 	error = 0;
 out_free_buf:
-	kmem_free(buf);
+	kfree(buf);
 	return error;
 }
 
@@ -1636,7 +1636,7 @@ xfs_ioc_getfsmap(
 	}
 
 out_free:
-	kmem_free(recs);
+	kfree(recs);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index a0d77f5f512e..be102fd49560 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -346,7 +346,7 @@ xfs_vn_ci_lookup(
 	dname.name = ci_name.name;
 	dname.len = ci_name.len;
 	dentry = d_add_ci(dentry, VFS_I(ip), &dname);
-	kmem_free(ci_name.name);
+	kfree(ci_name.name);
 	return dentry;
 }
 
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 14211174267a..95fc31b9f87d 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -214,7 +214,7 @@ xfs_bulkstat_one(
 			breq->startino, &bc);
 	xfs_trans_cancel(tp);
 out:
-	kmem_free(bc.buf);
+	kfree(bc.buf);
 
 	/*
 	 * If we reported one inode to userspace then we abort because we hit
@@ -309,7 +309,7 @@ xfs_bulkstat(
 			xfs_bulkstat_iwalk, breq->icount, &bc);
 	xfs_trans_cancel(tp);
 out:
-	kmem_free(bc.buf);
+	kfree(bc.buf);
 
 	/*
 	 * We found some inodes, so clear the error status and return them.
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
index 5dd622aa54c5..6d2eb6364867 100644
--- a/fs/xfs/xfs_iwalk.c
+++ b/fs/xfs/xfs_iwalk.c
@@ -172,7 +172,7 @@ STATIC void
 xfs_iwalk_free(
 	struct xfs_iwalk_ag	*iwag)
 {
-	kmem_free(iwag->recs);
+	kfree(iwag->recs);
 	iwag->recs = NULL;
 }
 
@@ -627,7 +627,7 @@ xfs_iwalk_ag_work(
 	xfs_iwalk_free(iwag);
 out:
 	xfs_perag_put(iwag->pag);
-	kmem_free(iwag);
+	kfree(iwag);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index 666618b463c9..caccb7f76690 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -20,8 +20,6 @@ typedef __u32			xfs_dev_t;
 typedef __u32			xfs_nlink_t;
 
 #include "xfs_types.h"
-
-#include "kmem.h"
 #include "mrlock.h"
 
 #include <linux/semaphore.h>
@@ -30,6 +28,7 @@ typedef __u32			xfs_nlink_t;
 #include <linux/kernel.h>
 #include <linux/blkdev.h>
 #include <linux/slab.h>
+#include <linux/vmalloc.h>
 #include <linux/crc32c.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 0009ffbec932..ee39639bb92b 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1663,12 +1663,12 @@ out_free_iclog:
 	for (iclog = log->l_iclog; iclog; iclog = prev_iclog) {
 		prev_iclog = iclog->ic_next;
 		kvfree(iclog->ic_data);
-		kmem_free(iclog);
+		kfree(iclog);
 		if (prev_iclog == log->l_iclog)
 			break;
 	}
 out_free_log:
-	kmem_free(log);
+	kfree(log);
 out:
 	return ERR_PTR(error);
 }	/* xlog_alloc_log */
@@ -2120,13 +2120,13 @@ xlog_dealloc_log(
 	for (i = 0; i < log->l_iclog_bufs; i++) {
 		next_iclog = iclog->ic_next;
 		kvfree(iclog->ic_data);
-		kmem_free(iclog);
+		kfree(iclog);
 		iclog = next_iclog;
 	}
 
 	log->l_mp->m_log = NULL;
 	destroy_workqueue(log->l_ioend_workqueue);
-	kmem_free(log);
+	kfree(log);
 }
 
 /*
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 2c0512916cc9..815a2181004c 100644
--- a/fs/xfs/xfs_log_cil.c
+++ b/fs/xfs/xfs_log_cil.c
@@ -703,7 +703,7 @@ xlog_cil_free_logvec(
 	while (!list_empty(lv_chain)) {
 		lv = list_first_entry(lv_chain, struct xfs_log_vec, lv_list);
 		list_del_init(&lv->lv_list);
-		kmem_free(lv);
+		kfree(lv);
 	}
 }
 
@@ -753,7 +753,7 @@ xlog_cil_committed(
 		return;
 	}
 
-	kmem_free(ctx);
+	kfree(ctx);
 }
 
 void
@@ -1339,7 +1339,7 @@ xlog_cil_push_work(
 out_skip:
 	up_write(&cil->xc_ctx_lock);
 	xfs_log_ticket_put(new_ctx->ticket);
-	kmem_free(new_ctx);
+	kfree(new_ctx);
 	return;
 
 out_abort_free_ticket:
@@ -1533,7 +1533,7 @@ xlog_cil_process_intents(
 		set_bit(XFS_LI_WHITEOUT, &ilip->li_flags);
 		trace_xfs_cil_whiteout_mark(ilip);
 		len += ilip->li_lv->lv_bytes;
-		kmem_free(ilip->li_lv);
+		kfree(ilip->li_lv);
 		ilip->li_lv = NULL;
 
 		xfs_trans_del_item(lip);
@@ -1786,7 +1786,7 @@ xlog_cil_init(
 out_destroy_wq:
 	destroy_workqueue(cil->xc_push_wq);
 out_destroy_cil:
-	kmem_free(cil);
+	kfree(cil);
 	return -ENOMEM;
 }
 
@@ -1799,12 +1799,12 @@ xlog_cil_destroy(
 	if (cil->xc_ctx) {
 		if (cil->xc_ctx->ticket)
 			xfs_log_ticket_put(cil->xc_ctx->ticket);
-		kmem_free(cil->xc_ctx);
+		kfree(cil->xc_ctx);
 	}
 
 	ASSERT(test_bit(XLOG_CIL_EMPTY, &cil->xc_flags));
 	free_percpu(cil->xc_pcp);
 	destroy_workqueue(cil->xc_push_wq);
-	kmem_free(cil);
+	kfree(cil);
 }
 
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 295306ef6959..e9ed43a833af 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2229,11 +2229,11 @@ xlog_recover_free_trans(
 		for (i = 0; i < item->ri_cnt; i++)
 			kvfree(item->ri_buf[i].i_addr);
 		/* Free the item itself */
-		kmem_free(item->ri_buf);
-		kmem_free(item);
+		kfree(item->ri_buf);
+		kfree(item);
 	}
 	/* Free the transaction recover structure */
-	kmem_free(trans);
+	kfree(trans);
 }
 
 /*
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index aabb25dc3efa..7328034d42ed 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -45,7 +45,7 @@ xfs_uuid_table_free(void)
 {
 	if (xfs_uuid_table_size == 0)
 		return;
-	kmem_free(xfs_uuid_table);
+	kfree(xfs_uuid_table);
 	xfs_uuid_table = NULL;
 	xfs_uuid_table_size = 0;
 }
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index feae3115617b..ce496704748d 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -365,9 +365,9 @@ xfs_mru_cache_create(
 
 exit:
 	if (err && mru && mru->lists)
-		kmem_free(mru->lists);
+		kfree(mru->lists);
 	if (err && mru)
-		kmem_free(mru);
+		kfree(mru);
 
 	return err;
 }
@@ -407,8 +407,8 @@ xfs_mru_cache_destroy(
 
 	xfs_mru_cache_flush(mru);
 
-	kmem_free(mru->lists);
-	kmem_free(mru);
+	kfree(mru->lists);
+	kfree(mru);
 }
 
 /*
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 6b6b96454449..6ad558ae853a 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -701,7 +701,7 @@ out_free_inos:
 out_free_lru:
 	list_lru_destroy(&qinf->qi_lru);
 out_free_qinf:
-	kmem_free(qinf);
+	kfree(qinf);
 	mp->m_quotainfo = NULL;
 	return error;
 }
@@ -725,7 +725,7 @@ xfs_qm_destroy_quotainfo(
 	xfs_qm_destroy_quotainos(qi);
 	mutex_destroy(&qi->qi_tree_lock);
 	mutex_destroy(&qi->qi_quotaofflock);
-	kmem_free(qi);
+	kfree(qi);
 	mp->m_quotainfo = NULL;
 }
 
@@ -1060,7 +1060,7 @@ xfs_qm_reset_dqcounts_buf(
 	} while (nmaps > 0);
 
 out:
-	kmem_free(map);
+	kfree(map);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index a9b322e23cfb..d850b9685f7f 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -38,7 +38,7 @@ xfs_cui_item_free(
 {
 	kvfree(cuip->cui_item.li_lv_shadow);
 	if (cuip->cui_format.cui_nextents > XFS_CUI_MAX_FAST_EXTENTS)
-		kmem_free(cuip);
+		kfree(cuip);
 	else
 		kmem_cache_free(xfs_cui_cache, cuip);
 }
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 489ca8c0e1dc..a40b92ac81e8 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -38,7 +38,7 @@ xfs_rui_item_free(
 {
 	kvfree(ruip->rui_item.li_lv_shadow);
 	if (ruip->rui_format.rui_nextents > XFS_RUI_MAX_FAST_EXTENTS)
-		kmem_free(ruip);
+		kfree(ruip);
 	else
 		kmem_cache_free(xfs_rui_cache, ruip);
 }
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 57ed9baaf156..2f85567f3d75 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1050,7 +1050,7 @@ out_free:
 	/*
 	 * Free the fake mp structure.
 	 */
-	kmem_free(nmp);
+	kfree(nmp);
 
 	/*
 	 * If we had to allocate a new rsum_cache, we either need to free the
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index d167ba00e7cf..6ce1e6deb7ec 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -761,7 +761,7 @@ xfs_mount_free(
 	debugfs_remove(mp->m_debugfs);
 	kfree(mp->m_rtname);
 	kfree(mp->m_logname);
-	kmem_free(mp);
+	kfree(mp);
 }
 
 STATIC int
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 5f206cdb40ff..e4c343096f95 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -922,7 +922,7 @@ xfs_trans_ail_init(
 	return 0;
 
 out_free_ailp:
-	kmem_free(ailp);
+	kfree(ailp);
 	return -ENOMEM;
 }
 
@@ -933,5 +933,5 @@ xfs_trans_ail_destroy(
 	struct xfs_ail	*ailp = mp->m_ail;
 
 	kthread_stop(ailp->ail_task);
-	kmem_free(ailp);
+	kfree(ailp);
 }
-- 
cgit v1.2.3


From 0b3a76e955ebe3d71a2bcd5990404ed522b40e17 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 16 Jan 2024 09:59:46 +1100
Subject: xfs: use GFP_KERNEL in pure transaction contexts

When running in a transaction context, memory allocations are scoped
to GFP_NOFS. Hence we don't need to use GFP_NOFS contexts in pure
transaction context allocations - GFP_KERNEL will automatically get
converted to GFP_NOFS as appropriate.

Go through the code and convert all the obvious GFP_NOFS allocations
in transaction context to use GFP_KERNEL. This further reduces the
explicit use of GFP_NOFS in XFS.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
---
 fs/xfs/libxfs/xfs_attr.c       |  3 ++-
 fs/xfs/libxfs/xfs_bmap.c       |  2 +-
 fs/xfs/libxfs/xfs_defer.c      |  6 +++---
 fs/xfs/libxfs/xfs_dir2.c       |  8 ++++----
 fs/xfs/libxfs/xfs_inode_fork.c |  8 ++++----
 fs/xfs/libxfs/xfs_refcount.c   |  2 +-
 fs/xfs/libxfs/xfs_rmap.c       |  2 +-
 fs/xfs/xfs_attr_item.c         |  4 ++--
 fs/xfs/xfs_bmap_util.c         |  2 +-
 fs/xfs/xfs_buf.c               | 28 +++++++++++++++++-----------
 fs/xfs/xfs_log.c               |  3 ++-
 fs/xfs/xfs_mru_cache.c         |  2 +-
 12 files changed, 39 insertions(+), 31 deletions(-)

(limited to 'fs/xfs/xfs_buf.c')

diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index e965a48e7db9..82ab559f1da1 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -891,7 +891,8 @@ xfs_attr_defer_add(
 
 	struct xfs_attr_intent	*new;
 
-	new = kmem_cache_zalloc(xfs_attr_intent_cache, GFP_NOFS | __GFP_NOFAIL);
+	new = kmem_cache_zalloc(xfs_attr_intent_cache,
+			GFP_KERNEL | __GFP_NOFAIL);
 	new->xattri_op_flags = op_flags;
 	new->xattri_da_args = args;
 
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index f362345467fa..b525524a2da4 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -6098,7 +6098,7 @@ __xfs_bmap_add(
 			bmap->br_blockcount,
 			bmap->br_state);
 
-	bi = kmem_cache_alloc(xfs_bmap_intent_cache, GFP_NOFS | __GFP_NOFAIL);
+	bi = kmem_cache_alloc(xfs_bmap_intent_cache, GFP_KERNEL | __GFP_NOFAIL);
 	INIT_LIST_HEAD(&bi->bi_list);
 	bi->bi_type = type;
 	bi->bi_owner = ip;
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 75689c151a54..8ae4401f6810 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -825,7 +825,7 @@ xfs_defer_alloc(
 	struct xfs_defer_pending	*dfp;
 
 	dfp = kmem_cache_zalloc(xfs_defer_pending_cache,
-			GFP_NOFS | __GFP_NOFAIL);
+			GFP_KERNEL | __GFP_NOFAIL);
 	dfp->dfp_ops = ops;
 	INIT_LIST_HEAD(&dfp->dfp_work);
 	list_add_tail(&dfp->dfp_list, &tp->t_dfops);
@@ -888,7 +888,7 @@ xfs_defer_start_recovery(
 	struct xfs_defer_pending	*dfp;
 
 	dfp = kmem_cache_zalloc(xfs_defer_pending_cache,
-			GFP_NOFS | __GFP_NOFAIL);
+			GFP_KERNEL | __GFP_NOFAIL);
 	dfp->dfp_ops = ops;
 	dfp->dfp_intent = lip;
 	INIT_LIST_HEAD(&dfp->dfp_work);
@@ -979,7 +979,7 @@ xfs_defer_ops_capture(
 		return ERR_PTR(error);
 
 	/* Create an object to capture the defer ops. */
-	dfc = kzalloc(sizeof(*dfc), GFP_NOFS | __GFP_NOFAIL);
+	dfc = kzalloc(sizeof(*dfc), GFP_KERNEL | __GFP_NOFAIL);
 	INIT_LIST_HEAD(&dfc->dfc_list);
 	INIT_LIST_HEAD(&dfc->dfc_dfops);
 
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 728f72f0d078..8c9403b33191 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -236,7 +236,7 @@ xfs_dir_init(
 	if (error)
 		return error;
 
-	args = kzalloc(sizeof(*args), GFP_NOFS | __GFP_NOFAIL);
+	args = kzalloc(sizeof(*args), GFP_KERNEL | __GFP_NOFAIL);
 	if (!args)
 		return -ENOMEM;
 
@@ -273,7 +273,7 @@ xfs_dir_createname(
 		XFS_STATS_INC(dp->i_mount, xs_dir_create);
 	}
 
-	args = kzalloc(sizeof(*args), GFP_NOFS | __GFP_NOFAIL);
+	args = kzalloc(sizeof(*args), GFP_KERNEL | __GFP_NOFAIL);
 	if (!args)
 		return -ENOMEM;
 
@@ -435,7 +435,7 @@ xfs_dir_removename(
 	ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
 	XFS_STATS_INC(dp->i_mount, xs_dir_remove);
 
-	args = kzalloc(sizeof(*args), GFP_NOFS | __GFP_NOFAIL);
+	args = kzalloc(sizeof(*args), GFP_KERNEL | __GFP_NOFAIL);
 	if (!args)
 		return -ENOMEM;
 
@@ -496,7 +496,7 @@ xfs_dir_replace(
 	if (rval)
 		return rval;
 
-	args = kzalloc(sizeof(*args), GFP_NOFS | __GFP_NOFAIL);
+	args = kzalloc(sizeof(*args), GFP_KERNEL | __GFP_NOFAIL);
 	if (!args)
 		return -ENOMEM;
 
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 709fda3d742f..136d5d7b9de9 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -402,7 +402,7 @@ xfs_iroot_realloc(
 		if (ifp->if_broot_bytes == 0) {
 			new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, rec_diff);
 			ifp->if_broot = kmalloc(new_size,
-						GFP_NOFS | __GFP_NOFAIL);
+						GFP_KERNEL | __GFP_NOFAIL);
 			ifp->if_broot_bytes = (int)new_size;
 			return;
 		}
@@ -417,7 +417,7 @@ xfs_iroot_realloc(
 		new_max = cur_max + rec_diff;
 		new_size = XFS_BMAP_BROOT_SPACE_CALC(mp, new_max);
 		ifp->if_broot = krealloc(ifp->if_broot, new_size,
-					 GFP_NOFS | __GFP_NOFAIL);
+					 GFP_KERNEL | __GFP_NOFAIL);
 		op = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
 						     ifp->if_broot_bytes);
 		np = (char *)XFS_BMAP_BROOT_PTR_ADDR(mp, ifp->if_broot, 1,
@@ -443,7 +443,7 @@ xfs_iroot_realloc(
 	else
 		new_size = 0;
 	if (new_size > 0) {
-		new_broot = kmalloc(new_size, GFP_NOFS | __GFP_NOFAIL);
+		new_broot = kmalloc(new_size, GFP_KERNEL | __GFP_NOFAIL);
 		/*
 		 * First copy over the btree block header.
 		 */
@@ -512,7 +512,7 @@ xfs_idata_realloc(
 
 	if (byte_diff) {
 		ifp->if_data = krealloc(ifp->if_data, new_size,
-					GFP_NOFS | __GFP_NOFAIL);
+					GFP_KERNEL | __GFP_NOFAIL);
 		if (new_size == 0)
 			ifp->if_data = NULL;
 		ifp->if_bytes = new_size;
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 6709a7f8bad5..7df52daa22cf 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1449,7 +1449,7 @@ __xfs_refcount_add(
 			blockcount);
 
 	ri = kmem_cache_alloc(xfs_refcount_intent_cache,
-			GFP_NOFS | __GFP_NOFAIL);
+			GFP_KERNEL | __GFP_NOFAIL);
 	INIT_LIST_HEAD(&ri->ri_list);
 	ri->ri_type = type;
 	ri->ri_startblock = startblock;
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 76bf7f48cb5a..0bd1f47b2c2b 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2559,7 +2559,7 @@ __xfs_rmap_add(
 			bmap->br_blockcount,
 			bmap->br_state);
 
-	ri = kmem_cache_alloc(xfs_rmap_intent_cache, GFP_NOFS | __GFP_NOFAIL);
+	ri = kmem_cache_alloc(xfs_rmap_intent_cache, GFP_KERNEL | __GFP_NOFAIL);
 	INIT_LIST_HEAD(&ri->ri_list);
 	ri->ri_type = type;
 	ri->ri_owner = owner;
diff --git a/fs/xfs/xfs_attr_item.c b/fs/xfs/xfs_attr_item.c
index 2a142cefdc3d..0bf25a2ba3b6 100644
--- a/fs/xfs/xfs_attr_item.c
+++ b/fs/xfs/xfs_attr_item.c
@@ -226,7 +226,7 @@ xfs_attri_init(
 {
 	struct xfs_attri_log_item	*attrip;
 
-	attrip = kmem_cache_zalloc(xfs_attri_cache, GFP_NOFS | __GFP_NOFAIL);
+	attrip = kmem_cache_zalloc(xfs_attri_cache, GFP_KERNEL | __GFP_NOFAIL);
 
 	/*
 	 * Grab an extra reference to the name/value buffer for this log item.
@@ -666,7 +666,7 @@ xfs_attr_create_done(
 
 	attrip = ATTRI_ITEM(intent);
 
-	attrdp = kmem_cache_zalloc(xfs_attrd_cache, GFP_NOFS | __GFP_NOFAIL);
+	attrdp = kmem_cache_zalloc(xfs_attrd_cache, GFP_KERNEL | __GFP_NOFAIL);
 
 	xfs_log_item_init(tp->t_mountp, &attrdp->attrd_item, XFS_LI_ATTRD,
 			  &xfs_attrd_item_ops);
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index c2531c28905c..cb2a4b940292 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -66,7 +66,7 @@ xfs_zero_extent(
 	return blkdev_issue_zeroout(target->bt_bdev,
 		block << (mp->m_super->s_blocksize_bits - 9),
 		count_fsb << (mp->m_super->s_blocksize_bits - 9),
-		GFP_NOFS, 0);
+		GFP_KERNEL, 0);
 }
 
 /*
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 10bbde3ce7cc..ab7546a9dfd7 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -190,7 +190,7 @@ xfs_buf_get_maps(
 	}
 
 	bp->b_maps = kzalloc(map_count * sizeof(struct xfs_buf_map),
-				GFP_NOFS | __GFP_NOFAIL);
+			GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
 	if (!bp->b_maps)
 		return -ENOMEM;
 	return 0;
@@ -222,7 +222,8 @@ _xfs_buf_alloc(
 	int			i;
 
 	*bpp = NULL;
-	bp = kmem_cache_zalloc(xfs_buf_cache, GFP_NOFS | __GFP_NOFAIL);
+	bp = kmem_cache_zalloc(xfs_buf_cache,
+			GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL);
 
 	/*
 	 * We don't want certain flags to appear in b_flags unless they are
@@ -325,7 +326,7 @@ xfs_buf_alloc_kmem(
 	struct xfs_buf	*bp,
 	xfs_buf_flags_t	flags)
 {
-	gfp_t		gfp_mask = GFP_NOFS | __GFP_NOFAIL;
+	gfp_t		gfp_mask = GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOFAIL;
 	size_t		size = BBTOB(bp->b_length);
 
 	/* Assure zeroed buffer for non-read cases. */
@@ -356,13 +357,11 @@ xfs_buf_alloc_pages(
 	struct xfs_buf	*bp,
 	xfs_buf_flags_t	flags)
 {
-	gfp_t		gfp_mask = __GFP_NOWARN;
+	gfp_t		gfp_mask = GFP_KERNEL | __GFP_NOLOCKDEP | __GFP_NOWARN;
 	long		filled = 0;
 
 	if (flags & XBF_READ_AHEAD)
 		gfp_mask |= __GFP_NORETRY;
-	else
-		gfp_mask |= GFP_NOFS;
 
 	/* Make sure that we have a page list */
 	bp->b_page_count = DIV_ROUND_UP(BBTOB(bp->b_length), PAGE_SIZE);
@@ -429,11 +428,18 @@ _xfs_buf_map_pages(
 
 		/*
 		 * vm_map_ram() will allocate auxiliary structures (e.g.
-		 * pagetables) with GFP_KERNEL, yet we are likely to be under
-		 * GFP_NOFS context here. Hence we need to tell memory reclaim
-		 * that we are in such a context via PF_MEMALLOC_NOFS to prevent
-		 * memory reclaim re-entering the filesystem here and
-		 * potentially deadlocking.
+		 * pagetables) with GFP_KERNEL, yet we often under a scoped nofs
+		 * context here. Mixing GFP_KERNEL with GFP_NOFS allocations
+		 * from the same call site that can be run from both above and
+		 * below memory reclaim causes lockdep false positives. Hence we
+		 * always need to force this allocation to nofs context because
+		 * we can't pass __GFP_NOLOCKDEP down to auxillary structures to
+		 * prevent false positive lockdep reports.
+		 *
+		 * XXX(dgc): I think dquot reclaim is the only place we can get
+		 * to this function from memory reclaim context now. If we fix
+		 * that like we've fixed inode reclaim to avoid writeback from
+		 * reclaim, this nofs wrapping can go away.
 		 */
 		nofs_flag = memalloc_nofs_save();
 		do {
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index ee39639bb92b..1f68569e62ca 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -3518,7 +3518,8 @@ xlog_ticket_alloc(
 	struct xlog_ticket	*tic;
 	int			unit_res;
 
-	tic = kmem_cache_zalloc(xfs_log_ticket_cache, GFP_NOFS | __GFP_NOFAIL);
+	tic = kmem_cache_zalloc(xfs_log_ticket_cache,
+			GFP_KERNEL | __GFP_NOFAIL);
 
 	unit_res = xlog_calc_unit_res(log, unit_bytes, &tic->t_iclog_hdrs);
 
diff --git a/fs/xfs/xfs_mru_cache.c b/fs/xfs/xfs_mru_cache.c
index ce496704748d..7443debaffd6 100644
--- a/fs/xfs/xfs_mru_cache.c
+++ b/fs/xfs/xfs_mru_cache.c
@@ -428,7 +428,7 @@ xfs_mru_cache_insert(
 	if (!mru || !mru->lists)
 		return -EINVAL;
 
-	if (radix_tree_preload(GFP_NOFS))
+	if (radix_tree_preload(GFP_KERNEL))
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&elem->list_node);
-- 
cgit v1.2.3


From 204fae32d5f7b9ac673d3d4f636dcef8697db2f0 Mon Sep 17 00:00:00 2001
From: Dave Chinner <dchinner@redhat.com>
Date: Tue, 16 Jan 2024 09:59:49 +1100
Subject: xfs: clean up remaining GFP_NOFS users

These few remaining GFP_NOFS callers do not need to use GFP_NOFS at
all. They are only called from a non-transactional context or cannot
be accessed from memory reclaim due to other constraints. Hence they
can just use GFP_KERNEL.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: "Darrick J. Wong" <djwong@kernel.org>
Signed-off-by: Chandan Babu R <chandanbabu@kernel.org>
---
 fs/xfs/libxfs/xfs_btree_staging.c | 4 ++--
 fs/xfs/xfs_attr_list.c            | 2 +-
 fs/xfs/xfs_buf.c                  | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs/xfs/xfs_buf.c')

diff --git a/fs/xfs/libxfs/xfs_btree_staging.c b/fs/xfs/libxfs/xfs_btree_staging.c
index 961f6b898f4b..f0c69f9bb169 100644
--- a/fs/xfs/libxfs/xfs_btree_staging.c
+++ b/fs/xfs/libxfs/xfs_btree_staging.c
@@ -139,7 +139,7 @@ xfs_btree_stage_afakeroot(
 	ASSERT(!(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE));
 	ASSERT(cur->bc_tp == NULL);
 
-	nops = kmalloc(sizeof(struct xfs_btree_ops), GFP_NOFS | __GFP_NOFAIL);
+	nops = kmalloc(sizeof(struct xfs_btree_ops), GFP_KERNEL | __GFP_NOFAIL);
 	memcpy(nops, cur->bc_ops, sizeof(struct xfs_btree_ops));
 	nops->alloc_block = xfs_btree_fakeroot_alloc_block;
 	nops->free_block = xfs_btree_fakeroot_free_block;
@@ -220,7 +220,7 @@ xfs_btree_stage_ifakeroot(
 	ASSERT(cur->bc_flags & XFS_BTREE_ROOT_IN_INODE);
 	ASSERT(cur->bc_tp == NULL);
 
-	nops = kmalloc(sizeof(struct xfs_btree_ops), GFP_NOFS | __GFP_NOFAIL);
+	nops = kmalloc(sizeof(struct xfs_btree_ops), GFP_KERNEL | __GFP_NOFAIL);
 	memcpy(nops, cur->bc_ops, sizeof(struct xfs_btree_ops));
 	nops->alloc_block = xfs_btree_fakeroot_alloc_block;
 	nops->free_block = xfs_btree_fakeroot_free_block;
diff --git a/fs/xfs/xfs_attr_list.c b/fs/xfs/xfs_attr_list.c
index 0318d768520a..47453510c0ab 100644
--- a/fs/xfs/xfs_attr_list.c
+++ b/fs/xfs/xfs_attr_list.c
@@ -109,7 +109,7 @@ xfs_attr_shortform_list(
 	 * It didn't all fit, so we have to sort everything on hashval.
 	 */
 	sbsize = sf->count * sizeof(*sbuf);
-	sbp = sbuf = kmalloc(sbsize, GFP_NOFS | __GFP_NOFAIL);
+	sbp = sbuf = kmalloc(sbsize, GFP_KERNEL | __GFP_NOFAIL);
 
 	/*
 	 * Scan the attribute list for the rest of the entries, storing
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index ab7546a9dfd7..fb80cae0b2e5 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -2008,7 +2008,7 @@ xfs_alloc_buftarg(
 #if defined(CONFIG_FS_DAX) && defined(CONFIG_MEMORY_FAILURE)
 	ops = &xfs_dax_holder_operations;
 #endif
-	btp = kzalloc(sizeof(*btp), GFP_NOFS | __GFP_NOFAIL);
+	btp = kzalloc(sizeof(*btp), GFP_KERNEL | __GFP_NOFAIL);
 
 	btp->bt_mount = mp;
 	btp->bt_bdev_handle = bdev_handle;
-- 
cgit v1.2.3


From 24f755e4854e0fddb78d18f610bf1b5cb61db520 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 22 Feb 2024 12:41:02 -0800
Subject: xfs: split xfs_buf_rele for cached vs uncached buffers

xfs_buf_rele is a bit confusing because it mixes up handling of normal
cached and the special uncached buffers without much explanation.
Split the handling into two different helpers, and use a clearly named
helper that checks the hash key to distinguish the two cases instead
of checking the pag pointer.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_buf.c | 46 +++++++++++++++++++++++++++++++---------------
 1 file changed, 31 insertions(+), 15 deletions(-)

(limited to 'fs/xfs/xfs_buf.c')

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index fb80cae0b2e5..1e85780411d5 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -60,6 +60,11 @@ xfs_buf_submit(
 	return __xfs_buf_submit(bp, !(bp->b_flags & XBF_ASYNC));
 }
 
+static inline bool xfs_buf_is_uncached(struct xfs_buf *bp)
+{
+	return bp->b_rhash_key == XFS_BUF_DADDR_NULL;
+}
+
 static inline int
 xfs_buf_is_vmapped(
 	struct xfs_buf	*bp)
@@ -996,12 +1001,19 @@ xfs_buf_hold(
 	atomic_inc(&bp->b_hold);
 }
 
-/*
- * Release a hold on the specified buffer. If the hold count is 1, the buffer is
- * placed on LRU or freed (depending on b_lru_ref).
- */
-void
-xfs_buf_rele(
+static void
+xfs_buf_rele_uncached(
+	struct xfs_buf		*bp)
+{
+	ASSERT(list_empty(&bp->b_lru));
+	if (atomic_dec_and_test(&bp->b_hold)) {
+		xfs_buf_ioacct_dec(bp);
+		xfs_buf_free(bp);
+	}
+}
+
+static void
+xfs_buf_rele_cached(
 	struct xfs_buf		*bp)
 {
 	struct xfs_perag	*pag = bp->b_pag;
@@ -1010,15 +1022,6 @@ xfs_buf_rele(
 
 	trace_xfs_buf_rele(bp, _RET_IP_);
 
-	if (!pag) {
-		ASSERT(list_empty(&bp->b_lru));
-		if (atomic_dec_and_test(&bp->b_hold)) {
-			xfs_buf_ioacct_dec(bp);
-			xfs_buf_free(bp);
-		}
-		return;
-	}
-
 	ASSERT(atomic_read(&bp->b_hold) > 0);
 
 	/*
@@ -1086,6 +1089,19 @@ out_unlock:
 		xfs_buf_free(bp);
 }
 
+/*
+ * Release a hold on the specified buffer.
+ */
+void
+xfs_buf_rele(
+	struct xfs_buf		*bp)
+{
+	trace_xfs_buf_rele(bp, _RET_IP_);
+	if (xfs_buf_is_uncached(bp))
+		xfs_buf_rele_uncached(bp);
+	else
+		xfs_buf_rele_cached(bp);
+}
 
 /*
  *	Lock a buffer object, if it is not already locked.
-- 
cgit v1.2.3


From 21e308e6485542a9795144aa6a39a1edc83a0d31 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 22 Feb 2024 12:42:44 -0800
Subject: xfs: remove the xfs_buftarg_t typedef

Switch the few remaining holdouts to the struct version.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_buf.c   |  6 +++---
 fs/xfs/xfs_buf.h   |  4 ++--
 fs/xfs/xfs_log.c   | 16 ++++++++--------
 fs/xfs/xfs_mount.h |  6 +++---
 4 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'fs/xfs/xfs_buf.c')

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 1e85780411d5..1777d834fd54 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1980,7 +1980,7 @@ xfs_free_buftarg(
 
 int
 xfs_setsize_buftarg(
-	xfs_buftarg_t		*btp,
+	struct xfs_buftarg	*btp,
 	unsigned int		sectorsize)
 {
 	/* Set up metadata sector size info */
@@ -2008,7 +2008,7 @@ xfs_setsize_buftarg(
  */
 STATIC int
 xfs_setsize_buftarg_early(
-	xfs_buftarg_t		*btp)
+	struct xfs_buftarg	*btp)
 {
 	return xfs_setsize_buftarg(btp, bdev_logical_block_size(btp->bt_bdev));
 }
@@ -2018,7 +2018,7 @@ xfs_alloc_buftarg(
 	struct xfs_mount	*mp,
 	struct bdev_handle	*bdev_handle)
 {
-	xfs_buftarg_t		*btp;
+	struct xfs_buftarg	*btp;
 	const struct dax_holder_operations *ops = NULL;
 
 #if defined(CONFIG_FS_DAX) && defined(CONFIG_MEMORY_FAILURE)
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index b470de08a46c..b9216dee7810 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -96,7 +96,7 @@ typedef unsigned int xfs_buf_flags_t;
  * The latter is derived from the underlying device, and controls direct IO
  * alignment constraints.
  */
-typedef struct xfs_buftarg {
+struct xfs_buftarg {
 	dev_t			bt_dev;
 	struct bdev_handle	*bt_bdev_handle;
 	struct block_device	*bt_bdev;
@@ -114,7 +114,7 @@ typedef struct xfs_buftarg {
 
 	struct percpu_counter	bt_io_count;
 	struct ratelimit_state	bt_ioerror_rl;
-} xfs_buftarg_t;
+};
 
 #define XB_PAGES	2
 
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 1f68569e62ca..5004f23d344e 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -633,14 +633,14 @@ xlog_state_release_iclog(
  */
 int
 xfs_log_mount(
-	xfs_mount_t	*mp,
-	xfs_buftarg_t	*log_target,
-	xfs_daddr_t	blk_offset,
-	int		num_bblks)
-{
-	struct xlog	*log;
-	int		error = 0;
-	int		min_logfsbs;
+	xfs_mount_t		*mp,
+	struct xfs_buftarg	*log_target,
+	xfs_daddr_t		blk_offset,
+	int			num_bblks)
+{
+	struct xlog		*log;
+	int			error = 0;
+	int			min_logfsbs;
 
 	if (!xfs_has_norecovery(mp)) {
 		xfs_notice(mp, "Mounting V%d Filesystem %pU",
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index e86dfe67894f..6c44e6db4d86 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -94,9 +94,9 @@ typedef struct xfs_mount {
 	struct xfs_inode	*m_rsumip;	/* pointer to summary inode */
 	struct xfs_inode	*m_rootip;	/* pointer to root directory */
 	struct xfs_quotainfo	*m_quotainfo;	/* disk quota information */
-	xfs_buftarg_t		*m_ddev_targp;	/* saves taking the address */
-	xfs_buftarg_t		*m_logdev_targp;/* ptr to log device */
-	xfs_buftarg_t		*m_rtdev_targp;	/* ptr to rt device */
+	struct xfs_buftarg	*m_ddev_targp;	/* data device */
+	struct xfs_buftarg	*m_logdev_targp;/* log device */
+	struct xfs_buftarg	*m_rtdev_targp;	/* rt device */
 	void __percpu		*m_inodegc;	/* percpu inodegc structures */
 
 	/*
-- 
cgit v1.2.3


From 60335cc0fb5c7a8adfc84ba7dc976a00b6133499 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 22 Feb 2024 12:42:45 -0800
Subject: xfs: remove xfs_setsize_buftarg_early

Open code the logic in the only caller, and improve the comment
explaining what is being done here.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_buf.c | 22 +++++++---------------
 1 file changed, 7 insertions(+), 15 deletions(-)

(limited to 'fs/xfs/xfs_buf.c')

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 1777d834fd54..e83cadea239f 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -2001,18 +2001,6 @@ xfs_setsize_buftarg(
 	return 0;
 }
 
-/*
- * When allocating the initial buffer target we have not yet
- * read in the superblock, so don't know what sized sectors
- * are being used at this early stage.  Play safe.
- */
-STATIC int
-xfs_setsize_buftarg_early(
-	struct xfs_buftarg	*btp)
-{
-	return xfs_setsize_buftarg(btp, bdev_logical_block_size(btp->bt_bdev));
-}
-
 struct xfs_buftarg *
 xfs_alloc_buftarg(
 	struct xfs_mount	*mp,
@@ -2033,6 +2021,13 @@ xfs_alloc_buftarg(
 	btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off,
 					    mp, ops);
 
+	/*
+	 * When allocating the buftargs we have not yet read the super block and
+	 * thus don't know the file system sector size yet.
+	 */
+	if (xfs_setsize_buftarg(btp, bdev_logical_block_size(btp->bt_bdev)))
+		goto error_free;
+
 	/*
 	 * Buffer IO error rate limiting. Limit it to no more than 10 messages
 	 * per 30 seconds so as to not spam logs too much on repeated errors.
@@ -2040,9 +2035,6 @@ xfs_alloc_buftarg(
 	ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ,
 			     DEFAULT_RATELIMIT_BURST);
 
-	if (xfs_setsize_buftarg_early(btp))
-		goto error_free;
-
 	if (list_lru_init(&btp->bt_lru))
 		goto error_free;
 
-- 
cgit v1.2.3


From 1c51ac0998ed9baaca3ac75c0083b4c3b4d993ef Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 22 Feb 2024 12:42:45 -0800
Subject: xfs: move setting bt_logical_sectorsize out of xfs_setsize_buftarg

bt_logical_sectorsize and the associated mask is set based on the
constant logical block size in the block_device structure and thus
doesn't need to be updated in xfs_setsize_buftarg.  Move it into
xfs_alloc_buftarg so that it is only done once per buftarg.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Darrick J. Wong <djwong@kernel.org>
Signed-off-by: Darrick J. Wong <djwong@kernel.org>
---
 fs/xfs/xfs_buf.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'fs/xfs/xfs_buf.c')

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index e83cadea239f..ff243e3176a5 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1994,10 +1994,6 @@ xfs_setsize_buftarg(
 		return -EINVAL;
 	}
 
-	/* Set up device logical sector size mask */
-	btp->bt_logical_sectorsize = bdev_logical_block_size(btp->bt_bdev);
-	btp->bt_logical_sectormask = bdev_logical_block_size(btp->bt_bdev) - 1;
-
 	return 0;
 }
 
@@ -2028,6 +2024,10 @@ xfs_alloc_buftarg(
 	if (xfs_setsize_buftarg(btp, bdev_logical_block_size(btp->bt_bdev)))
 		goto error_free;
 
+	/* Set up device logical sector size mask */
+	btp->bt_logical_sectorsize = bdev_logical_block_size(btp->bt_bdev);
+	btp->bt_logical_sectormask = bdev_logical_block_size(btp->bt_bdev) - 1;
+
 	/*
 	 * Buffer IO error rate limiting. Limit it to no more than 10 messages
 	 * per 30 seconds so as to not spam logs too much on repeated errors.
-- 
cgit v1.2.3


From e7b58f7c1be20550d4f51cec6307b811e7555f52 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Thu, 22 Feb 2024 12:42:58 -0800
Subject: xfs: teach buftargs to maintain their own buffer hashtable

Currently, cached buffers are indexed by per-AG hashtables.  This works
great for the data device, but won't work for in-memory btrees.  To
handle that use case, buftargs will need to be able to index buffers
independently of other data structures.

We accomplish this by hoisting the rhashtable and its lock into a
separate xfs_buf_cache structure, make the buftarg point to the
_buf_cache structure, and rework various functions to use it.  This
will enable the in-memory buftarg to come up with its own _buf_cache.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/libxfs/xfs_ag.c |  6 ++--
 fs/xfs/libxfs/xfs_ag.h |  4 +--
 fs/xfs/xfs_buf.c       | 84 +++++++++++++++++++++++++++++++++-----------------
 fs/xfs/xfs_buf.h       |  8 +++++
 fs/xfs/xfs_mount.h     |  3 --
 5 files changed, 67 insertions(+), 38 deletions(-)

(limited to 'fs/xfs/xfs_buf.c')

diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index e598e14320be..32d80a76440c 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -264,7 +264,7 @@ xfs_free_perag(
 		xfs_defer_drain_free(&pag->pag_intents_drain);
 
 		cancel_delayed_work_sync(&pag->pag_blockgc_work);
-		xfs_buf_hash_destroy(pag);
+		xfs_buf_cache_destroy(&pag->pag_bcache);
 
 		/* drop the mount's active reference */
 		xfs_perag_rele(pag);
@@ -352,7 +352,7 @@ xfs_free_unused_perag_range(
 		spin_unlock(&mp->m_perag_lock);
 		if (!pag)
 			break;
-		xfs_buf_hash_destroy(pag);
+		xfs_buf_cache_destroy(&pag->pag_bcache);
 		xfs_defer_drain_free(&pag->pag_intents_drain);
 		kfree(pag);
 	}
@@ -419,7 +419,7 @@ xfs_initialize_perag(
 		pag->pagb_tree = RB_ROOT;
 #endif /* __KERNEL__ */
 
-		error = xfs_buf_hash_init(pag);
+		error = xfs_buf_cache_init(&pag->pag_bcache);
 		if (error)
 			goto out_remove_pag;
 
diff --git a/fs/xfs/libxfs/xfs_ag.h b/fs/xfs/libxfs/xfs_ag.h
index 19eddba09894..29bfa6273dec 100644
--- a/fs/xfs/libxfs/xfs_ag.h
+++ b/fs/xfs/libxfs/xfs_ag.h
@@ -106,9 +106,7 @@ struct xfs_perag {
 	int		pag_ici_reclaimable;	/* reclaimable inodes */
 	unsigned long	pag_ici_reclaim_cursor;	/* reclaim restart point */
 
-	/* buffer cache index */
-	spinlock_t	pag_buf_lock;	/* lock for pag_buf_hash */
-	struct rhashtable pag_buf_hash;
+	struct xfs_buf_cache	pag_bcache;
 
 	/* background prealloc block trimming */
 	struct delayed_work	pag_blockgc_work;
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index ff243e3176a5..6b979dfa339a 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -510,18 +510,18 @@ static const struct rhashtable_params xfs_buf_hash_params = {
 };
 
 int
-xfs_buf_hash_init(
-	struct xfs_perag	*pag)
+xfs_buf_cache_init(
+	struct xfs_buf_cache	*bch)
 {
-	spin_lock_init(&pag->pag_buf_lock);
-	return rhashtable_init(&pag->pag_buf_hash, &xfs_buf_hash_params);
+	spin_lock_init(&bch->bc_lock);
+	return rhashtable_init(&bch->bc_hash, &xfs_buf_hash_params);
 }
 
 void
-xfs_buf_hash_destroy(
-	struct xfs_perag	*pag)
+xfs_buf_cache_destroy(
+	struct xfs_buf_cache	*bch)
 {
-	rhashtable_destroy(&pag->pag_buf_hash);
+	rhashtable_destroy(&bch->bc_hash);
 }
 
 static int
@@ -584,7 +584,7 @@ xfs_buf_find_lock(
 
 static inline int
 xfs_buf_lookup(
-	struct xfs_perag	*pag,
+	struct xfs_buf_cache	*bch,
 	struct xfs_buf_map	*map,
 	xfs_buf_flags_t		flags,
 	struct xfs_buf		**bpp)
@@ -593,7 +593,7 @@ xfs_buf_lookup(
 	int			error;
 
 	rcu_read_lock();
-	bp = rhashtable_lookup(&pag->pag_buf_hash, map, xfs_buf_hash_params);
+	bp = rhashtable_lookup(&bch->bc_hash, map, xfs_buf_hash_params);
 	if (!bp || !atomic_inc_not_zero(&bp->b_hold)) {
 		rcu_read_unlock();
 		return -ENOENT;
@@ -618,6 +618,7 @@ xfs_buf_lookup(
 static int
 xfs_buf_find_insert(
 	struct xfs_buftarg	*btp,
+	struct xfs_buf_cache	*bch,
 	struct xfs_perag	*pag,
 	struct xfs_buf_map	*cmap,
 	struct xfs_buf_map	*map,
@@ -646,18 +647,18 @@ xfs_buf_find_insert(
 			goto out_free_buf;
 	}
 
-	spin_lock(&pag->pag_buf_lock);
-	bp = rhashtable_lookup_get_insert_fast(&pag->pag_buf_hash,
+	spin_lock(&bch->bc_lock);
+	bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
 			&new_bp->b_rhash_head, xfs_buf_hash_params);
 	if (IS_ERR(bp)) {
 		error = PTR_ERR(bp);
-		spin_unlock(&pag->pag_buf_lock);
+		spin_unlock(&bch->bc_lock);
 		goto out_free_buf;
 	}
 	if (bp) {
 		/* found an existing buffer */
 		atomic_inc(&bp->b_hold);
-		spin_unlock(&pag->pag_buf_lock);
+		spin_unlock(&bch->bc_lock);
 		error = xfs_buf_find_lock(bp, flags);
 		if (error)
 			xfs_buf_rele(bp);
@@ -668,17 +669,36 @@ xfs_buf_find_insert(
 
 	/* The new buffer keeps the perag reference until it is freed. */
 	new_bp->b_pag = pag;
-	spin_unlock(&pag->pag_buf_lock);
+	spin_unlock(&bch->bc_lock);
 	*bpp = new_bp;
 	return 0;
 
 out_free_buf:
 	xfs_buf_free(new_bp);
 out_drop_pag:
-	xfs_perag_put(pag);
+	if (pag)
+		xfs_perag_put(pag);
 	return error;
 }
 
+static inline struct xfs_perag *
+xfs_buftarg_get_pag(
+	struct xfs_buftarg		*btp,
+	const struct xfs_buf_map	*map)
+{
+	struct xfs_mount		*mp = btp->bt_mount;
+
+	return xfs_perag_get(mp, xfs_daddr_to_agno(mp, map->bm_bn));
+}
+
+static inline struct xfs_buf_cache *
+xfs_buftarg_buf_cache(
+	struct xfs_buftarg		*btp,
+	struct xfs_perag		*pag)
+{
+	return &pag->pag_bcache;
+}
+
 /*
  * Assembles a buffer covering the specified range. The code is optimised for
  * cache hits, as metadata intensive workloads will see 3 orders of magnitude
@@ -692,6 +712,7 @@ xfs_buf_get_map(
 	xfs_buf_flags_t		flags,
 	struct xfs_buf		**bpp)
 {
+	struct xfs_buf_cache	*bch;
 	struct xfs_perag	*pag;
 	struct xfs_buf		*bp = NULL;
 	struct xfs_buf_map	cmap = { .bm_bn = map[0].bm_bn };
@@ -707,10 +728,10 @@ xfs_buf_get_map(
 	if (error)
 		return error;
 
-	pag = xfs_perag_get(btp->bt_mount,
-			    xfs_daddr_to_agno(btp->bt_mount, cmap.bm_bn));
+	pag = xfs_buftarg_get_pag(btp, &cmap);
+	bch = xfs_buftarg_buf_cache(btp, pag);
 
-	error = xfs_buf_lookup(pag, &cmap, flags, &bp);
+	error = xfs_buf_lookup(bch, &cmap, flags, &bp);
 	if (error && error != -ENOENT)
 		goto out_put_perag;
 
@@ -722,13 +743,14 @@ xfs_buf_get_map(
 			goto out_put_perag;
 
 		/* xfs_buf_find_insert() consumes the perag reference. */
-		error = xfs_buf_find_insert(btp, pag, &cmap, map, nmaps,
+		error = xfs_buf_find_insert(btp, bch, pag, &cmap, map, nmaps,
 				flags, &bp);
 		if (error)
 			return error;
 	} else {
 		XFS_STATS_INC(btp->bt_mount, xb_get_locked);
-		xfs_perag_put(pag);
+		if (pag)
+			xfs_perag_put(pag);
 	}
 
 	/* We do not hold a perag reference anymore. */
@@ -756,7 +778,8 @@ xfs_buf_get_map(
 	return 0;
 
 out_put_perag:
-	xfs_perag_put(pag);
+	if (pag)
+		xfs_perag_put(pag);
 	return error;
 }
 
@@ -1016,7 +1039,9 @@ static void
 xfs_buf_rele_cached(
 	struct xfs_buf		*bp)
 {
+	struct xfs_buftarg	*btp = bp->b_target;
 	struct xfs_perag	*pag = bp->b_pag;
+	struct xfs_buf_cache	*bch = xfs_buftarg_buf_cache(btp, pag);
 	bool			release;
 	bool			freebuf = false;
 
@@ -1035,7 +1060,7 @@ xfs_buf_rele_cached(
 	 * leading to a use-after-free scenario.
 	 */
 	spin_lock(&bp->b_lock);
-	release = atomic_dec_and_lock(&bp->b_hold, &pag->pag_buf_lock);
+	release = atomic_dec_and_lock(&bp->b_hold, &bch->bc_lock);
 	if (!release) {
 		/*
 		 * Drop the in-flight state if the buffer is already on the LRU
@@ -1056,11 +1081,11 @@ xfs_buf_rele_cached(
 		 * buffer for the LRU and clear the (now stale) dispose list
 		 * state flag
 		 */
-		if (list_lru_add_obj(&bp->b_target->bt_lru, &bp->b_lru)) {
+		if (list_lru_add_obj(&btp->bt_lru, &bp->b_lru)) {
 			bp->b_state &= ~XFS_BSTATE_DISPOSE;
 			atomic_inc(&bp->b_hold);
 		}
-		spin_unlock(&pag->pag_buf_lock);
+		spin_unlock(&bch->bc_lock);
 	} else {
 		/*
 		 * most of the time buffers will already be removed from the
@@ -1069,16 +1094,17 @@ xfs_buf_rele_cached(
 		 * was on was the disposal list
 		 */
 		if (!(bp->b_state & XFS_BSTATE_DISPOSE)) {
-			list_lru_del_obj(&bp->b_target->bt_lru, &bp->b_lru);
+			list_lru_del_obj(&btp->bt_lru, &bp->b_lru);
 		} else {
 			ASSERT(list_empty(&bp->b_lru));
 		}
 
 		ASSERT(!(bp->b_flags & _XBF_DELWRI_Q));
-		rhashtable_remove_fast(&pag->pag_buf_hash, &bp->b_rhash_head,
-				       xfs_buf_hash_params);
-		spin_unlock(&pag->pag_buf_lock);
-		xfs_perag_put(pag);
+		rhashtable_remove_fast(&bch->bc_hash, &bp->b_rhash_head,
+				xfs_buf_hash_params);
+		spin_unlock(&bch->bc_lock);
+		if (pag)
+			xfs_perag_put(pag);
 		freebuf = true;
 	}
 
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index b9216dee7810..7b01df6dcd50 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -83,6 +83,14 @@ typedef unsigned int xfs_buf_flags_t;
 #define XFS_BSTATE_DISPOSE	 (1 << 0)	/* buffer being discarded */
 #define XFS_BSTATE_IN_FLIGHT	 (1 << 1)	/* I/O in flight */
 
+struct xfs_buf_cache {
+	spinlock_t		bc_lock;
+	struct rhashtable	bc_hash;
+};
+
+int xfs_buf_cache_init(struct xfs_buf_cache *bch);
+void xfs_buf_cache_destroy(struct xfs_buf_cache *bch);
+
 /*
  * The xfs_buftarg contains 2 notions of "sector size" -
  *
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index 6c44e6db4d86..e880aa48de68 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -505,9 +505,6 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
 	return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
 }
 
-int xfs_buf_hash_init(struct xfs_perag *pag);
-void xfs_buf_hash_destroy(struct xfs_perag *pag);
-
 extern void	xfs_uuid_table_free(void);
 extern uint64_t xfs_default_resblks(xfs_mount_t *mp);
 extern int	xfs_mountfs(xfs_mount_t *mp);
-- 
cgit v1.2.3


From 5076a6040ca1613e616d84aecfaac5f932db84e0 Mon Sep 17 00:00:00 2001
From: "Darrick J. Wong" <djwong@kernel.org>
Date: Thu, 22 Feb 2024 12:43:21 -0800
Subject: xfs: support in-memory buffer cache targets

Allow the buffer cache to target in-memory files by making it possible
to have a buftarg that maps pages from private shmem files.  As the
prevous patch alludes, the in-memory buftarg contains its own cache,
points to a shmem file, and does not point to a block_device.

The next few patches will make it possible to construct an xfs_btree in
pageable memory by using this buftarg.

Signed-off-by: Darrick J. Wong <djwong@kernel.org>
Reviewed-by: Christoph Hellwig <hch@lst.de>
---
 fs/xfs/Kconfig       |   4 ++
 fs/xfs/Makefile      |   1 +
 fs/xfs/xfs_buf.c     | 132 ++++++++++++++++++++++-------------
 fs/xfs/xfs_buf.h     |   9 +++
 fs/xfs/xfs_buf_mem.c | 189 +++++++++++++++++++++++++++++++++++++++++++++++++++
 fs/xfs/xfs_buf_mem.h |  30 ++++++++
 fs/xfs/xfs_trace.c   |   1 +
 fs/xfs/xfs_trace.h   |  49 +++++++++++++
 8 files changed, 369 insertions(+), 46 deletions(-)
 create mode 100644 fs/xfs/xfs_buf_mem.c
 create mode 100644 fs/xfs/xfs_buf_mem.h

(limited to 'fs/xfs/xfs_buf.c')

diff --git a/fs/xfs/Kconfig b/fs/xfs/Kconfig
index fa7eb3e2a248..7017ea0fb4cd 100644
--- a/fs/xfs/Kconfig
+++ b/fs/xfs/Kconfig
@@ -128,6 +128,9 @@ config XFS_LIVE_HOOKS
 	bool
 	select JUMP_LABEL if HAVE_ARCH_JUMP_LABEL
 
+config XFS_MEMORY_BUFS
+	bool
+
 config XFS_ONLINE_SCRUB
 	bool "XFS online metadata check support"
 	default n
@@ -135,6 +138,7 @@ config XFS_ONLINE_SCRUB
 	depends on TMPFS && SHMEM
 	select XFS_LIVE_HOOKS
 	select XFS_DRAIN_INTENTS
+	select XFS_MEMORY_BUFS
 	help
 	  If you say Y here you will be able to check metadata on a
 	  mounted XFS filesystem.  This feature is intended to reduce
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index ba8608f469ac..045874235b82 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -137,6 +137,7 @@ endif
 
 xfs-$(CONFIG_XFS_DRAIN_INTENTS)	+= xfs_drain.o
 xfs-$(CONFIG_XFS_LIVE_HOOKS)	+= xfs_hooks.o
+xfs-$(CONFIG_XFS_MEMORY_BUFS)	+= xfs_buf_mem.o
 
 # online scrub/repair
 ifeq ($(CONFIG_XFS_ONLINE_SCRUB),y)
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 6b979dfa339a..7fc26e64368d 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -21,6 +21,7 @@
 #include "xfs_errortag.h"
 #include "xfs_error.h"
 #include "xfs_ag.h"
+#include "xfs_buf_mem.h"
 
 struct kmem_cache *xfs_buf_cache;
 
@@ -318,7 +319,9 @@ xfs_buf_free(
 
 	ASSERT(list_empty(&bp->b_lru));
 
-	if (bp->b_flags & _XBF_PAGES)
+	if (xfs_buftarg_is_mem(bp->b_target))
+		xmbuf_unmap_page(bp);
+	else if (bp->b_flags & _XBF_PAGES)
 		xfs_buf_free_pages(bp);
 	else if (bp->b_flags & _XBF_KMEM)
 		kfree(bp->b_addr);
@@ -634,18 +637,20 @@ xfs_buf_find_insert(
 	if (error)
 		goto out_drop_pag;
 
-	/*
-	 * For buffers that fit entirely within a single page, first attempt to
-	 * allocate the memory from the heap to minimise memory usage. If we
-	 * can't get heap memory for these small buffers, we fall back to using
-	 * the page allocator.
-	 */
-	if (BBTOB(new_bp->b_length) >= PAGE_SIZE ||
-	    xfs_buf_alloc_kmem(new_bp, flags) < 0) {
+	if (xfs_buftarg_is_mem(new_bp->b_target)) {
+		error = xmbuf_map_page(new_bp);
+	} else if (BBTOB(new_bp->b_length) >= PAGE_SIZE ||
+		   xfs_buf_alloc_kmem(new_bp, flags) < 0) {
+		/*
+		 * For buffers that fit entirely within a single page, first
+		 * attempt to allocate the memory from the heap to minimise
+		 * memory usage. If we can't get heap memory for these small
+		 * buffers, we fall back to using the page allocator.
+		 */
 		error = xfs_buf_alloc_pages(new_bp, flags);
-		if (error)
-			goto out_free_buf;
 	}
+	if (error)
+		goto out_free_buf;
 
 	spin_lock(&bch->bc_lock);
 	bp = rhashtable_lookup_get_insert_fast(&bch->bc_hash,
@@ -688,6 +693,8 @@ xfs_buftarg_get_pag(
 {
 	struct xfs_mount		*mp = btp->bt_mount;
 
+	if (xfs_buftarg_is_mem(btp))
+		return NULL;
 	return xfs_perag_get(mp, xfs_daddr_to_agno(mp, map->bm_bn));
 }
 
@@ -696,7 +703,9 @@ xfs_buftarg_buf_cache(
 	struct xfs_buftarg		*btp,
 	struct xfs_perag		*pag)
 {
-	return &pag->pag_bcache;
+	if (pag)
+		return &pag->pag_bcache;
+	return btp->bt_cache;
 }
 
 /*
@@ -926,6 +935,13 @@ xfs_buf_readahead_map(
 {
 	struct xfs_buf		*bp;
 
+	/*
+	 * Currently we don't have a good means or justification for performing
+	 * xmbuf_map_page asynchronously, so we don't do readahead.
+	 */
+	if (xfs_buftarg_is_mem(target))
+		return;
+
 	xfs_buf_read_map(target, map, nmaps,
 		     XBF_TRYLOCK | XBF_ASYNC | XBF_READ_AHEAD, &bp, ops,
 		     __this_address);
@@ -991,7 +1007,10 @@ xfs_buf_get_uncached(
 	if (error)
 		return error;
 
-	error = xfs_buf_alloc_pages(bp, flags);
+	if (xfs_buftarg_is_mem(bp->b_target))
+		error = xmbuf_map_page(bp);
+	else
+		error = xfs_buf_alloc_pages(bp, flags);
 	if (error)
 		goto fail_free_buf;
 
@@ -1633,6 +1652,12 @@ _xfs_buf_ioapply(
 	/* we only use the buffer cache for meta-data */
 	op |= REQ_META;
 
+	/* in-memory targets are directly mapped, no IO required. */
+	if (xfs_buftarg_is_mem(bp->b_target)) {
+		xfs_buf_ioend(bp);
+		return;
+	}
+
 	/*
 	 * Walk all the vectors issuing IO on them. Set up the initial offset
 	 * into the buffer and the desired IO size before we start -
@@ -1988,19 +2013,24 @@ xfs_buftarg_shrink_count(
 }
 
 void
-xfs_free_buftarg(
+xfs_destroy_buftarg(
 	struct xfs_buftarg	*btp)
 {
 	shrinker_free(btp->bt_shrinker);
 	ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
 	percpu_counter_destroy(&btp->bt_io_count);
 	list_lru_destroy(&btp->bt_lru);
+}
 
+void
+xfs_free_buftarg(
+	struct xfs_buftarg	*btp)
+{
+	xfs_destroy_buftarg(btp);
 	fs_put_dax(btp->bt_daxdev, btp->bt_mount);
 	/* the main block device is closed by kill_block_super */
 	if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev)
 		bdev_release(btp->bt_bdev_handle);
-
 	kfree(btp);
 }
 
@@ -2023,6 +2053,45 @@ xfs_setsize_buftarg(
 	return 0;
 }
 
+int
+xfs_init_buftarg(
+	struct xfs_buftarg		*btp,
+	size_t				logical_sectorsize,
+	const char			*descr)
+{
+	/* Set up device logical sector size mask */
+	btp->bt_logical_sectorsize = logical_sectorsize;
+	btp->bt_logical_sectormask = logical_sectorsize - 1;
+
+	/*
+	 * Buffer IO error rate limiting. Limit it to no more than 10 messages
+	 * per 30 seconds so as to not spam logs too much on repeated errors.
+	 */
+	ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ,
+			     DEFAULT_RATELIMIT_BURST);
+
+	if (list_lru_init(&btp->bt_lru))
+		return -ENOMEM;
+	if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
+		goto out_destroy_lru;
+
+	btp->bt_shrinker =
+		shrinker_alloc(SHRINKER_NUMA_AWARE, "xfs-buf:%s", descr);
+	if (!btp->bt_shrinker)
+		goto out_destroy_io_count;
+	btp->bt_shrinker->count_objects = xfs_buftarg_shrink_count;
+	btp->bt_shrinker->scan_objects = xfs_buftarg_shrink_scan;
+	btp->bt_shrinker->private_data = btp;
+	shrinker_register(btp->bt_shrinker);
+	return 0;
+
+out_destroy_io_count:
+	percpu_counter_destroy(&btp->bt_io_count);
+out_destroy_lru:
+	list_lru_destroy(&btp->bt_lru);
+	return -ENOMEM;
+}
+
 struct xfs_buftarg *
 xfs_alloc_buftarg(
 	struct xfs_mount	*mp,
@@ -2049,41 +2118,12 @@ xfs_alloc_buftarg(
 	 */
 	if (xfs_setsize_buftarg(btp, bdev_logical_block_size(btp->bt_bdev)))
 		goto error_free;
-
-	/* Set up device logical sector size mask */
-	btp->bt_logical_sectorsize = bdev_logical_block_size(btp->bt_bdev);
-	btp->bt_logical_sectormask = bdev_logical_block_size(btp->bt_bdev) - 1;
-
-	/*
-	 * Buffer IO error rate limiting. Limit it to no more than 10 messages
-	 * per 30 seconds so as to not spam logs too much on repeated errors.
-	 */
-	ratelimit_state_init(&btp->bt_ioerror_rl, 30 * HZ,
-			     DEFAULT_RATELIMIT_BURST);
-
-	if (list_lru_init(&btp->bt_lru))
+	if (xfs_init_buftarg(btp, bdev_logical_block_size(btp->bt_bdev),
+			mp->m_super->s_id))
 		goto error_free;
 
-	if (percpu_counter_init(&btp->bt_io_count, 0, GFP_KERNEL))
-		goto error_lru;
-
-	btp->bt_shrinker = shrinker_alloc(SHRINKER_NUMA_AWARE, "xfs-buf:%s",
-					  mp->m_super->s_id);
-	if (!btp->bt_shrinker)
-		goto error_pcpu;
-
-	btp->bt_shrinker->count_objects = xfs_buftarg_shrink_count;
-	btp->bt_shrinker->scan_objects = xfs_buftarg_shrink_scan;
-	btp->bt_shrinker->private_data = btp;
-
-	shrinker_register(btp->bt_shrinker);
-
 	return btp;
 
-error_pcpu:
-	percpu_counter_destroy(&btp->bt_io_count);
-error_lru:
-	list_lru_destroy(&btp->bt_lru);
 error_free:
 	kfree(btp);
 	return NULL;
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index 7b01df6dcd50..73249abca968 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -109,6 +109,7 @@ struct xfs_buftarg {
 	struct bdev_handle	*bt_bdev_handle;
 	struct block_device	*bt_bdev;
 	struct dax_device	*bt_daxdev;
+	struct file		*bt_file;
 	u64			bt_dax_part_off;
 	struct xfs_mount	*bt_mount;
 	unsigned int		bt_meta_sectorsize;
@@ -122,6 +123,9 @@ struct xfs_buftarg {
 
 	struct percpu_counter	bt_io_count;
 	struct ratelimit_state	bt_ioerror_rl;
+
+	/* built-in cache, if we're not using the perag one */
+	struct xfs_buf_cache	bt_cache[];
 };
 
 #define XB_PAGES	2
@@ -387,4 +391,9 @@ int xfs_buf_reverify(struct xfs_buf *bp, const struct xfs_buf_ops *ops);
 bool xfs_verify_magic(struct xfs_buf *bp, __be32 dmagic);
 bool xfs_verify_magic16(struct xfs_buf *bp, __be16 dmagic);
 
+/* for xfs_buf_mem.c only: */
+int xfs_init_buftarg(struct xfs_buftarg *btp, size_t logical_sectorsize,
+		const char *descr);
+void xfs_destroy_buftarg(struct xfs_buftarg *btp);
+
 #endif	/* __XFS_BUF_H__ */
diff --git a/fs/xfs/xfs_buf_mem.c b/fs/xfs/xfs_buf_mem.c
new file mode 100644
index 000000000000..be71ba1a3d7b
--- /dev/null
+++ b/fs/xfs/xfs_buf_mem.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2023-2024 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#include "xfs.h"
+#include "xfs_fs.h"
+#include "xfs_buf.h"
+#include "xfs_buf_mem.h"
+#include "xfs_trace.h"
+#include <linux/shmem_fs.h>
+
+/*
+ * Buffer Cache for In-Memory Files
+ * ================================
+ *
+ * Online fsck wants to create ephemeral ordered recordsets.  The existing
+ * btree infrastructure can do this, but we need the buffer cache to target
+ * memory instead of block devices.
+ *
+ * When CONFIG_TMPFS=y, shmemfs is enough of a filesystem to meet those
+ * requirements.  Therefore, the xmbuf mechanism uses an unlinked shmem file to
+ * store our staging data.  This file is not installed in the file descriptor
+ * table so that user programs cannot access the data, which means that the
+ * xmbuf must be freed with xmbuf_destroy.
+ *
+ * xmbufs assume that the caller will handle all required concurrency
+ * management; standard vfs locks (freezer and inode) are not taken.  Reads
+ * and writes are satisfied directly from the page cache.
+ *
+ * The only supported block size is PAGE_SIZE, and we cannot use highmem.
+ */
+
+/*
+ * shmem files used to back an in-memory buffer cache must not be exposed to
+ * userspace.  Upper layers must coordinate access to the one handle returned
+ * by the constructor, so establish a separate lock class for xmbufs to avoid
+ * confusing lockdep.
+ */
+static struct lock_class_key xmbuf_i_mutex_key;
+
+/*
+ * Allocate a buffer cache target for a memory-backed file and set up the
+ * buffer target.
+ */
+int
+xmbuf_alloc(
+	struct xfs_mount	*mp,
+	const char		*descr,
+	struct xfs_buftarg	**btpp)
+{
+	struct file		*file;
+	struct inode		*inode;
+	struct xfs_buftarg	*btp;
+	int			error;
+
+	btp = kzalloc(struct_size(btp, bt_cache, 1), GFP_KERNEL);
+	if (!btp)
+		return -ENOMEM;
+
+	file = shmem_kernel_file_setup(descr, 0, 0);
+	if (IS_ERR(file)) {
+		error = PTR_ERR(file);
+		goto out_free_btp;
+	}
+	inode = file_inode(file);
+
+	/* private file, private locking */
+	lockdep_set_class(&inode->i_rwsem, &xmbuf_i_mutex_key);
+
+	/*
+	 * We don't want to bother with kmapping data during repair, so don't
+	 * allow highmem pages to back this mapping.
+	 */
+	mapping_set_gfp_mask(inode->i_mapping, GFP_KERNEL);
+
+	/* ensure all writes are below EOF to avoid pagecache zeroing */
+	i_size_write(inode, inode->i_sb->s_maxbytes);
+
+	trace_xmbuf_create(btp);
+
+	error = xfs_buf_cache_init(btp->bt_cache);
+	if (error)
+		goto out_file;
+
+	/* Initialize buffer target */
+	btp->bt_mount = mp;
+	btp->bt_dev = (dev_t)-1U;
+	btp->bt_bdev = NULL; /* in-memory buftargs have no bdev */
+	btp->bt_file = file;
+	btp->bt_meta_sectorsize = XMBUF_BLOCKSIZE;
+	btp->bt_meta_sectormask = XMBUF_BLOCKSIZE - 1;
+
+	error = xfs_init_buftarg(btp, XMBUF_BLOCKSIZE, descr);
+	if (error)
+		goto out_bcache;
+
+	*btpp = btp;
+	return 0;
+
+out_bcache:
+	xfs_buf_cache_destroy(btp->bt_cache);
+out_file:
+	fput(file);
+out_free_btp:
+	kfree(btp);
+	return error;
+}
+
+/* Free a buffer cache target for a memory-backed buffer cache. */
+void
+xmbuf_free(
+	struct xfs_buftarg	*btp)
+{
+	ASSERT(xfs_buftarg_is_mem(btp));
+	ASSERT(percpu_counter_sum(&btp->bt_io_count) == 0);
+
+	trace_xmbuf_free(btp);
+
+	xfs_destroy_buftarg(btp);
+	xfs_buf_cache_destroy(btp->bt_cache);
+	fput(btp->bt_file);
+	kfree(btp);
+}
+
+/* Directly map a shmem page into the buffer cache. */
+int
+xmbuf_map_page(
+	struct xfs_buf		*bp)
+{
+	struct inode		*inode = file_inode(bp->b_target->bt_file);
+	struct folio		*folio = NULL;
+	struct page		*page;
+	loff_t                  pos = BBTOB(xfs_buf_daddr(bp));
+	int			error;
+
+	ASSERT(xfs_buftarg_is_mem(bp->b_target));
+
+	if (bp->b_map_count != 1)
+		return -ENOMEM;
+	if (BBTOB(bp->b_length) != XMBUF_BLOCKSIZE)
+		return -ENOMEM;
+	if (offset_in_page(pos) != 0) {
+		ASSERT(offset_in_page(pos));
+		return -ENOMEM;
+	}
+
+	error = shmem_get_folio(inode, pos >> PAGE_SHIFT, &folio, SGP_CACHE);
+	if (error)
+		return error;
+
+	if (filemap_check_wb_err(inode->i_mapping, 0)) {
+		folio_unlock(folio);
+		folio_put(folio);
+		return -EIO;
+	}
+
+	page = folio_file_page(folio, pos >> PAGE_SHIFT);
+
+	/*
+	 * Mark the page dirty so that it won't be reclaimed once we drop the
+	 * (potentially last) reference in xmbuf_unmap_page.
+	 */
+	set_page_dirty(page);
+	unlock_page(page);
+
+	bp->b_addr = page_address(page);
+	bp->b_pages = bp->b_page_array;
+	bp->b_pages[0] = page;
+	bp->b_page_count = 1;
+	return 0;
+}
+
+/* Unmap a shmem page that was mapped into the buffer cache. */
+void
+xmbuf_unmap_page(
+	struct xfs_buf		*bp)
+{
+	struct page		*page = bp->b_pages[0];
+
+	ASSERT(xfs_buftarg_is_mem(bp->b_target));
+
+	put_page(page);
+
+	bp->b_addr = NULL;
+	bp->b_pages[0] = NULL;
+	bp->b_pages = NULL;
+	bp->b_page_count = 0;
+}
diff --git a/fs/xfs/xfs_buf_mem.h b/fs/xfs/xfs_buf_mem.h
new file mode 100644
index 000000000000..945f4b610998
--- /dev/null
+++ b/fs/xfs/xfs_buf_mem.h
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2023-2024 Oracle.  All Rights Reserved.
+ * Author: Darrick J. Wong <djwong@kernel.org>
+ */
+#ifndef __XFS_BUF_MEM_H__
+#define __XFS_BUF_MEM_H__
+
+#define XMBUF_BLOCKSIZE			(PAGE_SIZE)
+#define XMBUF_BLOCKSHIFT		(PAGE_SHIFT)
+
+#ifdef CONFIG_XFS_MEMORY_BUFS
+static inline bool xfs_buftarg_is_mem(const struct xfs_buftarg *btp)
+{
+	return btp->bt_bdev == NULL;
+}
+
+int xmbuf_alloc(struct xfs_mount *mp, const char *descr,
+		struct xfs_buftarg **btpp);
+void xmbuf_free(struct xfs_buftarg *btp);
+
+int xmbuf_map_page(struct xfs_buf *bp);
+void xmbuf_unmap_page(struct xfs_buf *bp);
+#else
+# define xfs_buftarg_is_mem(...)	(false)
+# define xmbuf_map_page(...)		(-ENOMEM)
+# define xmbuf_unmap_page(...)		((void)0)
+#endif /* CONFIG_XFS_MEMORY_BUFS */
+
+#endif /* __XFS_BUF_MEM_H__ */
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index 8a5dc1538aa8..ae5be6b589f0 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -36,6 +36,7 @@
 #include "xfs_error.h"
 #include <linux/iomap.h>
 #include "xfs_iomap.h"
+#include "xfs_buf_mem.h"
 
 /*
  * We include this last to have the helpers above available for the trace
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index e876a47f1427..14cb8752e3d3 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -4514,6 +4514,55 @@ DEFINE_PERAG_INTENTS_EVENT(xfs_perag_wait_intents);
 
 #endif /* CONFIG_XFS_DRAIN_INTENTS */
 
+#ifdef CONFIG_XFS_MEMORY_BUFS
+TRACE_EVENT(xmbuf_create,
+	TP_PROTO(struct xfs_buftarg *btp),
+	TP_ARGS(btp),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(unsigned long, ino)
+		__array(char, pathname, 256)
+	),
+	TP_fast_assign(
+		char		pathname[257];
+		char		*path;
+		struct file	*file = btp->bt_file;
+
+		__entry->ino = file_inode(file)->i_ino;
+		memset(pathname, 0, sizeof(pathname));
+		path = file_path(file, pathname, sizeof(pathname) - 1);
+		if (IS_ERR(path))
+			path = "(unknown)";
+		strncpy(__entry->pathname, path, sizeof(__entry->pathname));
+	),
+	TP_printk("xmino 0x%lx path '%s'",
+		  __entry->ino,
+		  __entry->pathname)
+);
+
+TRACE_EVENT(xmbuf_free,
+	TP_PROTO(struct xfs_buftarg *btp),
+	TP_ARGS(btp),
+	TP_STRUCT__entry(
+		__field(unsigned long, ino)
+		__field(unsigned long long, bytes)
+		__field(loff_t, size)
+	),
+	TP_fast_assign(
+		struct file	*file = btp->bt_file;
+		struct inode	*inode = file_inode(file);
+
+		__entry->size = i_size_read(inode);
+		__entry->bytes = (inode->i_blocks << SECTOR_SHIFT) + inode->i_bytes;
+		__entry->ino = inode->i_ino;
+	),
+	TP_printk("xmino 0x%lx mem_bytes 0x%llx isize 0x%llx",
+		  __entry->ino,
+		  __entry->bytes,
+		  __entry->size)
+);
+#endif /* CONFIG_XFS_MEMORY_BUFS */
+
 #endif /* _TRACE_XFS_H */
 
 #undef TRACE_INCLUDE_PATH
-- 
cgit v1.2.3


From 1b9e2d90141c5e25faefbb7891f0ed8606aa02cf Mon Sep 17 00:00:00 2001
From: Christian Brauner <brauner@kernel.org>
Date: Tue, 23 Jan 2024 14:26:24 +0100
Subject: xfs: port block device access to files

Link: https://lore.kernel.org/r/20240123-vfs-bdev-file-v2-7-adbd023e19cc@kernel.org
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Christian Brauner <brauner@kernel.org>
---
 fs/xfs/xfs_buf.c   | 10 +++++-----
 fs/xfs/xfs_buf.h   |  4 ++--
 fs/xfs/xfs_super.c | 44 ++++++++++++++++++++++----------------------
 3 files changed, 29 insertions(+), 29 deletions(-)

(limited to 'fs/xfs/xfs_buf.c')

diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 8e5bd50d29fe..01b41fabbe3c 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -1951,7 +1951,7 @@ xfs_free_buftarg(
 	fs_put_dax(btp->bt_daxdev, btp->bt_mount);
 	/* the main block device is closed by kill_block_super */
 	if (btp->bt_bdev != btp->bt_mount->m_super->s_bdev)
-		bdev_release(btp->bt_bdev_handle);
+		fput(btp->bt_bdev_file);
 
 	kmem_free(btp);
 }
@@ -1994,7 +1994,7 @@ xfs_setsize_buftarg_early(
 struct xfs_buftarg *
 xfs_alloc_buftarg(
 	struct xfs_mount	*mp,
-	struct bdev_handle	*bdev_handle)
+	struct file		*bdev_file)
 {
 	xfs_buftarg_t		*btp;
 	const struct dax_holder_operations *ops = NULL;
@@ -2005,9 +2005,9 @@ xfs_alloc_buftarg(
 	btp = kmem_zalloc(sizeof(*btp), KM_NOFS);
 
 	btp->bt_mount = mp;
-	btp->bt_bdev_handle = bdev_handle;
-	btp->bt_dev = bdev_handle->bdev->bd_dev;
-	btp->bt_bdev = bdev_handle->bdev;
+	btp->bt_bdev_file = bdev_file;
+	btp->bt_bdev = file_bdev(bdev_file);
+	btp->bt_dev = btp->bt_bdev->bd_dev;
 	btp->bt_daxdev = fs_dax_get_by_bdev(btp->bt_bdev, &btp->bt_dax_part_off,
 					    mp, ops);
 
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index b470de08a46c..304e858d04fb 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -98,7 +98,7 @@ typedef unsigned int xfs_buf_flags_t;
  */
 typedef struct xfs_buftarg {
 	dev_t			bt_dev;
-	struct bdev_handle	*bt_bdev_handle;
+	struct file		*bt_bdev_file;
 	struct block_device	*bt_bdev;
 	struct dax_device	*bt_daxdev;
 	u64			bt_dax_part_off;
@@ -366,7 +366,7 @@ xfs_buf_update_cksum(struct xfs_buf *bp, unsigned long cksum_offset)
  *	Handling of buftargs.
  */
 struct xfs_buftarg *xfs_alloc_buftarg(struct xfs_mount *mp,
-		struct bdev_handle *bdev_handle);
+		struct file *bdev_file);
 extern void xfs_free_buftarg(struct xfs_buftarg *);
 extern void xfs_buftarg_wait(struct xfs_buftarg *);
 extern void xfs_buftarg_drain(struct xfs_buftarg *);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index e5ac0e59ede9..3814d7371169 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -362,16 +362,16 @@ STATIC int
 xfs_blkdev_get(
 	xfs_mount_t		*mp,
 	const char		*name,
-	struct bdev_handle	**handlep)
+	struct file		**bdev_filep)
 {
 	int			error = 0;
 
-	*handlep = bdev_open_by_path(name,
+	*bdev_filep = bdev_file_open_by_path(name,
 		BLK_OPEN_READ | BLK_OPEN_WRITE | BLK_OPEN_RESTRICT_WRITES,
 		mp->m_super, &fs_holder_ops);
-	if (IS_ERR(*handlep)) {
-		error = PTR_ERR(*handlep);
-		*handlep = NULL;
+	if (IS_ERR(*bdev_filep)) {
+		error = PTR_ERR(*bdev_filep);
+		*bdev_filep = NULL;
 		xfs_warn(mp, "Invalid device [%s], error=%d", name, error);
 	}
 
@@ -436,26 +436,26 @@ xfs_open_devices(
 {
 	struct super_block	*sb = mp->m_super;
 	struct block_device	*ddev = sb->s_bdev;
-	struct bdev_handle	*logdev_handle = NULL, *rtdev_handle = NULL;
+	struct file		*logdev_file = NULL, *rtdev_file = NULL;
 	int			error;
 
 	/*
 	 * Open real time and log devices - order is important.
 	 */
 	if (mp->m_logname) {
-		error = xfs_blkdev_get(mp, mp->m_logname, &logdev_handle);
+		error = xfs_blkdev_get(mp, mp->m_logname, &logdev_file);
 		if (error)
 			return error;
 	}
 
 	if (mp->m_rtname) {
-		error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev_handle);
+		error = xfs_blkdev_get(mp, mp->m_rtname, &rtdev_file);
 		if (error)
 			goto out_close_logdev;
 
-		if (rtdev_handle->bdev == ddev ||
-		    (logdev_handle &&
-		     rtdev_handle->bdev == logdev_handle->bdev)) {
+		if (file_bdev(rtdev_file) == ddev ||
+		    (logdev_file &&
+		     file_bdev(rtdev_file) == file_bdev(logdev_file))) {
 			xfs_warn(mp,
 	"Cannot mount filesystem with identical rtdev and ddev/logdev.");
 			error = -EINVAL;
@@ -467,25 +467,25 @@ xfs_open_devices(
 	 * Setup xfs_mount buffer target pointers
 	 */
 	error = -ENOMEM;
-	mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb_bdev_handle(sb));
+	mp->m_ddev_targp = xfs_alloc_buftarg(mp, sb->s_bdev_file);
 	if (!mp->m_ddev_targp)
 		goto out_close_rtdev;
 
-	if (rtdev_handle) {
-		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev_handle);
+	if (rtdev_file) {
+		mp->m_rtdev_targp = xfs_alloc_buftarg(mp, rtdev_file);
 		if (!mp->m_rtdev_targp)
 			goto out_free_ddev_targ;
 	}
 
-	if (logdev_handle && logdev_handle->bdev != ddev) {
-		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev_handle);
+	if (logdev_file && file_bdev(logdev_file) != ddev) {
+		mp->m_logdev_targp = xfs_alloc_buftarg(mp, logdev_file);
 		if (!mp->m_logdev_targp)
 			goto out_free_rtdev_targ;
 	} else {
 		mp->m_logdev_targp = mp->m_ddev_targp;
 		/* Handle won't be used, drop it */
-		if (logdev_handle)
-			bdev_release(logdev_handle);
+		if (logdev_file)
+			fput(logdev_file);
 	}
 
 	return 0;
@@ -496,11 +496,11 @@ xfs_open_devices(
  out_free_ddev_targ:
 	xfs_free_buftarg(mp->m_ddev_targp);
  out_close_rtdev:
-	 if (rtdev_handle)
-		bdev_release(rtdev_handle);
+	 if (rtdev_file)
+		fput(rtdev_file);
  out_close_logdev:
-	if (logdev_handle)
-		bdev_release(logdev_handle);
+	if (logdev_file)
+		fput(logdev_file);
 	return error;
 }
 
-- 
cgit v1.2.3