From 3ab78df2a59a485f479d26852a060acfd8c4ecd7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 3 Aug 2016 11:15:38 +1000 Subject: xfs: rework xfs_bmap_free callers to use xfs_defer_ops Restructure everything that used xfs_bmap_free to use xfs_defer_ops instead. For now we'll just remove the old symbols and play some cpp magic to make it work; in the next patch we'll actually rename everything. Signed-off-by: Darrick J. Wong Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 1 + 1 file changed, 1 insertion(+) (limited to 'fs/xfs/libxfs/xfs_alloc.c') diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 88c26b827a2d..5993f87eee3e 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -24,6 +24,7 @@ #include "xfs_bit.h" #include "xfs_sb.h" #include "xfs_mount.h" +#include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_btree.h" #include "xfs_alloc_btree.h" -- cgit v1.2.3 From ba9e780246a15a35f8ebe5b60f4a11bb58e85bda Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 3 Aug 2016 11:26:33 +1000 Subject: xfs: add tracepoints and error injection for deferred extent freeing Add a couple of tracepoints for the deferred extent free operation and a site for injecting errors while finishing the operation. This makes it easier to debug deferred ops and test log redo. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 7 +++++++ fs/xfs/libxfs/xfs_bmap.c | 2 ++ fs/xfs/xfs_error.h | 4 +++- fs/xfs/xfs_trace.h | 5 ++++- 4 files changed, 16 insertions(+), 2 deletions(-) (limited to 'fs/xfs/libxfs/xfs_alloc.c') diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 5993f87eee3e..22ac3f136f21 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2702,6 +2702,13 @@ xfs_free_extent( ASSERT(len != 0); + trace_xfs_bmap_free_deferred(mp, agno, 0, agbno, len); + + if (XFS_TEST_ERROR(false, mp, + XFS_ERRTAG_FREE_EXTENT, + XFS_RANDOM_FREE_EXTENT)) + return -EIO; + error = xfs_free_extent_fix_freelist(tp, agno, &agbp); if (error) return error; diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 540a6b7928af..8e14ff45c73a 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -596,6 +596,8 @@ xfs_bmap_add_free( new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP); new->xefi_startblock = bno; new->xefi_blockcount = (xfs_extlen_t)len; + trace_xfs_bmap_free_defer(mp, XFS_FSB_TO_AGNO(mp, bno), 0, + XFS_FSB_TO_AGBNO(mp, bno), len); xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list); } diff --git a/fs/xfs/xfs_error.h b/fs/xfs/xfs_error.h index 2e4f67f68856..da6f951435c9 100644 --- a/fs/xfs/xfs_error.h +++ b/fs/xfs/xfs_error.h @@ -90,7 +90,8 @@ extern void xfs_verifier_error(struct xfs_buf *bp); #define XFS_ERRTAG_STRATCMPL_IOERR 19 #define XFS_ERRTAG_DIOWRITE_IOERR 20 #define XFS_ERRTAG_BMAPIFORMAT 21 -#define XFS_ERRTAG_MAX 22 +#define XFS_ERRTAG_FREE_EXTENT 22 +#define XFS_ERRTAG_MAX 23 /* * Random factors for above tags, 1 means always, 2 means 1/2 time, etc. @@ -117,6 +118,7 @@ extern void xfs_verifier_error(struct xfs_buf *bp); #define XFS_RANDOM_STRATCMPL_IOERR (XFS_RANDOM_DEFAULT/10) #define XFS_RANDOM_DIOWRITE_IOERR (XFS_RANDOM_DEFAULT/10) #define XFS_RANDOM_BMAPIFORMAT XFS_RANDOM_DEFAULT +#define XFS_RANDOM_FREE_EXTENT 1 #ifdef DEBUG extern int xfs_error_test_active; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index a45b030d4592..939caf5f5a45 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2417,9 +2417,12 @@ DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_cancel); DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_finish); DEFINE_DEFER_PENDING_EVENT(xfs_defer_pending_abort); -DEFINE_PHYS_EXTENT_DEFERRED_EVENT(xfs_defer_phys_extent); DEFINE_MAP_EXTENT_DEFERRED_EVENT(xfs_defer_map_extent); +#define DEFINE_BMAP_FREE_DEFERRED_EVENT DEFINE_PHYS_EXTENT_DEFERRED_EVENT +DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_defer); +DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_deferred); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From b87049444ac4a6515ba0427d16a73438b646435b Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 3 Aug 2016 11:30:32 +1000 Subject: xfs: introduce rmap btree definitions Originally-From: Dave Chinner Add new per-ag rmap btree definitions to the per-ag structures. The rmap btree will sit in the empty slots on disk after the free space btrees, and hence form a part of the array of space management btrees. This requires the definition of the btree to be contiguous with the free space btrees. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 6 ++++++ fs/xfs/libxfs/xfs_btree.c | 4 ++-- fs/xfs/libxfs/xfs_btree.h | 3 +++ fs/xfs/libxfs/xfs_format.h | 22 +++++++++++++++++----- fs/xfs/libxfs/xfs_types.h | 4 ++-- 5 files changed, 30 insertions(+), 9 deletions(-) (limited to 'fs/xfs/libxfs/xfs_alloc.c') diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 22ac3f136f21..c02d8e8419ce 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2272,6 +2272,10 @@ xfs_agf_verify( be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNT]) > XFS_BTREE_MAXLEVELS) return false; + if (xfs_sb_version_hasrmapbt(&mp->m_sb) && + be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAP]) > XFS_BTREE_MAXLEVELS) + return false; + /* * during growfs operations, the perag is not fully initialised, * so we can't use it for any useful checking. growfs ensures we can't @@ -2403,6 +2407,8 @@ xfs_alloc_read_agf( be32_to_cpu(agf->agf_levels[XFS_BTNUM_BNOi]); pag->pagf_levels[XFS_BTNUM_CNTi] = be32_to_cpu(agf->agf_levels[XFS_BTNUM_CNTi]); + pag->pagf_levels[XFS_BTNUM_RMAPi] = + be32_to_cpu(agf->agf_levels[XFS_BTNUM_RMAPi]); spin_lock_init(&pag->pagb_lock); pag->pagb_count = 0; pag->pagb_tree = RB_ROOT; diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 4fc46529753d..4066b01a1807 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -44,9 +44,9 @@ kmem_zone_t *xfs_btree_cur_zone; * Btree magic numbers. */ static const __uint32_t xfs_magics[2][XFS_BTNUM_MAX] = { - { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, + { XFS_ABTB_MAGIC, XFS_ABTC_MAGIC, 0, XFS_BMAP_MAGIC, XFS_IBT_MAGIC, XFS_FIBT_MAGIC }, - { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, + { XFS_ABTB_CRC_MAGIC, XFS_ABTC_CRC_MAGIC, XFS_RMAP_CRC_MAGIC, XFS_BMAP_CRC_MAGIC, XFS_IBT_CRC_MAGIC, XFS_FIBT_CRC_MAGIC } }; #define xfs_btree_magic(cur) \ diff --git a/fs/xfs/libxfs/xfs_btree.h b/fs/xfs/libxfs/xfs_btree.h index ac5cd6ef6859..25f1b69e7515 100644 --- a/fs/xfs/libxfs/xfs_btree.h +++ b/fs/xfs/libxfs/xfs_btree.h @@ -77,6 +77,7 @@ union xfs_btree_rec { #define XFS_BTNUM_BMAP ((xfs_btnum_t)XFS_BTNUM_BMAPi) #define XFS_BTNUM_INO ((xfs_btnum_t)XFS_BTNUM_INOi) #define XFS_BTNUM_FINO ((xfs_btnum_t)XFS_BTNUM_FINOi) +#define XFS_BTNUM_RMAP ((xfs_btnum_t)XFS_BTNUM_RMAPi) /* * For logging record fields. @@ -109,6 +110,7 @@ do { \ case XFS_BTNUM_BMAP: __XFS_BTREE_STATS_INC(__mp, bmbt, stat); break; \ case XFS_BTNUM_INO: __XFS_BTREE_STATS_INC(__mp, ibt, stat); break; \ case XFS_BTNUM_FINO: __XFS_BTREE_STATS_INC(__mp, fibt, stat); break; \ + case XFS_BTNUM_RMAP: break; \ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ } \ } while (0) @@ -129,6 +131,7 @@ do { \ __XFS_BTREE_STATS_ADD(__mp, ibt, stat, val); break; \ case XFS_BTNUM_FINO: \ __XFS_BTREE_STATS_ADD(__mp, fibt, stat, val); break; \ + case XFS_BTNUM_RMAP: break; \ case XFS_BTNUM_MAX: ASSERT(0); /* fucking gcc */ ; break; \ } \ } while (0) diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index adb204d40f22..74f4d3da598c 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -455,6 +455,7 @@ xfs_sb_has_compat_feature( } #define XFS_SB_FEAT_RO_COMPAT_FINOBT (1 << 0) /* free inode btree */ +#define XFS_SB_FEAT_RO_COMPAT_RMAPBT (1 << 1) /* reverse map btree */ #define XFS_SB_FEAT_RO_COMPAT_ALL \ (XFS_SB_FEAT_RO_COMPAT_FINOBT) #define XFS_SB_FEAT_RO_COMPAT_UNKNOWN ~XFS_SB_FEAT_RO_COMPAT_ALL @@ -538,6 +539,12 @@ static inline bool xfs_sb_version_hasmetauuid(struct xfs_sb *sbp) (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_META_UUID); } +static inline bool xfs_sb_version_hasrmapbt(struct xfs_sb *sbp) +{ + return (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5) && + (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_RMAPBT); +} + /* * end of superblock version macros */ @@ -598,10 +605,10 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino) #define XFS_AGI_GOOD_VERSION(v) ((v) == XFS_AGI_VERSION) /* - * Btree number 0 is bno, 1 is cnt. This value gives the size of the + * Btree number 0 is bno, 1 is cnt, 2 is rmap. This value gives the size of the * arrays below. */ -#define XFS_BTNUM_AGF ((int)XFS_BTNUM_CNTi + 1) +#define XFS_BTNUM_AGF ((int)XFS_BTNUM_RMAPi + 1) /* * The second word of agf_levels in the first a.g. overlaps the EFS @@ -618,12 +625,10 @@ typedef struct xfs_agf { __be32 agf_seqno; /* sequence # starting from 0 */ __be32 agf_length; /* size in blocks of a.g. */ /* - * Freespace information + * Freespace and rmap information */ __be32 agf_roots[XFS_BTNUM_AGF]; /* root blocks */ - __be32 agf_spare0; /* spare field */ __be32 agf_levels[XFS_BTNUM_AGF]; /* btree levels */ - __be32 agf_spare1; /* spare field */ __be32 agf_flfirst; /* first freelist block's index */ __be32 agf_fllast; /* last freelist block's index */ @@ -1307,6 +1312,13 @@ typedef __be32 xfs_inobt_ptr_t; #define XFS_IBT_BLOCK(mp) ((xfs_agblock_t)(XFS_CNT_BLOCK(mp) + 1)) #define XFS_FIBT_BLOCK(mp) ((xfs_agblock_t)(XFS_IBT_BLOCK(mp) + 1)) +/* + * Reverse mapping btree format definitions + * + * There is a btree for the reverse map per allocation group + */ +#define XFS_RMAP_CRC_MAGIC 0x524d4233 /* 'RMB3' */ + /* * The first data block of an AG depends on whether the filesystem was formatted * with the finobt feature. If so, account for the finobt reserved root btree diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index b79dc66b2ecd..3d503647f26b 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -108,8 +108,8 @@ typedef enum { } xfs_lookup_t; typedef enum { - XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_BMAPi, XFS_BTNUM_INOi, - XFS_BTNUM_FINOi, XFS_BTNUM_MAX + XFS_BTNUM_BNOi, XFS_BTNUM_CNTi, XFS_BTNUM_RMAPi, XFS_BTNUM_BMAPi, + XFS_BTNUM_INOi, XFS_BTNUM_FINOi, XFS_BTNUM_MAX } xfs_btnum_t; struct xfs_name { -- cgit v1.2.3 From 8018026ef29756af6144e2e2e8dffc9c2ed0d6f7 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 3 Aug 2016 11:31:47 +1000 Subject: xfs: rmap btree add more reserved blocks Originally-From: Dave Chinner XFS reserves a small amount of space in each AG for the minimum number of free blocks needed for operation. Adding the rmap btree increases the number of reserved blocks, but it also increases the complexity of the calculation as the free inode btree is optional (like the rmbt). Rather than calculate the prealloc blocks every time we need to check it, add a function to calculate it at mount time and store it in the struct xfs_mount, and convert the XFS_PREALLOC_BLOCKS macro just to use the xfs-mount variable directly. Signed-off-by: Dave Chinner Reviewed-by: Brian Foster Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 11 +++++++++++ fs/xfs/libxfs/xfs_alloc.h | 2 ++ fs/xfs/libxfs/xfs_format.h | 9 +-------- fs/xfs/xfs_fsops.c | 6 +++--- fs/xfs/xfs_mount.c | 2 ++ fs/xfs/xfs_mount.h | 1 + 6 files changed, 20 insertions(+), 11 deletions(-) (limited to 'fs/xfs/libxfs/xfs_alloc.c') diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index c02d8e8419ce..43c7e53a6a8e 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -50,6 +50,17 @@ STATIC int xfs_alloc_ag_vextent_size(xfs_alloc_arg_t *); STATIC int xfs_alloc_ag_vextent_small(xfs_alloc_arg_t *, xfs_btree_cur_t *, xfs_agblock_t *, xfs_extlen_t *, int *); +xfs_extlen_t +xfs_prealloc_blocks( + struct xfs_mount *mp) +{ + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + return XFS_RMAP_BLOCK(mp) + 1; + if (xfs_sb_version_hasfinobt(&mp->m_sb)) + return XFS_FIBT_BLOCK(mp) + 1; + return XFS_IBT_BLOCK(mp) + 1; +} + /* * Lookup the record equal to [bno, len] in the btree given by cur. */ diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index cf268b2d0b6c..20b54aa87026 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -232,4 +232,6 @@ int xfs_alloc_fix_freelist(struct xfs_alloc_arg *args, int flags); int xfs_free_extent_fix_freelist(struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **agbp); +xfs_extlen_t xfs_prealloc_blocks(struct xfs_mount *mp); + #endif /* __XFS_ALLOC_H__ */ diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 74f4d3da598c..3dbed011c29a 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1319,18 +1319,11 @@ typedef __be32 xfs_inobt_ptr_t; */ #define XFS_RMAP_CRC_MAGIC 0x524d4233 /* 'RMB3' */ -/* - * The first data block of an AG depends on whether the filesystem was formatted - * with the finobt feature. If so, account for the finobt reserved root btree - * block. - */ -#define XFS_PREALLOC_BLOCKS(mp) \ +#define XFS_RMAP_BLOCK(mp) \ (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \ XFS_FIBT_BLOCK(mp) + 1 : \ XFS_IBT_BLOCK(mp) + 1) - - /* * BMAP Btree format definitions * diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 4c191d2194e8..0a3b822d9f8c 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -243,7 +243,7 @@ xfs_growfs_data_private( agf->agf_flfirst = cpu_to_be32(1); agf->agf_fllast = 0; agf->agf_flcount = 0; - tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp); + tmpsize = agsize - mp->m_ag_prealloc_blocks; agf->agf_freeblks = cpu_to_be32(tmpsize); agf->agf_longest = cpu_to_be32(tmpsize); if (xfs_sb_version_hascrc(&mp->m_sb)) @@ -340,7 +340,7 @@ xfs_growfs_data_private( agno, 0); arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); - arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); + arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); arec->ar_blockcount = cpu_to_be32( agsize - be32_to_cpu(arec->ar_startblock)); @@ -369,7 +369,7 @@ xfs_growfs_data_private( agno, 0); arec = XFS_ALLOC_REC_ADDR(mp, XFS_BUF_TO_BLOCK(bp), 1); - arec->ar_startblock = cpu_to_be32(XFS_PREALLOC_BLOCKS(mp)); + arec->ar_startblock = cpu_to_be32(mp->m_ag_prealloc_blocks); arec->ar_blockcount = cpu_to_be32( agsize - be32_to_cpu(arec->ar_startblock)); nfree += be32_to_cpu(arec->ar_blockcount); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 5f3ba88985d5..24ae96b4078f 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -231,6 +231,8 @@ xfs_initialize_perag( if (maxagi) *maxagi = index; + + mp->m_ag_prealloc_blocks = xfs_prealloc_blocks(mp); return 0; out_unwind: diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index c1b798c72126..0537b1f28f90 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -119,6 +119,7 @@ typedef struct xfs_mount { uint m_ag_maxlevels; /* XFS_AG_MAXLEVELS */ uint m_bm_maxlevels[2]; /* XFS_BM_MAXLEVELS */ uint m_in_maxlevels; /* max inobt btree levels. */ + xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */ struct radix_tree_root m_perag_tree; /* per-ag accounting info */ spinlock_t m_perag_lock; /* lock for m_perag_tree */ struct mutex m_growlock; /* growfs mutex */ -- cgit v1.2.3 From 340785cca16246f82ccaf11740d885017a9e9341 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 3 Aug 2016 11:33:42 +1000 Subject: xfs: add owner field to extent allocation and freeing For the rmap btree to work, we have to feed the extent owner information to the the allocation and freeing functions. This information is what will end up in the rmap btree that tracks allocated extents. While we technically don't need the owner information when freeing extents, passing it allows us to validate that the extent we are removing from the rmap btree actually belonged to the owner we expected it to belong to. We also define a special set of owner values for internal metadata that would otherwise have no owner. This allows us to tell the difference between metadata owned by different per-ag btrees, as well as static fs metadata (e.g. AG headers) and internal journal blocks. There are also a couple of special cases we need to take care of - during EFI recovery, we don't actually know who the original owner was, so we need to pass a wildcard to indicate that we aren't checking the owner for validity. We also need special handling in growfs, as we "free" the space in the last AG when extending it, but because it's new space it has no actual owner... While touching the xfs_bmap_add_free() function, re-order the parameters to put the struct xfs_mount first. Extend the owner field to include both the owner type and some sort of index within the owner. The index field will be used to support reverse mappings when reflink is enabled. When we're freeing extents from an EFI, we don't have the owner information available (rmap updates have their own redo items). xfs_free_extent therefore doesn't need to do an rmap update. Make sure that the log replay code signals this correctly. This is based upon a patch originally from Dave Chinner. It has been extended to add more owner information with the intent of helping recovery operations when things go wrong (e.g. offset of user data block in a file). [dchinner: de-shout the xfs_rmap_*_owner helpers] [darrick: minor style fixes suggested by Christoph Hellwig] Signed-off-by: Dave Chinner Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 26 ++++++++++------ fs/xfs/libxfs/xfs_alloc.h | 8 +++-- fs/xfs/libxfs/xfs_bmap.c | 25 +++++++++++---- fs/xfs/libxfs/xfs_bmap.h | 4 ++- fs/xfs/libxfs/xfs_bmap_btree.c | 7 ++++- fs/xfs/libxfs/xfs_format.h | 28 +++++++++++++++++ fs/xfs/libxfs/xfs_ialloc.c | 8 +++-- fs/xfs/libxfs/xfs_ialloc_btree.c | 8 ++++- fs/xfs/libxfs/xfs_rmap.h | 67 ++++++++++++++++++++++++++++++++++++++++ fs/xfs/xfs_extfree_item.c | 6 +++- fs/xfs/xfs_fsops.c | 17 +++++++--- fs/xfs/xfs_trans.h | 2 +- fs/xfs/xfs_trans_extfree.c | 8 +++-- 13 files changed, 181 insertions(+), 33 deletions(-) create mode 100644 fs/xfs/libxfs/xfs_rmap.h (limited to 'fs/xfs/libxfs/xfs_alloc.c') diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 43c7e53a6a8e..6335b8b85349 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -36,6 +36,7 @@ #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_log.h" +#include "xfs_rmap.h" struct workqueue_struct *xfs_alloc_wq; @@ -1589,14 +1590,15 @@ error0: /* * Free the extent starting at agno/bno for length. */ -STATIC int /* error */ +STATIC int xfs_free_ag_extent( - xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *agbp, /* buffer for a.g. freelist header */ - xfs_agnumber_t agno, /* allocation group number */ - xfs_agblock_t bno, /* starting block number */ - xfs_extlen_t len, /* length of extent */ - int isfl) /* set if is freelist blocks - no sb acctg */ + xfs_trans_t *tp, + xfs_buf_t *agbp, + xfs_agnumber_t agno, + xfs_agblock_t bno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo, + int isfl) { xfs_btree_cur_t *bno_cur; /* cursor for by-block btree */ xfs_btree_cur_t *cnt_cur; /* cursor for by-size btree */ @@ -2005,13 +2007,15 @@ xfs_alloc_fix_freelist( * back on the free list? Maybe we should only do this when space is * getting low or the AGFL is more than half full? */ + xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG); while (pag->pagf_flcount > need) { struct xfs_buf *bp; error = xfs_alloc_get_freelist(tp, agbp, &bno, 0); if (error) goto out_agbp_relse; - error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, 1); + error = xfs_free_ag_extent(tp, agbp, args->agno, bno, 1, + &targs.oinfo, 1); if (error) goto out_agbp_relse; bp = xfs_btree_get_bufs(mp, tp, args->agno, bno, 0); @@ -2021,6 +2025,7 @@ xfs_alloc_fix_freelist( memset(&targs, 0, sizeof(targs)); targs.tp = tp; targs.mp = mp; + xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG); targs.agbp = agbp; targs.agno = args->agno; targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; @@ -2709,7 +2714,8 @@ int /* error */ xfs_free_extent( struct xfs_trans *tp, /* transaction pointer */ xfs_fsblock_t bno, /* starting block number of extent */ - xfs_extlen_t len) /* length of extent */ + xfs_extlen_t len, /* length of extent */ + struct xfs_owner_info *oinfo) /* extent owner */ { struct xfs_mount *mp = tp->t_mountp; struct xfs_buf *agbp; @@ -2737,7 +2743,7 @@ xfs_free_extent( agbno + len <= be32_to_cpu(XFS_BUF_TO_AGF(agbp)->agf_length), err); - error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, 0); + error = xfs_free_ag_extent(tp, agbp, agno, agbno, len, oinfo, 0); if (error) goto err; diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 20b54aa87026..88053422fda0 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -123,6 +123,7 @@ typedef struct xfs_alloc_arg { char isfl; /* set if is freelist blocks - !acctg */ char userdata; /* mask defining userdata treatment */ xfs_fsblock_t firstblock; /* io first block allocated */ + struct xfs_owner_info oinfo; /* owner of blocks being allocated */ } xfs_alloc_arg_t; /* @@ -208,9 +209,10 @@ xfs_alloc_vextent( */ int /* error */ xfs_free_extent( - struct xfs_trans *tp, /* transaction pointer */ - xfs_fsblock_t bno, /* starting block number of extent */ - xfs_extlen_t len); /* length of extent */ + struct xfs_trans *tp, /* transaction pointer */ + xfs_fsblock_t bno, /* starting block number of extent */ + xfs_extlen_t len, /* length of extent */ + struct xfs_owner_info *oinfo);/* extent owner */ int /* error */ xfs_alloc_lookup_ge( diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 8e14ff45c73a..919069fc7b23 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -46,6 +46,7 @@ #include "xfs_symlink.h" #include "xfs_attr_leaf.h" #include "xfs_filestream.h" +#include "xfs_rmap.h" kmem_zone_t *xfs_bmap_free_item_zone; @@ -571,10 +572,11 @@ xfs_bmap_validate_ret( */ void xfs_bmap_add_free( - struct xfs_mount *mp, /* mount point structure */ - struct xfs_defer_ops *dfops, /* list of extents */ - xfs_fsblock_t bno, /* fs block number of extent */ - xfs_filblks_t len) /* length of extent */ + struct xfs_mount *mp, + struct xfs_defer_ops *dfops, + xfs_fsblock_t bno, + xfs_filblks_t len, + struct xfs_owner_info *oinfo) { struct xfs_extent_free_item *new; /* new element */ #ifdef DEBUG @@ -593,9 +595,14 @@ xfs_bmap_add_free( ASSERT(agbno + len <= mp->m_sb.sb_agblocks); #endif ASSERT(xfs_bmap_free_item_zone != NULL); + new = kmem_zone_alloc(xfs_bmap_free_item_zone, KM_SLEEP); new->xefi_startblock = bno; new->xefi_blockcount = (xfs_extlen_t)len; + if (oinfo) + new->xefi_oinfo = *oinfo; + else + xfs_rmap_skip_owner_update(&new->xefi_oinfo); trace_xfs_bmap_free_defer(mp, XFS_FSB_TO_AGNO(mp, bno), 0, XFS_FSB_TO_AGBNO(mp, bno), len); xfs_defer_add(dfops, XFS_DEFER_OPS_TYPE_FREE, &new->xefi_list); @@ -628,6 +635,7 @@ xfs_bmap_btree_to_extents( xfs_mount_t *mp; /* mount point structure */ __be64 *pp; /* ptr to block address */ struct xfs_btree_block *rblock;/* root btree block */ + struct xfs_owner_info oinfo; mp = ip->i_mount; ifp = XFS_IFORK_PTR(ip, whichfork); @@ -651,7 +659,8 @@ xfs_bmap_btree_to_extents( cblock = XFS_BUF_TO_BLOCK(cbp); if ((error = xfs_btree_check_block(cur, cblock, 0, cbp))) return error; - xfs_bmap_add_free(mp, cur->bc_private.b.dfops, cbno, 1); + xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); + xfs_bmap_add_free(mp, cur->bc_private.b.dfops, cbno, 1, &oinfo); ip->i_d.di_nblocks--; xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L); xfs_trans_binval(tp, cbp); @@ -732,6 +741,7 @@ xfs_bmap_extents_to_btree( memset(&args, 0, sizeof(args)); args.tp = tp; args.mp = mp; + xfs_rmap_ino_bmbt_owner(&args.oinfo, ip->i_ino, whichfork); args.firstblock = *firstblock; if (*firstblock == NULLFSBLOCK) { args.type = XFS_ALLOCTYPE_START_BNO; @@ -878,6 +888,7 @@ xfs_bmap_local_to_extents( memset(&args, 0, sizeof(args)); args.tp = tp; args.mp = ip->i_mount; + xfs_rmap_ino_owner(&args.oinfo, ip->i_ino, whichfork, 0); args.firstblock = *firstblock; /* * Allocate a block. We know we need only one, since the @@ -3660,6 +3671,7 @@ xfs_bmap_btalloc( args.tp = ap->tp; args.mp = mp; args.fsbno = ap->blkno; + xfs_rmap_skip_owner_update(&args.oinfo); /* Trim the allocation back to the maximum an AG can fit. */ args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp)); @@ -4839,6 +4851,7 @@ xfs_bmap_del_extent( nblks = 0; do_fx = 0; } + /* * Set flag value to use in switch statement. * Left-contig is 2, right-contig is 1. @@ -5026,7 +5039,7 @@ xfs_bmap_del_extent( */ if (do_fx) xfs_bmap_add_free(mp, dfops, del->br_startblock, - del->br_blockcount); + del->br_blockcount, NULL); /* * Adjust inode # blocks in the file. */ diff --git a/fs/xfs/libxfs/xfs_bmap.h b/fs/xfs/libxfs/xfs_bmap.h index dc01bb85a6e8..254034f96941 100644 --- a/fs/xfs/libxfs/xfs_bmap.h +++ b/fs/xfs/libxfs/xfs_bmap.h @@ -67,6 +67,7 @@ struct xfs_extent_free_item xfs_fsblock_t xefi_startblock;/* starting fs block number */ xfs_extlen_t xefi_blockcount;/* number of blocks in extent */ struct list_head xefi_list; + struct xfs_owner_info xefi_oinfo; /* extent owner */ }; #define XFS_BMAP_MAX_NMAP 4 @@ -165,7 +166,8 @@ void xfs_bmap_trace_exlist(struct xfs_inode *ip, xfs_extnum_t cnt, int xfs_bmap_add_attrfork(struct xfs_inode *ip, int size, int rsvd); void xfs_bmap_local_to_extents_empty(struct xfs_inode *ip, int whichfork); void xfs_bmap_add_free(struct xfs_mount *mp, struct xfs_defer_ops *dfops, - xfs_fsblock_t bno, xfs_filblks_t len); + xfs_fsblock_t bno, xfs_filblks_t len, + struct xfs_owner_info *oinfo); void xfs_bmap_compute_maxlevels(struct xfs_mount *mp, int whichfork); int xfs_bmap_first_unused(struct xfs_trans *tp, struct xfs_inode *ip, xfs_extlen_t len, xfs_fileoff_t *unused, int whichfork); diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index 0df5318cefff..9e34ca413e53 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -35,6 +35,7 @@ #include "xfs_quota.h" #include "xfs_trace.h" #include "xfs_cksum.h" +#include "xfs_rmap.h" /* * Determine the extent state. @@ -447,6 +448,8 @@ xfs_bmbt_alloc_block( args.mp = cur->bc_mp; args.fsbno = cur->bc_private.b.firstblock; args.firstblock = args.fsbno; + xfs_rmap_ino_bmbt_owner(&args.oinfo, cur->bc_private.b.ip->i_ino, + cur->bc_private.b.whichfork); if (args.fsbno == NULLFSBLOCK) { args.fsbno = be64_to_cpu(start->l); @@ -526,8 +529,10 @@ xfs_bmbt_free_block( struct xfs_inode *ip = cur->bc_private.b.ip; struct xfs_trans *tp = cur->bc_tp; xfs_fsblock_t fsbno = XFS_DADDR_TO_FSB(mp, XFS_BUF_ADDR(bp)); + struct xfs_owner_info oinfo; - xfs_bmap_add_free(mp, cur->bc_private.b.dfops, fsbno, 1); + xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, cur->bc_private.b.whichfork); + xfs_bmap_add_free(mp, cur->bc_private.b.dfops, fsbno, 1, &oinfo); ip->i_d.di_nblocks--; xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index 3dbed011c29a..eecfedf19be5 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -1319,6 +1319,34 @@ typedef __be32 xfs_inobt_ptr_t; */ #define XFS_RMAP_CRC_MAGIC 0x524d4233 /* 'RMB3' */ +/* + * Ownership info for an extent. This is used to create reverse-mapping + * entries. + */ +#define XFS_OWNER_INFO_ATTR_FORK (1 << 0) +#define XFS_OWNER_INFO_BMBT_BLOCK (1 << 1) +struct xfs_owner_info { + uint64_t oi_owner; + xfs_fileoff_t oi_offset; + unsigned int oi_flags; +}; + +/* + * Special owner types. + * + * Seeing as we only support up to 8EB, we have the upper bit of the owner field + * to tell us we have a special owner value. We use these for static metadata + * allocated at mkfs/growfs time, as well as for freespace management metadata. + */ +#define XFS_RMAP_OWN_NULL (-1ULL) /* No owner, for growfs */ +#define XFS_RMAP_OWN_UNKNOWN (-2ULL) /* Unknown owner, for EFI recovery */ +#define XFS_RMAP_OWN_FS (-3ULL) /* static fs metadata */ +#define XFS_RMAP_OWN_LOG (-4ULL) /* static fs metadata */ +#define XFS_RMAP_OWN_AG (-5ULL) /* AG freespace btree blocks */ +#define XFS_RMAP_OWN_INOBT (-6ULL) /* Inode btree blocks */ +#define XFS_RMAP_OWN_INODES (-7ULL) /* Inode chunk */ +#define XFS_RMAP_OWN_MIN (-8ULL) /* guard */ + #define XFS_RMAP_BLOCK(mp) \ (xfs_sb_version_hasfinobt(&((mp)->m_sb)) ? \ XFS_FIBT_BLOCK(mp) + 1 : \ diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index f0e1f713b0be..51b4e0de1fdc 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -40,6 +40,7 @@ #include "xfs_icache.h" #include "xfs_trace.h" #include "xfs_log.h" +#include "xfs_rmap.h" /* @@ -615,6 +616,7 @@ xfs_ialloc_ag_alloc( args.tp = tp; args.mp = tp->t_mountp; args.fsbno = NULLFSBLOCK; + xfs_rmap_ag_owner(&args.oinfo, XFS_RMAP_OWN_INODES); #ifdef DEBUG /* randomly do sparse inode allocations */ @@ -1825,12 +1827,14 @@ xfs_difree_inode_chunk( int nextbit; xfs_agblock_t agbno; int contigblk; + struct xfs_owner_info oinfo; DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS); + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); if (!xfs_inobt_issparse(rec->ir_holemask)) { /* not sparse, calculate extent info directly */ xfs_bmap_add_free(mp, dfops, XFS_AGB_TO_FSB(mp, agno, sagbno), - mp->m_ialloc_blks); + mp->m_ialloc_blks, &oinfo); return; } @@ -1874,7 +1878,7 @@ xfs_difree_inode_chunk( ASSERT(agbno % mp->m_sb.sb_spino_align == 0); ASSERT(contigblk % mp->m_sb.sb_spino_align == 0); xfs_bmap_add_free(mp, dfops, XFS_AGB_TO_FSB(mp, agno, agbno), - contigblk); + contigblk, &oinfo); /* reset range to current bit and carry on... */ startidx = endidx = nextbit; diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index a48f4482004c..c83691edf6b2 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -32,6 +32,7 @@ #include "xfs_trace.h" #include "xfs_cksum.h" #include "xfs_trans.h" +#include "xfs_rmap.h" STATIC int @@ -96,6 +97,7 @@ xfs_inobt_alloc_block( memset(&args, 0, sizeof(args)); args.tp = cur->bc_tp; args.mp = cur->bc_mp; + xfs_rmap_ag_owner(&args.oinfo, XFS_RMAP_OWN_INOBT); args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.a.agno, sbno); args.minlen = 1; args.maxlen = 1; @@ -125,8 +127,12 @@ xfs_inobt_free_block( struct xfs_btree_cur *cur, struct xfs_buf *bp) { + struct xfs_owner_info oinfo; + + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INOBT); return xfs_free_extent(cur->bc_tp, - XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1); + XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1, + &oinfo); } STATIC int diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h new file mode 100644 index 000000000000..b30e7ad6c7cc --- /dev/null +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -0,0 +1,67 @@ +/* + * Copyright (C) 2016 Oracle. All Rights Reserved. + * + * Author: Darrick J. Wong + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#ifndef __XFS_RMAP_H__ +#define __XFS_RMAP_H__ + +static inline void +xfs_rmap_ag_owner( + struct xfs_owner_info *oi, + uint64_t owner) +{ + oi->oi_owner = owner; + oi->oi_offset = 0; + oi->oi_flags = 0; +} + +static inline void +xfs_rmap_ino_bmbt_owner( + struct xfs_owner_info *oi, + xfs_ino_t ino, + int whichfork) +{ + oi->oi_owner = ino; + oi->oi_offset = 0; + oi->oi_flags = XFS_OWNER_INFO_BMBT_BLOCK; + if (whichfork == XFS_ATTR_FORK) + oi->oi_flags |= XFS_OWNER_INFO_ATTR_FORK; +} + +static inline void +xfs_rmap_ino_owner( + struct xfs_owner_info *oi, + xfs_ino_t ino, + int whichfork, + xfs_fileoff_t offset) +{ + oi->oi_owner = ino; + oi->oi_offset = offset; + oi->oi_flags = 0; + if (whichfork == XFS_ATTR_FORK) + oi->oi_flags |= XFS_OWNER_INFO_ATTR_FORK; +} + +static inline void +xfs_rmap_skip_owner_update( + struct xfs_owner_info *oi) +{ + oi->oi_owner = XFS_RMAP_OWN_UNKNOWN; +} + +#endif /* __XFS_RMAP_H__ */ diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 8c49f883934a..a8c461735ecb 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -27,6 +27,8 @@ #include "xfs_buf_item.h" #include "xfs_extfree_item.h" #include "xfs_log.h" +#include "xfs_btree.h" +#include "xfs_rmap.h" kmem_zone_t *xfs_efi_zone; @@ -503,6 +505,7 @@ xfs_efi_recover( int error = 0; xfs_extent_t *extp; xfs_fsblock_t startblock_fsb; + struct xfs_owner_info oinfo; ASSERT(!test_bit(XFS_EFI_RECOVERED, &efip->efi_flags)); @@ -534,10 +537,11 @@ xfs_efi_recover( return error; efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); + xfs_rmap_skip_owner_update(&oinfo); for (i = 0; i < efip->efi_format.efi_nextents; i++) { extp = &(efip->efi_format.efi_extents[i]); error = xfs_trans_free_extent(tp, efdp, extp->ext_start, - extp->ext_len); + extp->ext_len, &oinfo); if (error) goto abort_error; diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 0a3b822d9f8c..b625b61d1244 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -41,6 +41,7 @@ #include "xfs_trace.h" #include "xfs_log.h" #include "xfs_filestream.h" +#include "xfs_rmap.h" /* * File system operations @@ -436,6 +437,8 @@ xfs_growfs_data_private( * There are new blocks in the old last a.g. */ if (new) { + struct xfs_owner_info oinfo; + /* * Change the agi length. */ @@ -463,14 +466,20 @@ xfs_growfs_data_private( be32_to_cpu(agi->agi_length)); xfs_alloc_log_agf(tp, bp, XFS_AGF_LENGTH); + /* * Free the new space. + * + * XFS_RMAP_OWN_NULL is used here to tell the rmap btree that + * this doesn't actually exist in the rmap btree. */ - error = xfs_free_extent(tp, XFS_AGB_TO_FSB(mp, agno, - be32_to_cpu(agf->agf_length) - new), new); - if (error) { + xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL); + error = xfs_free_extent(tp, + XFS_AGB_TO_FSB(mp, agno, + be32_to_cpu(agf->agf_length) - new), + new, &oinfo); + if (error) goto error0; - } } /* diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index a5fe1874f02f..9bc368bb5eb4 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -217,7 +217,7 @@ struct xfs_efd_log_item *xfs_trans_get_efd(struct xfs_trans *, uint); int xfs_trans_free_extent(struct xfs_trans *, struct xfs_efd_log_item *, xfs_fsblock_t, - xfs_extlen_t); + xfs_extlen_t, struct xfs_owner_info *); int xfs_trans_commit(struct xfs_trans *); int __xfs_trans_roll(struct xfs_trans **, struct xfs_inode *, int *); int xfs_trans_roll(struct xfs_trans **, struct xfs_inode *); diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c index b48490290b27..3aca3a7578c2 100644 --- a/fs/xfs/xfs_trans_extfree.c +++ b/fs/xfs/xfs_trans_extfree.c @@ -120,13 +120,14 @@ xfs_trans_free_extent( struct xfs_trans *tp, struct xfs_efd_log_item *efdp, xfs_fsblock_t start_block, - xfs_extlen_t ext_len) + xfs_extlen_t ext_len, + struct xfs_owner_info *oinfo) { uint next_extent; struct xfs_extent *extp; int error; - error = xfs_free_extent(tp, start_block, ext_len); + error = xfs_free_extent(tp, start_block, ext_len, oinfo); /* * Mark the transaction dirty, even on error. This ensures the @@ -213,7 +214,8 @@ xfs_extent_free_finish_item( free = container_of(item, struct xfs_extent_free_item, xefi_list); error = xfs_trans_free_extent(tp, done_item, free->xefi_startblock, - free->xefi_blockcount); + free->xefi_blockcount, + &free->xefi_oinfo); kmem_free(free); return error; } -- cgit v1.2.3 From 673930c34a4500c616cf9b2bbe1ae131ead2e155 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 3 Aug 2016 11:33:43 +1000 Subject: xfs: introduce rmap extent operation stubs Originally-From: Dave Chinner Add the stubs into the extent allocation and freeing paths that the rmap btree implementation will hook into. While doing this, add the trace points that will be used to track rmap btree extent manipulations. [darrick.wong@oracle.com: Extend the stubs to take full owner info.] Signed-off-by: Dave Chinner Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/Makefile | 1 + fs/xfs/libxfs/xfs_alloc.c | 19 ++++++++-- fs/xfs/libxfs/xfs_rmap.c | 89 +++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_rmap.h | 11 ++++++ fs/xfs/xfs_trace.h | 77 ++++++++++++++++++++++++++++++++++++++++ 5 files changed, 195 insertions(+), 2 deletions(-) create mode 100644 fs/xfs/libxfs/xfs_rmap.c (limited to 'fs/xfs/libxfs/xfs_alloc.c') diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile index 01857b096cae..0498291829c6 100644 --- a/fs/xfs/Makefile +++ b/fs/xfs/Makefile @@ -52,6 +52,7 @@ xfs-y += $(addprefix libxfs/, \ xfs_inode_fork.o \ xfs_inode_buf.o \ xfs_log_rlimit.o \ + xfs_rmap.o \ xfs_sb.o \ xfs_symlink_remote.o \ xfs_trans_resv.o \ diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 6335b8b85349..dba7ce4b4be8 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -27,6 +27,7 @@ #include "xfs_defer.h" #include "xfs_inode.h" #include "xfs_btree.h" +#include "xfs_rmap.h" #include "xfs_alloc_btree.h" #include "xfs_alloc.h" #include "xfs_extent_busy.h" @@ -36,7 +37,6 @@ #include "xfs_trans.h" #include "xfs_buf_item.h" #include "xfs_log.h" -#include "xfs_rmap.h" struct workqueue_struct *xfs_alloc_wq; @@ -649,6 +649,14 @@ xfs_alloc_ag_vextent( ASSERT(!args->wasfromfl || !args->isfl); ASSERT(args->agbno % args->alignment == 0); + /* if not file data, insert new block into the reverse map btree */ + if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) { + error = xfs_rmap_alloc(args->tp, args->agbp, args->agno, + args->agbno, args->len, &args->oinfo); + if (error) + return error; + } + if (!args->wasfromfl) { error = xfs_alloc_update_counters(args->tp, args->pag, args->agbp, @@ -1615,12 +1623,19 @@ xfs_free_ag_extent( xfs_extlen_t nlen; /* new length of freespace */ xfs_perag_t *pag; /* per allocation group data */ + bno_cur = cnt_cur = NULL; mp = tp->t_mountp; + + if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) { + error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo); + if (error) + goto error0; + } + /* * Allocate and initialize a cursor for the by-block btree. */ bno_cur = xfs_allocbt_init_cursor(mp, tp, agbp, agno, XFS_BTNUM_BNO); - cnt_cur = NULL; /* * Look for a neighboring block on the left (lower block numbers) * that is contiguous with this space. diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c new file mode 100644 index 000000000000..b522bfc436a2 --- /dev/null +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -0,0 +1,89 @@ +/* + * Copyright (c) 2014 Red Hat, Inc. + * All Rights Reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#include "xfs.h" +#include "xfs_fs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_bit.h" +#include "xfs_sb.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_da_format.h" +#include "xfs_da_btree.h" +#include "xfs_btree.h" +#include "xfs_trans.h" +#include "xfs_alloc.h" +#include "xfs_rmap.h" +#include "xfs_trans_space.h" +#include "xfs_trace.h" +#include "xfs_error.h" +#include "xfs_extent_busy.h" + +int +xfs_rmap_free( + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agnumber_t agno, + xfs_agblock_t bno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo) +{ + struct xfs_mount *mp = tp->t_mountp; + int error = 0; + + if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + return 0; + + trace_xfs_rmap_unmap(mp, agno, bno, len, false, oinfo); + if (1) + goto out_error; + trace_xfs_rmap_unmap_done(mp, agno, bno, len, false, oinfo); + return 0; + +out_error: + trace_xfs_rmap_unmap_error(mp, agno, error, _RET_IP_); + return error; +} + +int +xfs_rmap_alloc( + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_agnumber_t agno, + xfs_agblock_t bno, + xfs_extlen_t len, + struct xfs_owner_info *oinfo) +{ + struct xfs_mount *mp = tp->t_mountp; + int error = 0; + + if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + return 0; + + trace_xfs_rmap_map(mp, agno, bno, len, false, oinfo); + if (1) + goto out_error; + trace_xfs_rmap_map_done(mp, agno, bno, len, false, oinfo); + return 0; + +out_error: + trace_xfs_rmap_map_error(mp, agno, error, _RET_IP_); + return error; +} diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h index b30e7ad6c7cc..dbf0301e9ae1 100644 --- a/fs/xfs/libxfs/xfs_rmap.h +++ b/fs/xfs/libxfs/xfs_rmap.h @@ -64,4 +64,15 @@ xfs_rmap_skip_owner_update( oi->oi_owner = XFS_RMAP_OWN_UNKNOWN; } +/* Reverse mapping functions. */ + +struct xfs_buf; + +int xfs_rmap_alloc(struct xfs_trans *tp, struct xfs_buf *agbp, + xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, + struct xfs_owner_info *oinfo); +int xfs_rmap_free(struct xfs_trans *tp, struct xfs_buf *agbp, + xfs_agnumber_t agno, xfs_agblock_t bno, xfs_extlen_t len, + struct xfs_owner_info *oinfo); + #endif /* __XFS_RMAP_H__ */ diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 939caf5f5a45..79e3812499ba 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -2423,6 +2423,83 @@ DEFINE_MAP_EXTENT_DEFERRED_EVENT(xfs_defer_map_extent); DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_defer); DEFINE_BMAP_FREE_DEFERRED_EVENT(xfs_bmap_free_deferred); +/* rmap tracepoints */ +DECLARE_EVENT_CLASS(xfs_rmap_class, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, + xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten, + struct xfs_owner_info *oinfo), + TP_ARGS(mp, agno, agbno, len, unwritten, oinfo), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(xfs_agblock_t, agbno) + __field(xfs_extlen_t, len) + __field(uint64_t, owner) + __field(uint64_t, offset) + __field(unsigned long, flags) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->agbno = agbno; + __entry->len = len; + __entry->owner = oinfo->oi_owner; + __entry->offset = oinfo->oi_offset; + __entry->flags = oinfo->oi_flags; + ), + TP_printk("dev %d:%d agno %u agbno %u len %u owner %lld offset %llu flags 0x%lx", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->agbno, + __entry->len, + __entry->owner, + __entry->offset, + __entry->flags) +); +#define DEFINE_RMAP_EVENT(name) \ +DEFINE_EVENT(xfs_rmap_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, \ + xfs_agblock_t agbno, xfs_extlen_t len, bool unwritten, \ + struct xfs_owner_info *oinfo), \ + TP_ARGS(mp, agno, agbno, len, unwritten, oinfo)) + +/* simple AG-based error/%ip tracepoint class */ +DECLARE_EVENT_CLASS(xfs_ag_error_class, + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error, + unsigned long caller_ip), + TP_ARGS(mp, agno, error, caller_ip), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(xfs_agnumber_t, agno) + __field(int, error) + __field(unsigned long, caller_ip) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->agno = agno; + __entry->error = error; + __entry->caller_ip = caller_ip; + ), + TP_printk("dev %d:%d agno %u error %d caller %ps", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->agno, + __entry->error, + (char *)__entry->caller_ip) +); + +#define DEFINE_AG_ERROR_EVENT(name) \ +DEFINE_EVENT(xfs_ag_error_class, name, \ + TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int error, \ + unsigned long caller_ip), \ + TP_ARGS(mp, agno, error, caller_ip)) + +DEFINE_RMAP_EVENT(xfs_rmap_unmap); +DEFINE_RMAP_EVENT(xfs_rmap_unmap_done); +DEFINE_AG_ERROR_EVENT(xfs_rmap_unmap_error); +DEFINE_RMAP_EVENT(xfs_rmap_map); +DEFINE_RMAP_EVENT(xfs_rmap_map_done); +DEFINE_AG_ERROR_EVENT(xfs_rmap_map_error); + #endif /* _TRACE_XFS_H */ #undef TRACE_INCLUDE_PATH -- cgit v1.2.3 From 525488520ac69a3612dbceefa573b255a83005e9 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 3 Aug 2016 11:38:24 +1000 Subject: xfs: rmap btree requires more reserved free space Originally-From: Dave Chinner The rmap btree is allocated from the AGFL, which means we have to ensure ENOSPC is reported to userspace before we run out of free space in each AG. The last allocation in an AG can cause a full height rmap btree split, and that means we have to reserve at least this many blocks *in each AG* to be placed on the AGFL at ENOSPC. Update the various space calculation functions to handle this. Also, because the macros are now executing conditional code and are called quite frequently, convert them to functions that initialise variables in the struct xfs_mount, use the new variables everywhere and document the calculations better. [darrick.wong@oracle.com: don't reserve blocks if !rmap] [dchinner@redhat.com: update m_ag_max_usable after growfs] Signed-off-by: Dave Chinner Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++ fs/xfs/libxfs/xfs_alloc.h | 41 ++++------------------------ fs/xfs/libxfs/xfs_bmap.c | 2 +- fs/xfs/libxfs/xfs_sb.c | 2 ++ fs/xfs/xfs_discard.c | 2 +- fs/xfs/xfs_fsops.c | 5 ++-- fs/xfs/xfs_log_recover.c | 1 + fs/xfs/xfs_mount.c | 2 +- fs/xfs/xfs_mount.h | 2 ++ fs/xfs/xfs_super.c | 2 +- 10 files changed, 86 insertions(+), 42 deletions(-) (limited to 'fs/xfs/libxfs/xfs_alloc.c') diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index dba7ce4b4be8..73ab1ea1270d 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -62,6 +62,70 @@ xfs_prealloc_blocks( return XFS_IBT_BLOCK(mp) + 1; } +/* + * In order to avoid ENOSPC-related deadlock caused by out-of-order locking of + * AGF buffer (PV 947395), we place constraints on the relationship among + * actual allocations for data blocks, freelist blocks, and potential file data + * bmap btree blocks. However, these restrictions may result in no actual space + * allocated for a delayed extent, for example, a data block in a certain AG is + * allocated but there is no additional block for the additional bmap btree + * block due to a split of the bmap btree of the file. The result of this may + * lead to an infinite loop when the file gets flushed to disk and all delayed + * extents need to be actually allocated. To get around this, we explicitly set + * aside a few blocks which will not be reserved in delayed allocation. + * + * When rmap is disabled, we need to reserve 4 fsbs _per AG_ for the freelist + * and 4 more to handle a potential split of the file's bmap btree. + * + * When rmap is enabled, we must also be able to handle two rmap btree inserts + * to record both the file data extent and a new bmbt block. The bmbt block + * might not be in the same AG as the file data extent. In the worst case + * the bmap btree splits multiple levels and all the new blocks come from + * different AGs, so set aside enough to handle rmap btree splits in all AGs. + */ +unsigned int +xfs_alloc_set_aside( + struct xfs_mount *mp) +{ + unsigned int blocks; + + blocks = 4 + (mp->m_sb.sb_agcount * XFS_ALLOC_AGFL_RESERVE); + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + blocks += mp->m_sb.sb_agcount * mp->m_rmap_maxlevels; + return blocks; +} + +/* + * When deciding how much space to allocate out of an AG, we limit the + * allocation maximum size to the size the AG. However, we cannot use all the + * blocks in the AG - some are permanently used by metadata. These + * blocks are generally: + * - the AG superblock, AGF, AGI and AGFL + * - the AGF (bno and cnt) and AGI btree root blocks, and optionally + * the AGI free inode and rmap btree root blocks. + * - blocks on the AGFL according to xfs_alloc_set_aside() limits + * - the rmapbt root block + * + * The AG headers are sector sized, so the amount of space they take up is + * dependent on filesystem geometry. The others are all single blocks. + */ +unsigned int +xfs_alloc_ag_max_usable( + struct xfs_mount *mp) +{ + unsigned int blocks; + + blocks = XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)); /* ag headers */ + blocks += XFS_ALLOC_AGFL_RESERVE; + blocks += 3; /* AGF, AGI btree root blocks */ + if (xfs_sb_version_hasfinobt(&mp->m_sb)) + blocks++; /* finobt root block */ + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + blocks++; /* rmap root block */ + + return mp->m_sb.sb_agblocks - blocks; +} + /* * Lookup the record equal to [bno, len] in the btree given by cur. */ @@ -1904,6 +1968,11 @@ xfs_alloc_min_freelist( /* space needed by-size freespace btree */ min_free += min_t(unsigned int, pag->pagf_levels[XFS_BTNUM_CNTi] + 1, mp->m_ag_maxlevels); + /* space needed reverse mapping used space btree */ + if (xfs_sb_version_hasrmapbt(&mp->m_sb)) + min_free += min_t(unsigned int, + pag->pagf_levels[XFS_BTNUM_RMAPi] + 1, + mp->m_rmap_maxlevels); return min_free; } diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 88053422fda0..360f9e425536 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -55,42 +55,6 @@ typedef unsigned int xfs_alloctype_t; #define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */ #define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ -/* - * In order to avoid ENOSPC-related deadlock caused by - * out-of-order locking of AGF buffer (PV 947395), we place - * constraints on the relationship among actual allocations for - * data blocks, freelist blocks, and potential file data bmap - * btree blocks. However, these restrictions may result in no - * actual space allocated for a delayed extent, for example, a data - * block in a certain AG is allocated but there is no additional - * block for the additional bmap btree block due to a split of the - * bmap btree of the file. The result of this may lead to an - * infinite loop in xfssyncd when the file gets flushed to disk and - * all delayed extents need to be actually allocated. To get around - * this, we explicitly set aside a few blocks which will not be - * reserved in delayed allocation. Considering the minimum number of - * needed freelist blocks is 4 fsbs _per AG_, a potential split of file's bmap - * btree requires 1 fsb, so we set the number of set-aside blocks - * to 4 + 4*agcount. - */ -#define XFS_ALLOC_SET_ASIDE(mp) (4 + ((mp)->m_sb.sb_agcount * 4)) - -/* - * When deciding how much space to allocate out of an AG, we limit the - * allocation maximum size to the size the AG. However, we cannot use all the - * blocks in the AG - some are permanently used by metadata. These - * blocks are generally: - * - the AG superblock, AGF, AGI and AGFL - * - the AGF (bno and cnt) and AGI btree root blocks - * - 4 blocks on the AGFL according to XFS_ALLOC_SET_ASIDE() limits - * - * The AG headers are sector sized, so the amount of space they take up is - * dependent on filesystem geometry. The others are all single blocks. - */ -#define XFS_ALLOC_AG_MAX_USABLE(mp) \ - ((mp)->m_sb.sb_agblocks - XFS_BB_TO_FSB(mp, XFS_FSS_TO_BB(mp, 4)) - 7) - - /* * Argument structure for xfs_alloc routines. * This is turned into a structure to avoid having 20 arguments passed @@ -133,6 +97,11 @@ typedef struct xfs_alloc_arg { #define XFS_ALLOC_INITIAL_USER_DATA (1 << 1)/* special case start of file */ #define XFS_ALLOC_USERDATA_ZERO (1 << 2)/* zero extent on allocation */ +/* freespace limit calculations */ +#define XFS_ALLOC_AGFL_RESERVE 4 +unsigned int xfs_alloc_set_aside(struct xfs_mount *mp); +unsigned int xfs_alloc_ag_max_usable(struct xfs_mount *mp); + xfs_extlen_t xfs_alloc_longest_free_extent(struct xfs_mount *mp, struct xfs_perag *pag, xfs_extlen_t need); unsigned int xfs_alloc_min_freelist(struct xfs_mount *mp, diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index 919069fc7b23..c4b89218c7eb 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -3674,7 +3674,7 @@ xfs_bmap_btalloc( xfs_rmap_skip_owner_update(&args.oinfo); /* Trim the allocation back to the maximum an AG can fit. */ - args.maxlen = MIN(ap->length, XFS_ALLOC_AG_MAX_USABLE(mp)); + args.maxlen = MIN(ap->length, mp->m_ag_max_usable); args.firstblock = *ap->firstblock; blen = 0; if (nullfb) { diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index fb33e70e5c9a..0e3d4f5ec33c 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -745,6 +745,8 @@ xfs_sb_mount_common( mp->m_ialloc_min_blks = sbp->sb_spino_align; else mp->m_ialloc_min_blks = mp->m_ialloc_blks; + mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); + mp->m_ag_max_usable = xfs_alloc_ag_max_usable(mp); } /* diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c index 272c3f8b6f7d..4ff499aa7338 100644 --- a/fs/xfs/xfs_discard.c +++ b/fs/xfs/xfs_discard.c @@ -179,7 +179,7 @@ xfs_ioc_trim( * matter as trimming blocks is an advisory interface. */ if (range.start >= XFS_FSB_TO_B(mp, mp->m_sb.sb_dblocks) || - range.minlen > XFS_FSB_TO_B(mp, XFS_ALLOC_AG_MAX_USABLE(mp)) || + range.minlen > XFS_FSB_TO_B(mp, mp->m_ag_max_usable) || range.len < mp->m_sb.sb_blocksize) return -EINVAL; diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index 27e6e8cdd431..83304d54e6d4 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -584,6 +584,7 @@ xfs_growfs_data_private( } else mp->m_maxicount = 0; xfs_set_low_space_thresholds(mp); + mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); /* update secondary superblocks. */ for (agno = 1; agno < nagcount; agno++) { @@ -721,7 +722,7 @@ xfs_fs_counts( cnt->allocino = percpu_counter_read_positive(&mp->m_icount); cnt->freeino = percpu_counter_read_positive(&mp->m_ifree); cnt->freedata = percpu_counter_read_positive(&mp->m_fdblocks) - - XFS_ALLOC_SET_ASIDE(mp); + mp->m_alloc_set_aside; spin_lock(&mp->m_sb_lock); cnt->freertx = mp->m_sb.sb_frextents; @@ -809,7 +810,7 @@ xfs_reserve_blocks( error = -ENOSPC; do { free = percpu_counter_sum(&mp->m_fdblocks) - - XFS_ALLOC_SET_ASIDE(mp); + mp->m_alloc_set_aside; if (!free) break; diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index af608aade6df..f64d4d06cd95 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -5008,6 +5008,7 @@ xlog_do_recover( xfs_warn(mp, "Failed post-recovery per-ag init: %d", error); return error; } + mp->m_alloc_set_aside = xfs_alloc_set_aside(mp); xlog_recover_check_summary(log); diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 0041866d51ca..faeead671f9f 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1221,7 +1221,7 @@ xfs_mod_fdblocks( batch = XFS_FDBLOCKS_BATCH; __percpu_counter_add(&mp->m_fdblocks, delta, batch); - if (__percpu_counter_compare(&mp->m_fdblocks, XFS_ALLOC_SET_ASIDE(mp), + if (__percpu_counter_compare(&mp->m_fdblocks, mp->m_alloc_set_aside, XFS_FDBLOCKS_BATCH) >= 0) { /* we had space! */ return 0; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 0ed0f29ddaf5..b36676cde103 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -123,6 +123,8 @@ typedef struct xfs_mount { uint m_in_maxlevels; /* max inobt btree levels. */ uint m_rmap_maxlevels; /* max rmap btree levels */ xfs_extlen_t m_ag_prealloc_blocks; /* reserved ag blocks */ + uint m_alloc_set_aside; /* space we can't use */ + uint m_ag_max_usable; /* max space per AG */ struct radix_tree_root m_perag_tree; /* per-ag accounting info */ spinlock_t m_perag_lock; /* lock for m_perag_tree */ struct mutex m_growlock; /* growfs mutex */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 95a11beb48c6..449cadf149f9 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -1075,7 +1075,7 @@ xfs_fs_statfs( statp->f_blocks = sbp->sb_dblocks - lsize; spin_unlock(&mp->m_sb_lock); - statp->f_bfree = fdblocks - XFS_ALLOC_SET_ASIDE(mp); + statp->f_bfree = fdblocks - mp->m_alloc_set_aside; statp->f_bavail = statp->f_bfree; fakeinos = statp->f_bfree << sbp->sb_inopblog; -- cgit v1.2.3 From 04f130605ff6fb01a93a0885607921df9c463eed Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 3 Aug 2016 12:19:53 +1000 Subject: xfs: don't update rmapbt when fixing agfl Allow a caller of xfs_alloc_fix_freelist to disable rmapbt updates when fixing the AG freelist. xfs_repair needs this during phase 5 to be able to adjust the freelist while it's reconstructing the rmap btree; the missing entries will be added back at the very end of phase 5 once the AGFL contents settle down. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 18 ++++++++++++++---- fs/xfs/libxfs/xfs_alloc.h | 3 +++ 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'fs/xfs/libxfs/xfs_alloc.c') diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 73ab1ea1270d..a0dc2999778d 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2090,9 +2090,21 @@ xfs_alloc_fix_freelist( * anything other than extra overhead when we need to put more blocks * back on the free list? Maybe we should only do this when space is * getting low or the AGFL is more than half full? + * + * The NOSHRINK flag prevents the AGFL from being shrunk if it's too + * big; the NORMAP flag prevents AGFL expand/shrink operations from + * updating the rmapbt. Both flags are used in xfs_repair while we're + * rebuilding the rmapbt, and neither are used by the kernel. They're + * both required to ensure that rmaps are correctly recorded for the + * regenerated AGFL, bnobt, and cntbt. See repair/phase5.c and + * repair/rmap.c in xfsprogs for details. */ - xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG); - while (pag->pagf_flcount > need) { + memset(&targs, 0, sizeof(targs)); + if (flags & XFS_ALLOC_FLAG_NORMAP) + xfs_rmap_skip_owner_update(&targs.oinfo); + else + xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG); + while (!(flags & XFS_ALLOC_FLAG_NOSHRINK) && pag->pagf_flcount > need) { struct xfs_buf *bp; error = xfs_alloc_get_freelist(tp, agbp, &bno, 0); @@ -2106,10 +2118,8 @@ xfs_alloc_fix_freelist( xfs_trans_binval(tp, bp); } - memset(&targs, 0, sizeof(targs)); targs.tp = tp; targs.mp = mp; - xfs_rmap_ag_owner(&targs.oinfo, XFS_RMAP_OWN_AG); targs.agbp = agbp; targs.agno = args->agno; targs.alignment = targs.minlen = targs.prod = targs.isfl = 1; diff --git a/fs/xfs/libxfs/xfs_alloc.h b/fs/xfs/libxfs/xfs_alloc.h index 360f9e425536..6fe2d6b7cfe9 100644 --- a/fs/xfs/libxfs/xfs_alloc.h +++ b/fs/xfs/libxfs/xfs_alloc.h @@ -54,6 +54,9 @@ typedef unsigned int xfs_alloctype_t; */ #define XFS_ALLOC_FLAG_TRYLOCK 0x00000001 /* use trylock for buffer locking */ #define XFS_ALLOC_FLAG_FREEING 0x00000002 /* indicate caller is freeing extents*/ +#define XFS_ALLOC_FLAG_NORMAP 0x00000004 /* don't modify the rmapbt */ +#define XFS_ALLOC_FLAG_NOSHRINK 0x00000008 /* don't shrink the freelist */ + /* * Argument structure for xfs_alloc routines. -- cgit v1.2.3 From 3481b68285238054be519ad0c8cad5cc2425e26c Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Wed, 3 Aug 2016 12:31:07 +1000 Subject: xfs: move (and rename) the deferred bmap-free tracepoints Rename the deferred bmap-free to extent_free and make them only trigger when we're really running deferred ops. Signed-off-by: Darrick J. Wong Reviewed-by: Dave Chinner Signed-off-by: Dave Chinner --- fs/xfs/libxfs/xfs_alloc.c | 2 -- fs/xfs/xfs_trans_extfree.c | 7 +++++++ 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'fs/xfs/libxfs/xfs_alloc.c') diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index a0dc2999778d..776ae2f325d1 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -2819,8 +2819,6 @@ xfs_free_extent( ASSERT(len != 0); - trace_xfs_bmap_free_deferred(mp, agno, 0, agbno, len); - if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_FREE_EXTENT, XFS_RANDOM_FREE_EXTENT)) diff --git a/fs/xfs/xfs_trans_extfree.c b/fs/xfs/xfs_trans_extfree.c index 7d1e84c1b848..459ddec137a4 100644 --- a/fs/xfs/xfs_trans_extfree.c +++ b/fs/xfs/xfs_trans_extfree.c @@ -21,6 +21,7 @@ #include "xfs_format.h" #include "xfs_log_format.h" #include "xfs_trans_resv.h" +#include "xfs_bit.h" #include "xfs_mount.h" #include "xfs_defer.h" #include "xfs_trans.h" @@ -28,6 +29,7 @@ #include "xfs_extfree_item.h" #include "xfs_alloc.h" #include "xfs_bmap.h" +#include "xfs_trace.h" /* * This routine is called to allocate an "extent free done" @@ -68,10 +70,15 @@ xfs_trans_free_extent( xfs_extlen_t ext_len, struct xfs_owner_info *oinfo) { + struct xfs_mount *mp = tp->t_mountp; uint next_extent; + xfs_agnumber_t agno = XFS_FSB_TO_AGNO(mp, start_block); + xfs_agblock_t agbno = XFS_FSB_TO_AGBNO(mp, start_block); struct xfs_extent *extp; int error; + trace_xfs_bmap_free_deferred(tp->t_mountp, agno, 0, agbno, ext_len); + error = xfs_free_extent(tp, start_block, ext_len, oinfo); /* -- cgit v1.2.3