diff options
author:    Darrick J. Wong <darrick.wong@oracle.com>  2019-08-30 15:45:11 -0700
committer: Darrick J. Wong <darrick.wong@oracle.com>  2019-10-19 10:39:13 -0700
commit:    1034eb85aa650bfc1bf7034837e8dbc3f1f671f7 (patch)
tree:      39a08aebd0101b50176c0fa19794dbe2ebf0c51a
parent:    f306f3341f8afc730907c1ec3dc00ae692634b3f (diff)
xfs: parallelize inode inactivation
Split the inode inactivation work into per-AG work items so that we can
take advantage of parallelization.
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
 fs/xfs/scrub/common.c |   2
 fs/xfs/xfs_icache.c   | 105
 fs/xfs/xfs_icache.h   |   2
 fs/xfs/xfs_mount.c    |   3
 fs/xfs/xfs_mount.h    |   4
 fs/xfs/xfs_super.c    |   4
 6 files changed, 104 insertions(+), 16 deletions(-)
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index abe88fa756aa..299567271e01 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -910,6 +910,7 @@ xchk_stop_reaping( { sc->flags |= XCHK_REAPING_DISABLED; xfs_stop_block_reaping(sc->mp); + xfs_inactive_cancel_work(sc->mp); } /* Restart background reaping of resources. */ @@ -917,6 +918,7 @@ void xchk_start_reaping( struct xfs_scrub *sc) { + xfs_inactive_schedule_work(sc->mp, 0); xfs_start_block_reaping(sc->mp); sc->flags &= ~XCHK_REAPING_DISABLED; } diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 296cf2f2c417..4cce17eb3d6c 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1981,12 +1981,12 @@ xfs_start_block_reaping( /* Queue a new inode inactivation pass if there are reclaimable inodes. */ static void xfs_inactive_work_queue( - struct xfs_mount *mp) + struct xfs_perag *pag) { rcu_read_lock(); - if (radix_tree_tagged(&mp->m_perag_tree, XFS_ICI_RECLAIM_TAG)) - queue_delayed_work(mp->m_inactive_workqueue, - &mp->m_inactive_work, + if (pag->pag_ici_inactive) + queue_delayed_work(pag->pag_mount->m_inactive_workqueue, + &pag->pag_inactive_work, msecs_to_jiffies(xfs_syncd_centisecs / 6 * 10)); rcu_read_unlock(); } @@ -2012,7 +2012,7 @@ xfs_perag_set_inactive_tag( * take a while, so we allow the deferral of an already-scheduled * inactivation on the grounds that we prefer batching. */ - xfs_inactive_work_queue(mp); + xfs_inactive_work_queue(pag); trace_xfs_perag_set_reclaim(mp, pag->pag_agno, -1, _RET_IP_); } @@ -2121,6 +2121,28 @@ xfs_inactive_inode( } /* + * Inactivate the inodes in an AG. Even if the filesystem is corrupted, we + * still need to clear the INACTIVE iflag so that we can move on to reclaiming + * the inode. 
+ */ +static int +xfs_inactive_inodes_pag( + struct xfs_perag *pag) +{ + DEFINE_INACTIVE_CTX(inctx, NULL); + int error; + + error = xfs_ici_walk_ag(pag, 0, xfs_inactive_grab, xfs_inactive_inode, + NULL, &inctx, XFS_ICI_RECLAIM_TAG); + + /* If we inactivated any inodes at all, we need to kick reclaim. */ + if (inctx.kick_reclaim) + xfs_reclaim_work_queue(pag->pag_mount); + + return error; +} + +/* * Walk the AGs and reclaim the inodes in them. Even if the filesystem is * corrupted, we still need to clear the INACTIVE iflag so that we can move * on to reclaiming the inode. @@ -2148,8 +2170,9 @@ void xfs_inactive_worker( struct work_struct *work) { - struct xfs_mount *mp = container_of(to_delayed_work(work), - struct xfs_mount, m_inactive_work); + struct xfs_perag *pag = container_of(to_delayed_work(work), + struct xfs_perag, pag_inactive_work); + struct xfs_mount *mp = pag->pag_mount; int error; /* @@ -2164,12 +2187,33 @@ xfs_inactive_worker( if (!sb_start_write_trylock(mp->m_super)) return; - error = xfs_inactive_inodes(mp, NULL); + error = xfs_inactive_inodes_pag(pag); if (error && error != -EAGAIN) xfs_err(mp, "inode inactivation failed, error %d", error); sb_end_write(mp->m_super); - xfs_inactive_work_queue(mp); + xfs_inactive_work_queue(pag); +} + +/* Wait for all background inactivation work to finish. */ +static void +xfs_inactive_flush( + struct xfs_mount *mp) +{ + struct xfs_perag *pag; + xfs_agnumber_t agno = 0; + + while ((pag = xfs_perag_get_tag(mp, agno, XFS_ICI_RECLAIM_TAG))) { + bool flush; + + agno = pag->pag_agno + 1; + spin_lock(&pag->pag_ici_lock); + flush = pag->pag_ici_inactive > 0; + spin_unlock(&pag->pag_ici_lock); + if (flush) + flush_delayed_work(&pag->pag_inactive_work); + xfs_perag_put(pag); + } } /* Flush all inode inactivation work that might be queued. 
*/ @@ -2177,8 +2221,8 @@ void xfs_inactive_force( struct xfs_mount *mp) { - queue_delayed_work(mp->m_inactive_workqueue, &mp->m_inactive_work, 0); - flush_delayed_work(&mp->m_inactive_work); + xfs_inactive_schedule_work(mp, 0); + xfs_inactive_flush(mp); } /* @@ -2190,7 +2234,42 @@ void xfs_inactive_shutdown( struct xfs_mount *mp) { - cancel_delayed_work_sync(&mp->m_inactive_work); - flush_workqueue(mp->m_inactive_workqueue); + xfs_inactive_cancel_work(mp); xfs_inactive_inodes(mp, NULL); } + +/* Cancel all queued inactivation work. */ +void +xfs_inactive_cancel_work( + struct xfs_mount *mp) +{ + struct xfs_perag *pag; + xfs_agnumber_t agno = 0; + + while ((pag = xfs_perag_get_tag(mp, agno, XFS_ICI_RECLAIM_TAG))) { + agno = pag->pag_agno + 1; + cancel_delayed_work_sync(&pag->pag_inactive_work); + xfs_perag_put(pag); + } + flush_workqueue(mp->m_inactive_workqueue); +} + +/* Reschedule background inactivation work. */ +void +xfs_inactive_schedule_work( + struct xfs_mount *mp, + unsigned long delay) +{ + struct xfs_perag *pag; + xfs_agnumber_t agno = 0; + + while ((pag = xfs_perag_get_tag(mp, agno, XFS_ICI_RECLAIM_TAG))) { + agno = pag->pag_agno + 1; + spin_lock(&pag->pag_ici_lock); + if (pag->pag_ici_inactive) + queue_delayed_work(mp->m_inactive_workqueue, + &pag->pag_inactive_work, delay); + spin_unlock(&pag->pag_ici_lock); + xfs_perag_put(pag); + } +} diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h index eaddfba46f3e..f0a18238bc86 100644 --- a/fs/xfs/xfs_icache.h +++ b/fs/xfs/xfs_icache.h @@ -86,5 +86,7 @@ void xfs_inactive_worker(struct work_struct *work); int xfs_inactive_inodes(struct xfs_mount *mp, struct xfs_eofblocks *eofb); void xfs_inactive_force(struct xfs_mount *mp); void xfs_inactive_shutdown(struct xfs_mount *mp); +void xfs_inactive_cancel_work(struct xfs_mount *mp); +void xfs_inactive_schedule_work(struct xfs_mount *mp, unsigned long delay); #endif diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 3b8669f6bd24..060e558f44c3 100644 --- 
a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -126,6 +126,7 @@ __xfs_free_perag( { struct xfs_perag *pag = container_of(head, struct xfs_perag, rcu_head); + ASSERT(!delayed_work_pending(&pag->pag_inactive_work)); ASSERT(atomic_read(&pag->pag_ref) == 0); kmem_free(pag); } @@ -146,6 +147,7 @@ xfs_free_perag( spin_unlock(&mp->m_perag_lock); ASSERT(pag); ASSERT(atomic_read(&pag->pag_ref) == 0); + cancel_delayed_work_sync(&pag->pag_inactive_work); xfs_iunlink_destroy(pag); xfs_buf_hash_destroy(pag); mutex_destroy(&pag->pag_ici_reclaim_lock); @@ -201,6 +203,7 @@ xfs_initialize_perag( pag->pag_mount = mp; spin_lock_init(&pag->pag_ici_lock); mutex_init(&pag->pag_ici_reclaim_lock); + INIT_DELAYED_WORK(&pag->pag_inactive_work, xfs_inactive_worker); INIT_RADIX_TREE(&pag->pag_ici_root, GFP_ATOMIC); if (xfs_buf_hash_init(pag)) goto out_free_pag; diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index 2d675dd37e5b..e5170fbc98e9 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -177,7 +177,6 @@ typedef struct xfs_mount { trimming */ struct delayed_work m_cowblocks_work; /* background cow blocks trimming */ - struct delayed_work m_inactive_work; /* background inode inactive */ bool m_update_sb; /* sb needs update in mount */ int64_t m_low_space[XFS_LOWSP_MAX]; /* low free space thresholds */ @@ -416,6 +415,9 @@ typedef struct xfs_perag { /* Blocks reserved for the reverse mapping btree. 
*/ struct xfs_ag_resv pag_rmapbt_resv; + /* background inode inactivation */ + struct delayed_work pag_inactive_work; + /* reference count */ uint8_t pagf_refcount_level; diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 6ecf4686e8a8..90de69cfc00d 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -839,7 +839,8 @@ xfs_init_mount_workqueues( goto out_destroy_eofb; mp->m_inactive_workqueue = alloc_workqueue("xfs-inactive/%s", - WQ_MEM_RECLAIM|WQ_FREEZABLE, 0, mp->m_fsname); + WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_FREEZABLE, 0, + mp->m_fsname); if (!mp->m_inactive_workqueue) goto out_destroy_sync; @@ -1641,7 +1642,6 @@ xfs_mount_alloc( INIT_DELAYED_WORK(&mp->m_reclaim_work, xfs_reclaim_worker); INIT_DELAYED_WORK(&mp->m_eofblocks_work, xfs_eofblocks_worker); INIT_DELAYED_WORK(&mp->m_cowblocks_work, xfs_cowblocks_worker); - INIT_DELAYED_WORK(&mp->m_inactive_work, xfs_inactive_worker); mp->m_kobj.kobject.kset = xfs_kset; /* * We don't create the finobt per-ag space reservation until after log |