diff options
author | Darrick J. Wong <darrick.wong@oracle.com> | 2020-02-19 17:01:33 -0800 |
---|---|---|
committer | Darrick J. Wong <darrick.wong@oracle.com> | 2020-06-01 21:16:28 -0700 |
commit | f814ba2f0977625f6da42dc50882641cffeb2cf9 (patch) | |
tree | 4747a3ed03fd87338e31888cfc9c4f7e6b6b6031 | |
parent | 9b6506b756a606d00a6c9c0d0c11bd0788f15451 (diff) |
xfs: increase the default parallelism levels of pwork clients [pwork-parallelism_2020-06-01]
Increase the default parallelism level for pwork clients so that we can
take advantage of computers with a lot of CPUs and a lot of storage hardware.
8x raid0 spinning rust running quotacheck (threads, runtime):
1 39s
2 29s
4 26s
8 24s
24 (nr_cpus) 24s
4x raid0 sata ssds running quotacheck (threads, runtime):
1 12s
2 12s
4 12s
8 13s
24 (nr_cpus) 14s
4x raid0 nvme ssds running quotacheck (threads, runtime):
1 18s
2 18s
4 19s
8 20s
20 (nr_cpus) 20s
So, mixed results...
Signed-off-by: Darrick J. Wong <darrick.wong@oracle.com>
-rw-r--r-- | fs/xfs/xfs_buf.c | 34 | ||||
-rw-r--r-- | fs/xfs/xfs_buf.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_iwalk.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.c | 39 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_pwork.c | 17 | ||||
-rw-r--r-- | fs/xfs/xfs_pwork.h | 2 |
7 files changed, 82 insertions, 14 deletions
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 9ec3eaf1c618..65f8a1990acc 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -2186,3 +2186,37 @@ xfs_verify_magic16( return false; return dmagic == bp->b_ops->magic16[idx]; } + +/* Estimate the amount of parallelism available for a given device. */ +unsigned int +xfs_buftarg_guess_threads( + struct xfs_buftarg *btp) +{ + int iomin; + int ioopt; + + /* + * The device tells us that it is non-rotational, and we take that to + * mean there are no moving parts and that the device can handle all + * the CPUs throwing IO requests at it. + */ + if (blk_queue_nonrot(btp->bt_bdev->bd_queue)) + return num_online_cpus(); + + /* + * The device has a preferred and minimum IO size that suggest a RAID + * setup, so infer the number of disks and assume that the parallelism + * is equal to the disk count. + */ + iomin = bdev_io_min(btp->bt_bdev); + ioopt = bdev_io_opt(btp->bt_bdev); + if (iomin > 0 && ioopt > iomin) + return ioopt / iomin; + + /* + * The device did not indicate that it has any capabilities beyond that + * of a rotating disk with a single drive head, so we estimate no + * parallelism at all. 
+ */ + return 1; +} diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index 9a04c53c2488..deaa9c2607af 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -350,6 +350,7 @@ extern xfs_buftarg_t *xfs_alloc_buftarg(struct xfs_mount *, extern void xfs_free_buftarg(struct xfs_buftarg *); extern void xfs_wait_buftarg(xfs_buftarg_t *); extern int xfs_setsize_buftarg(xfs_buftarg_t *, unsigned int); +unsigned int xfs_buftarg_guess_threads(struct xfs_buftarg *btp); #define xfs_getsize_buftarg(buftarg) block_size((buftarg)->bt_bdev) #define xfs_readonly_buftarg(buftarg) bdev_read_only((buftarg)->bt_bdev) diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c index 233dcc8784db..f2f89cae1f3c 100644 --- a/fs/xfs/xfs_iwalk.c +++ b/fs/xfs/xfs_iwalk.c @@ -605,7 +605,7 @@ xfs_iwalk_threaded( ASSERT(agno < mp->m_sb.sb_agcount); ASSERT(!(flags & ~XFS_IWALK_FLAGS_ALL)); - nr_threads = xfs_pwork_guess_datadev_parallelism(mp); + nr_threads = xfs_pwork_guess_threads(mp); error = xfs_pwork_init(mp, &pctl, xfs_iwalk_ag_work, "xfs_iwalk", nr_threads); if (error) diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index c5513e5a226a..a9d359f8fad2 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -1408,3 +1408,42 @@ xfs_mod_delalloc( percpu_counter_add_batch(&mp->m_delalloc_blks, delta, XFS_DELALLOC_BATCH); } + +/* + * Estimate the amount of parallelism that is available for metadata operations + * on this filesystem. + */ +unsigned int +xfs_guess_metadata_threads( + struct xfs_mount *mp) +{ + unsigned int threads; + + /* + * Estimate the amount of parallelism for metadata operations from the + * least capable of the two devices that handle metadata. Cap that + * estimate to the number of AGs to avoid unnecessary lock contention. 
+ */ + threads = xfs_buftarg_guess_threads(mp->m_ddev_targp); + if (mp->m_logdev_targp != mp->m_ddev_targp) + threads = min(xfs_buftarg_guess_threads(mp->m_logdev_targp), + threads); + threads = min(mp->m_sb.sb_agcount, threads); + + /* If the storage told us it has fancy capabilities, we're done. */ + if (threads > 1) + goto clamp; + + /* + * Metadata storage did not even hint that it has any parallel + * capability. If the filesystem was formatted with a stripe unit and + * width, we'll treat that as evidence of a RAID setup and estimate + * the number of disks. + */ + if (mp->m_sb.sb_unit > 0 && mp->m_sb.sb_width > mp->m_sb.sb_unit) + threads = mp->m_sb.sb_width / mp->m_sb.sb_unit; + +clamp: + /* Don't return an estimate larger than the CPU count. */ + return min(num_online_cpus(), threads); +} diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index b2e4598fdf7d..dd397b778e76 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -417,5 +417,6 @@ struct xfs_error_cfg * xfs_error_get_cfg(struct xfs_mount *mp, int error_class, int error); void xfs_force_summary_recalc(struct xfs_mount *mp); void xfs_mod_delalloc(struct xfs_mount *mp, int64_t delta); +unsigned int xfs_guess_metadata_threads(struct xfs_mount *mp); #endif /* __XFS_MOUNT_H__ */ diff --git a/fs/xfs/xfs_pwork.c b/fs/xfs/xfs_pwork.c index 4bcc3e61056c..5f1a5e575a48 100644 --- a/fs/xfs/xfs_pwork.c +++ b/fs/xfs/xfs_pwork.c @@ -118,19 +118,12 @@ xfs_pwork_poll( touch_softlockup_watchdog(); } -/* - * Return the amount of parallelism that the data device can handle, or 0 for - * no limit. - */ +/* Estimate how many threads we need for a parallel work queue. */ unsigned int -xfs_pwork_guess_datadev_parallelism( +xfs_pwork_guess_threads( struct xfs_mount *mp) { - struct xfs_buftarg *btp = mp->m_ddev_targp; - - /* - * For now we'll go with the most conservative setting possible, - * which is two threads for an SSD and 1 thread everywhere else. - */ - return blk_queue_nonrot(btp->bt_bdev->bd_queue) ? 
2 : 1; + /* pwork queues are not unbounded, so we have to abide WQ_MAX_ACTIVE. */ + return min_t(unsigned int, xfs_guess_metadata_threads(mp), + WQ_MAX_ACTIVE); } diff --git a/fs/xfs/xfs_pwork.h b/fs/xfs/xfs_pwork.h index 8133124cf3bb..f402920f7061 100644 --- a/fs/xfs/xfs_pwork.h +++ b/fs/xfs/xfs_pwork.h @@ -56,6 +56,6 @@ int xfs_pwork_init(struct xfs_mount *mp, struct xfs_pwork_ctl *pctl, void xfs_pwork_queue(struct xfs_pwork_ctl *pctl, struct xfs_pwork *pwork); int xfs_pwork_destroy(struct xfs_pwork_ctl *pctl); void xfs_pwork_poll(struct xfs_pwork_ctl *pctl); -unsigned int xfs_pwork_guess_datadev_parallelism(struct xfs_mount *mp); +unsigned int xfs_pwork_guess_threads(struct xfs_mount *mp); #endif /* __XFS_PWORK_H__ */ |