summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-06-30 18:19:39 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-06-30 18:19:39 -0700
commit2cfa582be80081fb8db02d4d9b44bff34b82ac54 (patch)
tree2faf8db8426b389ca8c9ed76065c688431bb7eb9 /block
parentdbe69e43372212527abf48609aba7fc39a6daa27 (diff)
parent5c0de3d72f8c05678ed769bea24e98128f7ab570 (diff)
Merge tag 'for-5.14/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer: - Various DM persistent-data library improvements and fixes that benefit both the DM thinp and cache targets. - A few small DM kcopyd efficiency improvements. - Significant zoned related block core, DM core and DM zoned target changes that culminate with adding zoned append emulation (which is required to properly fix DM crypt's zoned support). - Various DM writecache target changes that improve efficiency. Adds an optional "metadata_only" feature that only promotes bios flagged with REQ_META. But the most significant improvement is writecache's ability to pause writeback, for a configurable time, if/when the working set is larger than the cache (and the cache is full) -- this ensures performance is no worse than the slower origin device. * tag 'for-5.14/dm-changes' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (35 commits) dm writecache: make writeback pause configurable dm writecache: pause writeback if cache full and origin being written directly dm io tracker: factor out IO tracker dm btree remove: assign new_root only when removal succeeds dm zone: fix dm_revalidate_zones() memory allocation dm ps io affinity: remove redundant continue statement dm writecache: add optional "metadata_only" parameter dm writecache: add "cleaner" and "max_age" to Documentation dm writecache: write at least 4k when committing dm writecache: flush origin device when writing and cache is full dm writecache: have ssd writeback wait if the kcopyd workqueue is busy dm writecache: use list_move instead of list_del/list_add in writecache_writeback() dm writecache: commit just one block, not a full page dm writecache: remove unused gfp_t argument from wc_add_block() dm crypt: Fix zoned block device support dm: introduce zone append emulation dm: rearrange core declarations for extended use from dm-zone.c block: introduce BIO_ZONE_WRITE_LOCKED bio flag block: introduce bio zone helpers block: improve handling of all 
zones reset operation ...
Diffstat (limited to 'block')
-rw-r--r--block/blk-zoned.c119
1 files changed, 92 insertions, 27 deletions
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 250cb76ee615..86fce751bb17 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -161,18 +161,89 @@ int blkdev_report_zones(struct block_device *bdev, sector_t sector,
}
EXPORT_SYMBOL_GPL(blkdev_report_zones);
-static inline bool blkdev_allow_reset_all_zones(struct block_device *bdev,
- sector_t sector,
- sector_t nr_sectors)
+static inline unsigned long *blk_alloc_zone_bitmap(int node,
+ unsigned int nr_zones)
{
- if (!blk_queue_zone_resetall(bdev_get_queue(bdev)))
- return false;
+ return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
+ GFP_NOIO, node);
+}
+static int blk_zone_need_reset_cb(struct blk_zone *zone, unsigned int idx,
+ void *data)
+{
/*
- * REQ_OP_ZONE_RESET_ALL can be executed only if the number of sectors
- * of the applicable zone range is the entire disk.
+ * For an all-zones reset, ignore conventional, empty, read-only
+ * and offline zones.
*/
- return !sector && nr_sectors == get_capacity(bdev->bd_disk);
+ switch (zone->cond) {
+ case BLK_ZONE_COND_NOT_WP:
+ case BLK_ZONE_COND_EMPTY:
+ case BLK_ZONE_COND_READONLY:
+ case BLK_ZONE_COND_OFFLINE:
+ return 0;
+ default:
+ set_bit(idx, (unsigned long *)data);
+ return 0;
+ }
+}
+
+static int blkdev_zone_reset_all_emulated(struct block_device *bdev,
+ gfp_t gfp_mask)
+{
+ struct request_queue *q = bdev_get_queue(bdev);
+ sector_t capacity = get_capacity(bdev->bd_disk);
+ sector_t zone_sectors = blk_queue_zone_sectors(q);
+ unsigned long *need_reset;
+ struct bio *bio = NULL;
+ sector_t sector = 0;
+ int ret;
+
+ need_reset = blk_alloc_zone_bitmap(q->node, q->nr_zones);
+ if (!need_reset)
+ return -ENOMEM;
+
+ ret = bdev->bd_disk->fops->report_zones(bdev->bd_disk, 0,
+ q->nr_zones, blk_zone_need_reset_cb,
+ need_reset);
+ if (ret < 0)
+ goto out_free_need_reset;
+
+ ret = 0;
+ while (sector < capacity) {
+ if (!test_bit(blk_queue_zone_no(q, sector), need_reset)) {
+ sector += zone_sectors;
+ continue;
+ }
+
+ bio = blk_next_bio(bio, 0, gfp_mask);
+ bio_set_dev(bio, bdev);
+ bio->bi_opf = REQ_OP_ZONE_RESET | REQ_SYNC;
+ bio->bi_iter.bi_sector = sector;
+ sector += zone_sectors;
+
+ /* This may take a while, so be nice to others */
+ cond_resched();
+ }
+
+ if (bio) {
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
+ }
+
+out_free_need_reset:
+ kfree(need_reset);
+ return ret;
+}
+
+static int blkdev_zone_reset_all(struct block_device *bdev, gfp_t gfp_mask)
+{
+ struct bio bio;
+
+ bio_init(&bio, NULL, 0);
+ bio_set_dev(&bio, bdev);
+ bio.bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC;
+
+ return submit_bio_wait(&bio);
}
/**
@@ -200,7 +271,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
sector_t capacity = get_capacity(bdev->bd_disk);
sector_t end_sector = sector + nr_sectors;
struct bio *bio = NULL;
- int ret;
+ int ret = 0;
if (!blk_queue_is_zoned(q))
return -EOPNOTSUPP;
@@ -222,20 +293,21 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
if ((nr_sectors & (zone_sectors - 1)) && end_sector != capacity)
return -EINVAL;
+ /*
+ * In the case of a zone reset operation over all zones,
+ * REQ_OP_ZONE_RESET_ALL can be used with devices supporting this
+ * command. For other devices, we emulate this command behavior by
+ * identifying the zones needing a reset.
+ */
+ if (op == REQ_OP_ZONE_RESET && sector == 0 && nr_sectors == capacity) {
+ if (!blk_queue_zone_resetall(q))
+ return blkdev_zone_reset_all_emulated(bdev, gfp_mask);
+ return blkdev_zone_reset_all(bdev, gfp_mask);
+ }
+
while (sector < end_sector) {
bio = blk_next_bio(bio, 0, gfp_mask);
bio_set_dev(bio, bdev);
-
- /*
- * Special case for the zone reset operation that reset all
- * zones, this is useful for applications like mkfs.
- */
- if (op == REQ_OP_ZONE_RESET &&
- blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) {
- bio->bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC;
- break;
- }
-
bio->bi_opf = op | REQ_SYNC;
bio->bi_iter.bi_sector = sector;
sector += zone_sectors;
@@ -396,13 +468,6 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
return ret;
}
-static inline unsigned long *blk_alloc_zone_bitmap(int node,
- unsigned int nr_zones)
-{
- return kcalloc_node(BITS_TO_LONGS(nr_zones), sizeof(unsigned long),
- GFP_NOIO, node);
-}
-
void blk_queue_free_zone_bitmaps(struct request_queue *q)
{
kfree(q->conv_zones_bitmap);