summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorJens Axboe <axboe@fb.com>2016-12-01 07:58:57 -0700
committerJens Axboe <axboe@fb.com>2016-12-01 07:58:57 -0700
commite314c4cc21c5637148a1f6abd2493746e4f7bc95 (patch)
tree981873106efdae4765482703a040155352ea388a /block
parenta5f78b7f7dd1f587905808ab19d2dc1c265dffd4 (diff)
parentd262920998c891dfd87cf73f823f0ff60e20cdad (diff)
Merge branch 'for-4.10/block' into for-next
Diffstat (limited to 'block')
-rw-r--r--block/bio.c1
-rw-r--r--block/blk-core.c4
-rw-r--r--block/blk-lib.c173
-rw-r--r--block/blk-merge.c17
-rw-r--r--block/blk-settings.c17
-rw-r--r--block/blk-sysfs.c11
-rw-r--r--block/blk-wbt.c5
-rw-r--r--block/partition-generic.c65
8 files changed, 251 insertions, 42 deletions
diff --git a/block/bio.c b/block/bio.c
index de257ced69b1..83db1f37fd0b 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -674,6 +674,7 @@ struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask,
switch (bio_op(bio)) {
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
+ case REQ_OP_WRITE_ZEROES:
break;
case REQ_OP_WRITE_SAME:
bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0];
diff --git a/block/blk-core.c b/block/blk-core.c
index 6c4a425690fc..3f2eb8d80189 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1950,6 +1950,10 @@ generic_make_request_checks(struct bio *bio)
if (!bdev_is_zoned(bio->bi_bdev))
goto not_supported;
break;
+ case REQ_OP_WRITE_ZEROES:
+ if (!bdev_write_zeroes_sectors(bio->bi_bdev))
+ goto not_supported;
+ break;
default:
break;
}
diff --git a/block/blk-lib.c b/block/blk-lib.c
index 18abda862915..510a6fb15318 100644
--- a/block/blk-lib.c
+++ b/block/blk-lib.c
@@ -137,24 +137,24 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
EXPORT_SYMBOL(blkdev_issue_discard);
/**
- * blkdev_issue_write_same - queue a write same operation
+ * __blkdev_issue_write_same - generate number of bios with same page
* @bdev: target blockdev
* @sector: start sector
* @nr_sects: number of sectors to write
* @gfp_mask: memory allocation flags (for bio_alloc)
* @page: page containing data to write
+ * @biop: pointer to anchor bio
*
* Description:
- * Issue a write same request for the sectors in question.
+ * Generate and issue number of bios(REQ_OP_WRITE_SAME) with same page.
*/
-int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask,
- struct page *page)
+static int __blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, struct page *page,
+ struct bio **biop)
{
struct request_queue *q = bdev_get_queue(bdev);
unsigned int max_write_same_sectors;
- struct bio *bio = NULL;
- int ret = 0;
+ struct bio *bio = *biop;
sector_t bs_mask;
if (!q)
@@ -164,6 +164,9 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
if ((sector | nr_sects) & bs_mask)
return -EINVAL;
+ if (!bdev_write_same(bdev))
+ return -EOPNOTSUPP;
+
/* Ensure that max_write_same_sectors doesn't overflow bi_size */
max_write_same_sectors = UINT_MAX >> 9;
@@ -185,32 +188,112 @@ int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
bio->bi_iter.bi_size = nr_sects << 9;
nr_sects = 0;
}
+ cond_resched();
}
- if (bio) {
+ *biop = bio;
+ return 0;
+}
+
+/**
+ * blkdev_issue_write_same - queue a write same operation
+ * @bdev: target blockdev
+ * @sector: start sector
+ * @nr_sects: number of sectors to write
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ * @page: page containing data
+ *
+ * Description:
+ * Issue a write same request for the sectors in question.
+ */
+int blkdev_issue_write_same(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask,
+ struct page *page)
+{
+ struct bio *bio = NULL;
+ struct blk_plug plug;
+ int ret;
+
+ blk_start_plug(&plug);
+ ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask, page,
+ &bio);
+ if (ret == 0 && bio) {
ret = submit_bio_wait(bio);
bio_put(bio);
}
+ blk_finish_plug(&plug);
return ret;
}
EXPORT_SYMBOL(blkdev_issue_write_same);
/**
- * blkdev_issue_zeroout - generate number of zero filed write bios
+ * __blkdev_issue_write_zeroes - generate number of bios with WRITE ZEROES
* @bdev: blockdev to issue
* @sector: start sector
* @nr_sects: number of sectors to write
* @gfp_mask: memory allocation flags (for bio_alloc)
+ * @biop: pointer to anchor bio
*
* Description:
- * Generate and issue number of bios with zerofiled pages.
+ * Generate and issue number of bios(REQ_OP_WRITE_ZEROES) with zerofiled pages.
*/
+static int __blkdev_issue_write_zeroes(struct block_device *bdev,
+ sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
+ struct bio **biop)
+{
+ struct bio *bio = *biop;
+ unsigned int max_write_zeroes_sectors;
+ struct request_queue *q = bdev_get_queue(bdev);
+
+ if (!q)
+ return -ENXIO;
+
+ /* Ensure that max_write_zeroes_sectors doesn't overflow bi_size */
+ max_write_zeroes_sectors = bdev_write_zeroes_sectors(bdev);
+
+ if (max_write_zeroes_sectors == 0)
+ return -EOPNOTSUPP;
-static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask)
+ while (nr_sects) {
+ bio = next_bio(bio, 0, gfp_mask);
+ bio->bi_iter.bi_sector = sector;
+ bio->bi_bdev = bdev;
+ bio_set_op_attrs(bio, REQ_OP_WRITE_ZEROES, 0);
+
+ if (nr_sects > max_write_zeroes_sectors) {
+ bio->bi_iter.bi_size = max_write_zeroes_sectors << 9;
+ nr_sects -= max_write_zeroes_sectors;
+ sector += max_write_zeroes_sectors;
+ } else {
+ bio->bi_iter.bi_size = nr_sects << 9;
+ nr_sects = 0;
+ }
+ cond_resched();
+ }
+
+ *biop = bio;
+ return 0;
+}
+
+/**
+ * __blkdev_issue_zeroout - generate number of zero filed write bios
+ * @bdev: blockdev to issue
+ * @sector: start sector
+ * @nr_sects: number of sectors to write
+ * @gfp_mask: memory allocation flags (for bio_alloc)
+ * @biop: pointer to anchor bio
+ * @discard: discard flag
+ *
+ * Description:
+ * Generate and issue number of bios with zerofiled pages.
+ */
+int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
+ sector_t nr_sects, gfp_t gfp_mask, struct bio **biop,
+ bool discard)
{
int ret;
- struct bio *bio = NULL;
+ int bi_size = 0;
+ struct bio *bio = *biop;
unsigned int sz;
sector_t bs_mask;
@@ -218,6 +301,24 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
if ((sector | nr_sects) & bs_mask)
return -EINVAL;
+ if (discard) {
+ ret = __blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask,
+ BLKDEV_DISCARD_ZERO, biop);
+ if (ret == 0 || (ret && ret != -EOPNOTSUPP))
+ goto out;
+ }
+
+ ret = __blkdev_issue_write_zeroes(bdev, sector, nr_sects, gfp_mask,
+ biop);
+ if (ret == 0 || (ret && ret != -EOPNOTSUPP))
+ goto out;
+
+ ret = __blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
+ ZERO_PAGE(0), biop);
+ if (ret == 0 || (ret && ret != -EOPNOTSUPP))
+ goto out;
+
+ ret = 0;
while (nr_sects != 0) {
bio = next_bio(bio, min(nr_sects, (sector_t)BIO_MAX_PAGES),
gfp_mask);
@@ -227,21 +328,20 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
while (nr_sects != 0) {
sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects);
- ret = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
- nr_sects -= ret >> 9;
- sector += ret >> 9;
- if (ret < (sz << 9))
+ bi_size = bio_add_page(bio, ZERO_PAGE(0), sz << 9, 0);
+ nr_sects -= bi_size >> 9;
+ sector += bi_size >> 9;
+ if (bi_size < (sz << 9))
break;
}
+ cond_resched();
}
- if (bio) {
- ret = submit_bio_wait(bio);
- bio_put(bio);
- return ret;
- }
- return 0;
+ *biop = bio;
+out:
+ return ret;
}
+EXPORT_SYMBOL(__blkdev_issue_zeroout);
/**
* blkdev_issue_zeroout - zero-fill a block range
@@ -258,26 +358,27 @@ static int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
* the discard request fail, if the discard flag is not set, or if
* discard_zeroes_data is not supported, this function will resort to
* zeroing the blocks manually, thus provisioning (allocating,
- * anchoring) them. If the block device supports the WRITE SAME command
- * blkdev_issue_zeroout() will use it to optimize the process of
+ * anchoring) them. If the block device supports WRITE ZEROES or WRITE SAME
+ * command(s), blkdev_issue_zeroout() will use it to optimize the process of
* clearing the block range. Otherwise the zeroing will be performed
* using regular WRITE calls.
*/
-
int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, bool discard)
{
- if (discard) {
- if (!blkdev_issue_discard(bdev, sector, nr_sects, gfp_mask,
- BLKDEV_DISCARD_ZERO))
- return 0;
- }
+ int ret;
+ struct bio *bio = NULL;
+ struct blk_plug plug;
- if (bdev_write_same(bdev) &&
- blkdev_issue_write_same(bdev, sector, nr_sects, gfp_mask,
- ZERO_PAGE(0)) == 0)
- return 0;
+ blk_start_plug(&plug);
+ ret = __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask,
+ &bio, discard);
+ if (ret == 0 && bio) {
+ ret = submit_bio_wait(bio);
+ bio_put(bio);
+ }
+ blk_finish_plug(&plug);
- return __blkdev_issue_zeroout(bdev, sector, nr_sects, gfp_mask);
+ return ret;
}
EXPORT_SYMBOL(blkdev_issue_zeroout);
diff --git a/block/blk-merge.c b/block/blk-merge.c
index fda6a12fc776..cf2848cb91d8 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -199,6 +199,10 @@ void blk_queue_split(struct request_queue *q, struct bio **bio,
case REQ_OP_SECURE_ERASE:
split = blk_bio_discard_split(q, *bio, bs, &nsegs);
break;
+ case REQ_OP_WRITE_ZEROES:
+ split = NULL;
+ nsegs = (*bio)->bi_phys_segments;
+ break;
case REQ_OP_WRITE_SAME:
split = blk_bio_write_same_split(q, *bio, bs, &nsegs);
break;
@@ -241,11 +245,15 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q,
* This should probably be returning 0, but blk_add_request_payload()
* (Christoph!!!!)
*/
- if (bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_SECURE_ERASE)
- return 1;
-
- if (bio_op(bio) == REQ_OP_WRITE_SAME)
+ switch (bio_op(bio)) {
+ case REQ_OP_DISCARD:
+ case REQ_OP_SECURE_ERASE:
+ case REQ_OP_WRITE_SAME:
+ case REQ_OP_WRITE_ZEROES:
return 1;
+ default:
+ break;
+ }
fbio = bio;
cluster = blk_queue_cluster(q);
@@ -416,6 +424,7 @@ static int __blk_bios_map_sg(struct request_queue *q, struct bio *bio,
switch (bio_op(bio)) {
case REQ_OP_DISCARD:
case REQ_OP_SECURE_ERASE:
+ case REQ_OP_WRITE_ZEROES:
/*
* This is a hack - drivers should be neither modifying the
* biovec, nor relying on bi_vcnt - but because of
diff --git a/block/blk-settings.c b/block/blk-settings.c
index c7ccabc0ec3e..8a2bc124a684 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -96,6 +96,7 @@ void blk_set_default_limits(struct queue_limits *lim)
lim->max_dev_sectors = 0;
lim->chunk_sectors = 0;
lim->max_write_same_sectors = 0;
+ lim->max_write_zeroes_sectors = 0;
lim->max_discard_sectors = 0;
lim->max_hw_discard_sectors = 0;
lim->discard_granularity = 0;
@@ -132,6 +133,7 @@ void blk_set_stacking_limits(struct queue_limits *lim)
lim->max_sectors = UINT_MAX;
lim->max_dev_sectors = UINT_MAX;
lim->max_write_same_sectors = UINT_MAX;
+ lim->max_write_zeroes_sectors = UINT_MAX;
}
EXPORT_SYMBOL(blk_set_stacking_limits);
@@ -300,6 +302,19 @@ void blk_queue_max_write_same_sectors(struct request_queue *q,
EXPORT_SYMBOL(blk_queue_max_write_same_sectors);
/**
+ * blk_queue_max_write_zeroes_sectors - set max sectors for a single
+ * write zeroes
+ * @q: the request queue for the device
+ * @max_write_zeroes_sectors: maximum number of sectors to write per command
+ **/
+void blk_queue_max_write_zeroes_sectors(struct request_queue *q,
+ unsigned int max_write_zeroes_sectors)
+{
+ q->limits.max_write_zeroes_sectors = max_write_zeroes_sectors;
+}
+EXPORT_SYMBOL(blk_queue_max_write_zeroes_sectors);
+
+/**
* blk_queue_max_segments - set max hw segments for a request for this queue
* @q: the request queue for the device
* @max_segments: max number of segments
@@ -527,6 +542,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
t->max_write_same_sectors = min(t->max_write_same_sectors,
b->max_write_same_sectors);
+ t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
+ b->max_write_zeroes_sectors);
t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index a97841491769..706b27bd73a1 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -211,6 +211,11 @@ static ssize_t queue_write_same_max_show(struct request_queue *q, char *page)
(unsigned long long)q->limits.max_write_same_sectors << 9);
}
+static ssize_t queue_write_zeroes_max_show(struct request_queue *q, char *page)
+{
+ return sprintf(page, "%llu\n",
+ (unsigned long long)q->limits.max_write_zeroes_sectors << 9);
+}
static ssize_t
queue_max_sectors_store(struct request_queue *q, const char *page, size_t count)
@@ -611,6 +616,11 @@ static struct queue_sysfs_entry queue_write_same_max_entry = {
.show = queue_write_same_max_show,
};
+static struct queue_sysfs_entry queue_write_zeroes_max_entry = {
+ .attr = {.name = "write_zeroes_max_bytes", .mode = S_IRUGO },
+ .show = queue_write_zeroes_max_show,
+};
+
static struct queue_sysfs_entry queue_nonrot_entry = {
.attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR },
.show = queue_show_nonrot,
@@ -700,6 +710,7 @@ static struct attribute *default_attrs[] = {
&queue_discard_max_hw_entry.attr,
&queue_discard_zeroes_data_entry.attr,
&queue_write_same_max_entry.attr,
+ &queue_write_zeroes_max_entry.attr,
&queue_nonrot_entry.attr,
&queue_zoned_entry.attr,
&queue_nomerges_entry.attr,
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index b8647343141f..d500e43da5d9 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -575,9 +575,10 @@ static inline bool wbt_should_throttle(struct rq_wb *rwb, struct bio *bio)
const int op = bio_op(bio);
/*
- * If not a WRITE (or a discard), do nothing
+ * If not a WRITE (or a discard or write zeroes), do nothing
*/
- if (!(op == REQ_OP_WRITE || op == REQ_OP_DISCARD))
+ if (!(op == REQ_OP_WRITE || op == REQ_OP_DISCARD ||
+ op == REQ_OP_WRITE_ZEROES))
return false;
/*
diff --git a/block/partition-generic.c b/block/partition-generic.c
index 71d9ed9df8da..d7beb6bbbf66 100644
--- a/block/partition-generic.c
+++ b/block/partition-generic.c
@@ -430,6 +430,56 @@ static int drop_partitions(struct gendisk *disk, struct block_device *bdev)
return 0;
}
+static bool part_zone_aligned(struct gendisk *disk,
+ struct block_device *bdev,
+ sector_t from, sector_t size)
+{
+ unsigned int zone_size = bdev_zone_size(bdev);
+
+ /*
+ * If this function is called, then the disk is a zoned block device
+ * (host-aware or host-managed). This can be detected even if the
+ * zoned block device support is disabled (CONFIG_BLK_DEV_ZONED not
+ * set). In this case, however, only host-aware devices will be seen
+ * as a block device is not created for host-managed devices. Without
+ * zoned block device support, host-aware drives can still be used as
+ * regular block devices (no zone operation) and their zone size will
+ * be reported as 0. Allow this case.
+ */
+ if (!zone_size)
+ return true;
+
+ /*
+ * Check partition start and size alignement. If the drive has a
+ * smaller last runt zone, ignore it and allow the partition to
+ * use it. Check the zone size too: it should be a power of 2 number
+ * of sectors.
+ */
+ if (WARN_ON_ONCE(!is_power_of_2(zone_size))) {
+ u32 rem;
+
+ div_u64_rem(from, zone_size, &rem);
+ if (rem)
+ return false;
+ if ((from + size) < get_capacity(disk)) {
+ div_u64_rem(size, zone_size, &rem);
+ if (rem)
+ return false;
+ }
+
+ } else {
+
+ if (from & (zone_size - 1))
+ return false;
+ if ((from + size) < get_capacity(disk) &&
+ (size & (zone_size - 1)))
+ return false;
+
+ }
+
+ return true;
+}
+
int rescan_partitions(struct gendisk *disk, struct block_device *bdev)
{
struct parsed_partitions *state = NULL;
@@ -529,6 +579,21 @@ rescan:
}
}
+ /*
+ * On a zoned block device, partitions should be aligned on the
+ * device zone size (i.e. zone boundary crossing not allowed).
+ * Otherwise, resetting the write pointer of the last zone of
+ * one partition may impact the following partition.
+ */
+ if (bdev_is_zoned(bdev) &&
+ !part_zone_aligned(disk, bdev, from, size)) {
+ printk(KERN_WARNING
+ "%s: p%d start %llu+%llu is not zone aligned\n",
+ disk->disk_name, p, (unsigned long long) from,
+ (unsigned long long) size);
+ continue;
+ }
+
part = add_partition(disk, p, from, size,
state->parts[p].flags,
&state->parts[p].info);