Diffstat (limited to 'block')
-rw-r--r-- | block/Makefile          |   2
-rw-r--r-- | block/bio.c             |  37
-rw-r--r-- | block/blk-cgroup.c      |  10
-rw-r--r-- | block/blk-core.c        |  28
-rw-r--r-- | block/blk-ia-ranges.c   | 348
-rw-r--r-- | block/blk-mq-debugfs.c  |   3
-rw-r--r-- | block/blk-mq-sched.c    |   2
-rw-r--r-- | block/blk-mq.c          |  93
-rw-r--r-- | block/blk-mq.h          |   8
-rw-r--r-- | block/blk-settings.c    |  20
-rw-r--r-- | block/blk-sysfs.c       |  26
-rw-r--r-- | block/blk.h             |   4
-rw-r--r-- | block/bsg-lib.c         |  32
-rw-r--r-- | block/fops.c            | 153
-rw-r--r-- | block/genhd.c           |  23
-rw-r--r-- | block/ioctl.c           |  20
-rw-r--r-- | block/partitions/core.c |   2
-rw-r--r-- | block/partitions/efi.c  |   2
-rw-r--r-- | block/partitions/ibm.c  |  19
19 files changed, 628 insertions, 204 deletions
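
One thread running through this series is the removal of the blk_get_request()/blk_put_request() wrappers (see the blk-core.c and bsg-lib.c hunks below): callers now use blk_mq_alloc_request() and blk_mq_free_request() directly, and per-request setup that used to live in an ->initialize_rq_fn hook moves into the caller. A minimal caller-side sketch of the new convention; the function name and surrounding driver context are illustrative, not part of the patch:

#include <linux/err.h>
#include <linux/blk-mq.h>

/* Illustrative only: issue a driver-private request via the blk-mq API. */
static int example_issue_drv_request(struct request_queue *q)
{
	struct request *rq;

	/* Previously: rq = blk_get_request(q, REQ_OP_DRV_IN, 0); */
	rq = blk_mq_alloc_request(q, REQ_OP_DRV_IN, 0);
	if (IS_ERR(rq))
		return PTR_ERR(rq);

	/*
	 * Per-request initialization that used to run from the
	 * ->initialize_rq_fn hook is now open-coded by the caller,
	 * as the bsg-lib.c hunk below does with the old
	 * bsg_initialize_rq() body.
	 */

	/* Previously: blk_put_request(rq); */
	blk_mq_free_request(rq);
	return 0;
}
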
diff --git a/block/Makefile b/block/Makefile index 602f7f47b7b6..44df57e562bf 100644 --- a/block/Makefile +++ b/block/Makefile @@ -9,7 +9,7 @@ obj-y := bdev.o fops.o bio.o elevator.o blk-core.o blk-sysfs.o \ blk-lib.o blk-mq.o blk-mq-tag.o blk-stat.o \ blk-mq-sysfs.o blk-mq-cpumap.o blk-mq-sched.o ioctl.o \ genhd.o ioprio.o badblocks.o partitions/ blk-rq-qos.o \ - disk-events.o + disk-events.o blk-ia-ranges.o obj-$(CONFIG_BOUNCE) += bounce.o obj-$(CONFIG_BLK_DEV_BSG_COMMON) += bsg.o diff --git a/block/bio.c b/block/bio.c index 46a87c72d2b4..15ab0d6d1c06 100644 --- a/block/bio.c +++ b/block/bio.c @@ -1046,36 +1046,27 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty) } EXPORT_SYMBOL_GPL(__bio_release_pages); -static void __bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter) +void bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter) { + size_t size = iov_iter_count(iter); + WARN_ON_ONCE(bio->bi_max_vecs); + if (bio_op(bio) == REQ_OP_ZONE_APPEND) { + struct request_queue *q = bdev_get_queue(bio->bi_bdev); + size_t max_sectors = queue_max_zone_append_sectors(q); + + size = min(size, max_sectors << SECTOR_SHIFT); + } + bio->bi_vcnt = iter->nr_segs; bio->bi_io_vec = (struct bio_vec *)iter->bvec; bio->bi_iter.bi_bvec_done = iter->iov_offset; - bio->bi_iter.bi_size = iter->count; + bio->bi_iter.bi_size = size; bio_set_flag(bio, BIO_NO_PAGE_REF); bio_set_flag(bio, BIO_CLONED); } -static int bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter) -{ - __bio_iov_bvec_set(bio, iter); - iov_iter_advance(iter, iter->count); - return 0; -} - -static int bio_iov_bvec_set_append(struct bio *bio, struct iov_iter *iter) -{ - struct request_queue *q = bdev_get_queue(bio->bi_bdev); - struct iov_iter i = *iter; - - iov_iter_truncate(&i, queue_max_zone_append_sectors(q) << 9); - __bio_iov_bvec_set(bio, &i); - iov_iter_advance(iter, i.count); - return 0; -} - static void bio_put_pages(struct page **pages, size_t size, size_t off) { size_t i, nr = DIV_ROUND_UP(size + (off & ~PAGE_MASK), PAGE_SIZE); @@ -1217,9 +1208,9 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) int ret = 0; if (iov_iter_is_bvec(iter)) { - if (bio_op(bio) == REQ_OP_ZONE_APPEND) - return bio_iov_bvec_set_append(bio, iter); - return bio_iov_bvec_set(bio, iter); + bio_iov_bvec_set(bio, iter); + iov_iter_advance(iter, bio->bi_iter.bi_size); + return 0; } do { diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 8908298d6ad3..88b1fce90520 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -634,6 +634,14 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, q = bdev_get_queue(bdev); + /* + * blkcg_deactivate_policy() requires queue to be frozen, we can grab + * q_usage_counter to prevent concurrent with blkcg_deactivate_policy(). + */ + ret = blk_queue_enter(q, 0); + if (ret) + return ret; + rcu_read_lock(); spin_lock_irq(&q->queue_lock); @@ -703,6 +711,7 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, goto success; } success: + blk_queue_exit(q); ctx->bdev = bdev; ctx->blkg = blkg; ctx->body = input; @@ -715,6 +724,7 @@ fail_unlock: rcu_read_unlock(); fail: blkdev_put_no_open(bdev); + blk_queue_exit(q); /* * If queue was bypassing, we should retry. Do so after a * short msleep(). 
It isn't strictly necessary but queue diff --git a/block/blk-core.c b/block/blk-core.c index fd389a16013c..ac1de7d73a45 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -597,34 +597,6 @@ bool blk_get_queue(struct request_queue *q) } EXPORT_SYMBOL(blk_get_queue); -/** - * blk_get_request - allocate a request - * @q: request queue to allocate a request for - * @op: operation (REQ_OP_*) and REQ_* flags, e.g. REQ_SYNC. - * @flags: BLK_MQ_REQ_* flags, e.g. BLK_MQ_REQ_NOWAIT. - */ -struct request *blk_get_request(struct request_queue *q, unsigned int op, - blk_mq_req_flags_t flags) -{ - struct request *req; - - WARN_ON_ONCE(op & REQ_NOWAIT); - WARN_ON_ONCE(flags & ~(BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_PM)); - - req = blk_mq_alloc_request(q, op, flags); - if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn) - q->mq_ops->initialize_rq_fn(req); - - return req; -} -EXPORT_SYMBOL(blk_get_request); - -void blk_put_request(struct request *req) -{ - blk_mq_free_request(req); -} -EXPORT_SYMBOL(blk_put_request); - static void handle_bad_sector(struct bio *bio, sector_t maxsector) { char b[BDEVNAME_SIZE]; diff --git a/block/blk-ia-ranges.c b/block/blk-ia-ranges.c new file mode 100644 index 000000000000..c246c425d0d7 --- /dev/null +++ b/block/blk-ia-ranges.c @@ -0,0 +1,348 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Block device concurrent positioning ranges. + * + * Copyright (C) 2021 Western Digital Corporation or its Affiliates. + */ +#include <linux/kernel.h> +#include <linux/blkdev.h> +#include <linux/slab.h> +#include <linux/init.h> + +#include "blk.h" + +static ssize_t +blk_ia_range_sector_show(struct blk_independent_access_range *iar, + char *buf) +{ + return sprintf(buf, "%llu\n", iar->sector); +} + +static ssize_t +blk_ia_range_nr_sectors_show(struct blk_independent_access_range *iar, + char *buf) +{ + return sprintf(buf, "%llu\n", iar->nr_sectors); +} + +struct blk_ia_range_sysfs_entry { + struct attribute attr; + ssize_t (*show)(struct blk_independent_access_range *iar, char *buf); +}; + +static struct blk_ia_range_sysfs_entry blk_ia_range_sector_entry = { + .attr = { .name = "sector", .mode = 0444 }, + .show = blk_ia_range_sector_show, +}; + +static struct blk_ia_range_sysfs_entry blk_ia_range_nr_sectors_entry = { + .attr = { .name = "nr_sectors", .mode = 0444 }, + .show = blk_ia_range_nr_sectors_show, +}; + +static struct attribute *blk_ia_range_attrs[] = { + &blk_ia_range_sector_entry.attr, + &blk_ia_range_nr_sectors_entry.attr, + NULL, +}; +ATTRIBUTE_GROUPS(blk_ia_range); + +static ssize_t blk_ia_range_sysfs_show(struct kobject *kobj, + struct attribute *attr, char *buf) +{ + struct blk_ia_range_sysfs_entry *entry = + container_of(attr, struct blk_ia_range_sysfs_entry, attr); + struct blk_independent_access_range *iar = + container_of(kobj, struct blk_independent_access_range, kobj); + ssize_t ret; + + mutex_lock(&iar->queue->sysfs_lock); + ret = entry->show(iar, buf); + mutex_unlock(&iar->queue->sysfs_lock); + + return ret; +} + +static const struct sysfs_ops blk_ia_range_sysfs_ops = { + .show = blk_ia_range_sysfs_show, +}; + +/* + * Independent access range entries are not freed individually, but alltogether + * with struct blk_independent_access_ranges and its array of ranges. Since + * kobject_add() takes a reference on the parent kobject contained in + * struct blk_independent_access_ranges, the array of independent access range + * entries cannot be freed until kobject_del() is called for all entries. 
+ * So we do not need to do anything here, but still need this no-op release + * operation to avoid complaints from the kobject code. + */ +static void blk_ia_range_sysfs_nop_release(struct kobject *kobj) +{ +} + +static struct kobj_type blk_ia_range_ktype = { + .sysfs_ops = &blk_ia_range_sysfs_ops, + .default_groups = blk_ia_range_groups, + .release = blk_ia_range_sysfs_nop_release, +}; + +/* + * This will be executed only after all independent access range entries are + * removed with kobject_del(), at which point, it is safe to free everything, + * including the array of ranges. + */ +static void blk_ia_ranges_sysfs_release(struct kobject *kobj) +{ + struct blk_independent_access_ranges *iars = + container_of(kobj, struct blk_independent_access_ranges, kobj); + + kfree(iars); +} + +static struct kobj_type blk_ia_ranges_ktype = { + .release = blk_ia_ranges_sysfs_release, +}; + +/** + * disk_register_ia_ranges - register with sysfs a set of independent + * access ranges + * @disk: Target disk + * @new_iars: New set of independent access ranges + * + * Register with sysfs a set of independent access ranges for @disk. + * If @new_iars is not NULL, this set of ranges is registered and the old set + * specified by q->ia_ranges is unregistered. Otherwise, q->ia_ranges is + * registered if it is not already. + */ +int disk_register_independent_access_ranges(struct gendisk *disk, + struct blk_independent_access_ranges *new_iars) +{ + struct request_queue *q = disk->queue; + struct blk_independent_access_ranges *iars; + int i, ret; + + lockdep_assert_held(&q->sysfs_dir_lock); + lockdep_assert_held(&q->sysfs_lock); + + /* If a new range set is specified, unregister the old one */ + if (new_iars) { + if (q->ia_ranges) + disk_unregister_independent_access_ranges(disk); + q->ia_ranges = new_iars; + } + + iars = q->ia_ranges; + if (!iars) + return 0; + + /* + * At this point, iars is the new set of sector access ranges that needs + * to be registered with sysfs. 
+ */ + WARN_ON(iars->sysfs_registered); + ret = kobject_init_and_add(&iars->kobj, &blk_ia_ranges_ktype, + &q->kobj, "%s", "independent_access_ranges"); + if (ret) { + q->ia_ranges = NULL; + kfree(iars); + return ret; + } + + for (i = 0; i < iars->nr_ia_ranges; i++) { + iars->ia_range[i].queue = q; + ret = kobject_init_and_add(&iars->ia_range[i].kobj, + &blk_ia_range_ktype, &iars->kobj, + "%d", i); + if (ret) { + while (--i >= 0) + kobject_del(&iars->ia_range[i].kobj); + kobject_del(&iars->kobj); + kobject_put(&iars->kobj); + return ret; + } + } + + iars->sysfs_registered = true; + + return 0; +} + +void disk_unregister_independent_access_ranges(struct gendisk *disk) +{ + struct request_queue *q = disk->queue; + struct blk_independent_access_ranges *iars = q->ia_ranges; + int i; + + lockdep_assert_held(&q->sysfs_dir_lock); + lockdep_assert_held(&q->sysfs_lock); + + if (!iars) + return; + + if (iars->sysfs_registered) { + for (i = 0; i < iars->nr_ia_ranges; i++) + kobject_del(&iars->ia_range[i].kobj); + kobject_del(&iars->kobj); + kobject_put(&iars->kobj); + } else { + kfree(iars); + } + + q->ia_ranges = NULL; +} + +static struct blk_independent_access_range * +disk_find_ia_range(struct blk_independent_access_ranges *iars, + sector_t sector) +{ + struct blk_independent_access_range *iar; + int i; + + for (i = 0; i < iars->nr_ia_ranges; i++) { + iar = &iars->ia_range[i]; + if (sector >= iar->sector && + sector < iar->sector + iar->nr_sectors) + return iar; + } + + return NULL; +} + +static bool disk_check_ia_ranges(struct gendisk *disk, + struct blk_independent_access_ranges *iars) +{ + struct blk_independent_access_range *iar, *tmp; + sector_t capacity = get_capacity(disk); + sector_t sector = 0; + int i; + + /* + * While sorting the ranges in increasing LBA order, check that the + * ranges do not overlap, that there are no sector holes and that all + * sectors belong to one range. + */ + for (i = 0; i < iars->nr_ia_ranges; i++) { + tmp = disk_find_ia_range(iars, sector); + if (!tmp || tmp->sector != sector) { + pr_warn("Invalid non-contiguous independent access ranges\n"); + return false; + } + + iar = &iars->ia_range[i]; + if (tmp != iar) { + swap(iar->sector, tmp->sector); + swap(iar->nr_sectors, tmp->nr_sectors); + } + + sector += iar->nr_sectors; + } + + if (sector != capacity) { + pr_warn("Independent access ranges do not match disk capacity\n"); + return false; + } + + return true; +} + +static bool disk_ia_ranges_changed(struct gendisk *disk, + struct blk_independent_access_ranges *new) +{ + struct blk_independent_access_ranges *old = disk->queue->ia_ranges; + int i; + + if (!old) + return true; + + if (old->nr_ia_ranges != new->nr_ia_ranges) + return true; + + for (i = 0; i < old->nr_ia_ranges; i++) { + if (new->ia_range[i].sector != old->ia_range[i].sector || + new->ia_range[i].nr_sectors != old->ia_range[i].nr_sectors) + return true; + } + + return false; +} + +/** + * disk_alloc_independent_access_ranges - Allocate an independent access ranges + * data structure + * @disk: target disk + * @nr_ia_ranges: Number of independent access ranges + * + * Allocate a struct blk_independent_access_ranges structure with @nr_ia_ranges + * access range descriptors. 
+ */ +struct blk_independent_access_ranges * +disk_alloc_independent_access_ranges(struct gendisk *disk, int nr_ia_ranges) +{ + struct blk_independent_access_ranges *iars; + + iars = kzalloc_node(struct_size(iars, ia_range, nr_ia_ranges), + GFP_KERNEL, disk->queue->node); + if (iars) + iars->nr_ia_ranges = nr_ia_ranges; + return iars; +} +EXPORT_SYMBOL_GPL(disk_alloc_independent_access_ranges); + +/** + * disk_set_independent_access_ranges - Set a disk independent access ranges + * @disk: target disk + * @iars: independent access ranges structure + * + * Set the independent access ranges information of the request queue + * of @disk to @iars. If @iars is NULL and the independent access ranges + * structure already set is cleared. If there are no differences between + * @iars and the independent access ranges structure already set, @iars + * is freed. + */ +void disk_set_independent_access_ranges(struct gendisk *disk, + struct blk_independent_access_ranges *iars) +{ + struct request_queue *q = disk->queue; + + if (WARN_ON_ONCE(iars && !iars->nr_ia_ranges)) { + kfree(iars); + iars = NULL; + } + + mutex_lock(&q->sysfs_dir_lock); + mutex_lock(&q->sysfs_lock); + + if (iars) { + if (!disk_check_ia_ranges(disk, iars)) { + kfree(iars); + iars = NULL; + goto reg; + } + + if (!disk_ia_ranges_changed(disk, iars)) { + kfree(iars); + goto unlock; + } + } + + /* + * This may be called for a registered queue. E.g. during a device + * revalidation. If that is the case, we need to unregister the old + * set of independent access ranges and register the new set. If the + * queue is not registered, registration of the device request queue + * will register the independent access ranges, so only swap in the + * new set and free the old one. + */ +reg: + if (blk_queue_registered(q)) { + disk_register_independent_access_ranges(disk, iars); + } else { + swap(q->ia_ranges, iars); + kfree(iars); + } + +unlock: + mutex_unlock(&q->sysfs_lock); + mutex_unlock(&q->sysfs_dir_lock); +} +EXPORT_SYMBOL_GPL(disk_set_independent_access_ranges); diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 68ca5d21cda7..f5076c173477 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -124,7 +124,6 @@ static const char *const blk_queue_flag_name[] = { QUEUE_FLAG_NAME(STATS), QUEUE_FLAG_NAME(POLL_STATS), QUEUE_FLAG_NAME(REGISTERED), - QUEUE_FLAG_NAME(SCSI_PASSTHROUGH), QUEUE_FLAG_NAME(QUIESCED), QUEUE_FLAG_NAME(PCI_P2PDMA), QUEUE_FLAG_NAME(ZONE_RESETALL), @@ -550,7 +549,7 @@ static int hctx_active_show(void *data, struct seq_file *m) { struct blk_mq_hw_ctx *hctx = data; - seq_printf(m, "%d\n", atomic_read(&hctx->nr_active)); + seq_printf(m, "%d\n", __blk_mq_active_requests(hctx)); return 0; } diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c index 5b259fdea794..c62b966dfaba 100644 --- a/block/blk-mq-sched.c +++ b/block/blk-mq-sched.c @@ -541,7 +541,7 @@ static void blk_mq_sched_tags_teardown(struct request_queue *q, unsigned int fla queue_for_each_hw_ctx(q, hctx, i) { if (hctx->sched_tags) { - if (!blk_mq_is_shared_tags(q->tag_set->flags)) + if (!blk_mq_is_shared_tags(flags)) blk_mq_free_rq_map(hctx->sched_tags); hctx->sched_tags = NULL; } diff --git a/block/blk-mq.c b/block/blk-mq.c index d04ee72ba125..07eb1412760b 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -316,40 +316,37 @@ void blk_mq_wake_waiters(struct request_queue *q) } static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, - unsigned int tag, u64 alloc_time_ns) + struct blk_mq_tags *tags, unsigned int tag, u64 
alloc_time_ns) { struct blk_mq_ctx *ctx = data->ctx; struct blk_mq_hw_ctx *hctx = data->hctx; struct request_queue *q = data->q; - struct elevator_queue *e = q->elevator; - struct blk_mq_tags *tags = blk_mq_tags_from_data(data); struct request *rq = tags->static_rqs[tag]; - unsigned int rq_flags = 0; - if (e) { - rq_flags = RQF_ELV; - rq->tag = BLK_MQ_NO_TAG; - rq->internal_tag = tag; - } else { - rq->tag = tag; - rq->internal_tag = BLK_MQ_NO_TAG; - } + rq->q = q; + rq->mq_ctx = ctx; + rq->mq_hctx = hctx; + rq->cmd_flags = data->cmd_flags; if (data->flags & BLK_MQ_REQ_PM) - rq_flags |= RQF_PM; + data->rq_flags |= RQF_PM; if (blk_queue_io_stat(q)) - rq_flags |= RQF_IO_STAT; - rq->rq_flags = rq_flags; + data->rq_flags |= RQF_IO_STAT; + rq->rq_flags = data->rq_flags; + + if (!(data->rq_flags & RQF_ELV)) { + rq->tag = tag; + rq->internal_tag = BLK_MQ_NO_TAG; + } else { + rq->tag = BLK_MQ_NO_TAG; + rq->internal_tag = tag; + } + rq->timeout = 0; if (blk_mq_need_time_stamp(rq)) rq->start_time_ns = ktime_get_ns(); else rq->start_time_ns = 0; - /* csd/requeue_work/fifo_time is initialized before use */ - rq->q = q; - rq->mq_ctx = ctx; - rq->mq_hctx = hctx; - rq->cmd_flags = data->cmd_flags; rq->rq_disk = NULL; rq->part = NULL; #ifdef CONFIG_BLK_RQ_ALLOC_TIME @@ -361,7 +358,6 @@ static struct request *blk_mq_rq_ctx_init(struct blk_mq_alloc_data *data, #if defined(CONFIG_BLK_DEV_INTEGRITY) rq->nr_integrity_segments = 0; #endif - rq->timeout = 0; rq->end_io = NULL; rq->end_io_data = NULL; @@ -396,20 +392,23 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data, u64 alloc_time_ns) { unsigned int tag, tag_offset; + struct blk_mq_tags *tags; struct request *rq; - unsigned long tags; + unsigned long tag_mask; int i, nr = 0; - tags = blk_mq_get_tags(data, data->nr_tags, &tag_offset); - if (unlikely(!tags)) + tag_mask = blk_mq_get_tags(data, data->nr_tags, &tag_offset); + if (unlikely(!tag_mask)) return NULL; - for (i = 0; tags; i++) { - if (!(tags & (1UL << i))) + tags = blk_mq_tags_from_data(data); + for (i = 0; tag_mask; i++) { + if (!(tag_mask & (1UL << i))) continue; + prefetch(tags->static_rqs[tag]); tag = tag_offset + i; - tags &= ~(1UL << i); - rq = blk_mq_rq_ctx_init(data, tag, alloc_time_ns); + tag_mask &= ~(1UL << i); + rq = blk_mq_rq_ctx_init(data, tags, tag, alloc_time_ns); rq_list_add(data->cached_rq, rq); } data->nr_tags -= nr; @@ -480,7 +479,8 @@ retry: goto retry; } - return blk_mq_rq_ctx_init(data, tag, alloc_time_ns); + return blk_mq_rq_ctx_init(data, blk_mq_tags_from_data(data), tag, + alloc_time_ns); } struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, @@ -490,6 +490,7 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, .q = q, .flags = flags, .cmd_flags = op, + .rq_flags = q->elevator ? RQF_ELV : 0, .nr_tags = 1, }; struct request *rq; @@ -519,6 +520,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, .q = q, .flags = flags, .cmd_flags = op, + .rq_flags = q->elevator ? 
RQF_ELV : 0, .nr_tags = 1, }; u64 alloc_time_ns = 0; @@ -564,7 +566,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q, tag = blk_mq_get_tag(&data); if (tag == BLK_MQ_NO_TAG) goto out_queue_exit; - return blk_mq_rq_ctx_init(&data, tag, alloc_time_ns); + return blk_mq_rq_ctx_init(&data, blk_mq_tags_from_data(&data), tag, + alloc_time_ns); out_queue_exit: blk_queue_exit(q); @@ -640,7 +643,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio, * Partial zone append completions cannot be supported as the * BIO fragments may end up not being written sequentially. */ - if (bio->bi_iter.bi_size == nbytes) + if (bio->bi_iter.bi_size != nbytes) bio->bi_status = BLK_STS_IOERR; else bio->bi_iter.bi_sector = rq->__sector; @@ -819,7 +822,7 @@ static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx, void blk_mq_end_request_batch(struct io_comp_batch *iob) { int tags[TAG_COMP_BATCH], nr_tags = 0; - struct blk_mq_hw_ctx *last_hctx = NULL; + struct blk_mq_hw_ctx *cur_hctx = NULL; struct request *rq; u64 now = 0; @@ -842,17 +845,17 @@ void blk_mq_end_request_batch(struct io_comp_batch *iob) blk_pm_mark_last_busy(rq); rq_qos_done(rq->q, rq); - if (nr_tags == TAG_COMP_BATCH || - (last_hctx && last_hctx != rq->mq_hctx)) { - blk_mq_flush_tag_batch(last_hctx, tags, nr_tags); + if (nr_tags == TAG_COMP_BATCH || cur_hctx != rq->mq_hctx) { + if (cur_hctx) + blk_mq_flush_tag_batch(cur_hctx, tags, nr_tags); nr_tags = 0; + cur_hctx = rq->mq_hctx; } tags[nr_tags++] = rq->tag; - last_hctx = rq->mq_hctx; } if (nr_tags) - blk_mq_flush_tag_batch(last_hctx, tags, nr_tags); + blk_mq_flush_tag_batch(cur_hctx, tags, nr_tags); } EXPORT_SYMBOL_GPL(blk_mq_end_request_batch); @@ -1593,6 +1596,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, int errors, queued; blk_status_t ret = BLK_STS_OK; LIST_HEAD(zone_list); + bool needs_resource = false; if (list_empty(list)) return false; @@ -1638,6 +1642,8 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, queued++; break; case BLK_STS_RESOURCE: + needs_resource = true; + fallthrough; case BLK_STS_DEV_RESOURCE: blk_mq_handle_dev_resource(rq, list); goto out; @@ -1648,6 +1654,7 @@ bool blk_mq_dispatch_rq_list(struct blk_mq_hw_ctx *hctx, struct list_head *list, * accept. */ blk_mq_handle_zone_resource(rq, &zone_list); + needs_resource = true; break; default: errors++; @@ -1672,7 +1679,6 @@ out: /* For non-shared tags, the RESTART check will suffice */ bool no_tag = prep == PREP_DISPATCH_NO_TAG && (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED); - bool no_budget_avail = prep == PREP_DISPATCH_NO_BUDGET; if (nr_budgets) blk_mq_release_budgets(q, list); @@ -1713,14 +1719,16 @@ out: * If driver returns BLK_STS_RESOURCE and SCHED_RESTART * bit is set, run queue after a delay to avoid IO stalls * that could otherwise occur if the queue is idle. We'll do - * similar if we couldn't get budget and SCHED_RESTART is set. + * similar if we couldn't get budget or couldn't lock a zone + * and SCHED_RESTART is set. 
*/ needs_restart = blk_mq_sched_needs_restart(hctx); + if (prep == PREP_DISPATCH_NO_BUDGET) + needs_resource = true; if (!needs_restart || (no_tag && list_empty_careful(&hctx->dispatch_wait.entry))) blk_mq_run_hw_queue(hctx, true); - else if (needs_restart && (ret == BLK_STS_RESOURCE || - no_budget_avail)) + else if (needs_restart && needs_resource) blk_mq_delay_run_hw_queue(hctx, BLK_MQ_RESOURCE_DELAY); blk_mq_update_dispatch_busy(hctx, true); @@ -2223,7 +2231,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule) return; plug->rq_count = 0; - if (!plug->multiple_queues && !plug->has_elevator) { + if (!plug->multiple_queues && !plug->has_elevator && !from_schedule) { blk_mq_plug_issue_direct(plug, from_schedule); if (rq_list_empty(plug->mq_list)) return; @@ -2512,6 +2520,7 @@ void blk_mq_submit_bio(struct bio *bio) .q = q, .nr_tags = 1, .cmd_flags = bio->bi_opf, + .rq_flags = q->elevator ? RQF_ELV : 0, }; if (plug) { diff --git a/block/blk-mq.h b/block/blk-mq.h index 08fb5922e611..28859fc5faee 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -149,6 +149,7 @@ struct blk_mq_alloc_data { blk_mq_req_flags_t flags; unsigned int shallow_depth; unsigned int cmd_flags; + unsigned int rq_flags; /* allocate multiple requests/tags in one go */ unsigned int nr_tags; @@ -166,10 +167,9 @@ static inline bool blk_mq_is_shared_tags(unsigned int flags) static inline struct blk_mq_tags *blk_mq_tags_from_data(struct blk_mq_alloc_data *data) { - if (data->q->elevator) - return data->hctx->sched_tags; - - return data->hctx->tags; + if (!(data->rq_flags & RQF_ELV)) + return data->hctx->tags; + return data->hctx->sched_tags; } static inline bool blk_mq_hctx_stopped(struct blk_mq_hw_ctx *hctx) diff --git a/block/blk-settings.c b/block/blk-settings.c index a7c857ad7d10..b880c70e22e4 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -842,6 +842,24 @@ bool blk_queue_can_use_dma_map_merging(struct request_queue *q, } EXPORT_SYMBOL_GPL(blk_queue_can_use_dma_map_merging); +static bool disk_has_partitions(struct gendisk *disk) +{ + unsigned long idx; + struct block_device *part; + bool ret = false; + + rcu_read_lock(); + xa_for_each(&disk->part_tbl, idx, part) { + if (bdev_is_partition(part)) { + ret = true; + break; + } + } + rcu_read_unlock(); + + return ret; +} + /** * blk_queue_set_zoned - configure a disk queue zoned model. * @disk: the gendisk of the queue to configure @@ -876,7 +894,7 @@ void blk_queue_set_zoned(struct gendisk *disk, enum blk_zoned_model model) * we do nothing special as far as the block layer is concerned. 
*/ if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) || - !xa_empty(&disk->part_tbl)) + disk_has_partitions(disk)) model = BLK_ZONED_NONE; break; case BLK_ZONED_NONE: diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 36f14d658e81..cef1f713370b 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -873,16 +873,15 @@ int blk_register_queue(struct gendisk *disk) } mutex_lock(&q->sysfs_lock); + + ret = disk_register_independent_access_ranges(disk, NULL); + if (ret) + goto put_dev; + if (q->elevator) { ret = elv_register_queue(q, false); - if (ret) { - mutex_unlock(&q->sysfs_lock); - mutex_unlock(&q->sysfs_dir_lock); - kobject_del(&q->kobj); - blk_trace_remove_sysfs(dev); - kobject_put(&dev->kobj); - return ret; - } + if (ret) + goto put_dev; } blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q); @@ -914,6 +913,16 @@ unlock: } return ret; + +put_dev: + disk_unregister_independent_access_ranges(disk); + mutex_unlock(&q->sysfs_lock); + mutex_unlock(&q->sysfs_dir_lock); + kobject_del(&q->kobj); + blk_trace_remove_sysfs(dev); + kobject_put(&dev->kobj); + + return ret; } /** @@ -958,6 +967,7 @@ void blk_unregister_queue(struct gendisk *disk) mutex_lock(&q->sysfs_lock); if (q->elevator) elv_unregister_queue(q); + disk_unregister_independent_access_ranges(disk); mutex_unlock(&q->sysfs_lock); mutex_unlock(&q->sysfs_dir_lock); diff --git a/block/blk.h b/block/blk.h index 6a039e6c7d07..7afffd548daf 100644 --- a/block/blk.h +++ b/block/blk.h @@ -454,4 +454,8 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg); extern const struct address_space_operations def_blk_aops; +int disk_register_independent_access_ranges(struct gendisk *disk, + struct blk_independent_access_ranges *new_iars); +void disk_unregister_independent_access_ranges(struct gendisk *disk); + #endif /* BLK_INTERNAL_H */ diff --git a/block/bsg-lib.c b/block/bsg-lib.c index ccb98276c964..10aa378702fa 100644 --- a/block/bsg-lib.c +++ b/block/bsg-lib.c @@ -31,6 +31,7 @@ static int bsg_transport_sg_io_fn(struct request_queue *q, struct sg_io_v4 *hdr, struct bsg_job *job; struct request *rq; struct bio *bio; + void *reply; int ret; if (hdr->protocol != BSG_PROTOCOL_SCSI || @@ -39,22 +40,28 @@ static int bsg_transport_sg_io_fn(struct request_queue *q, struct sg_io_v4 *hdr, if (!capable(CAP_SYS_RAWIO)) return -EPERM; - rq = blk_get_request(q, hdr->dout_xfer_len ? + rq = blk_mq_alloc_request(q, hdr->dout_xfer_len ? 
REQ_OP_DRV_OUT : REQ_OP_DRV_IN, 0); if (IS_ERR(rq)) return PTR_ERR(rq); rq->timeout = timeout; job = blk_mq_rq_to_pdu(rq); + reply = job->reply; + memset(job, 0, sizeof(*job)); + job->reply = reply; + job->reply_len = SCSI_SENSE_BUFFERSIZE; + job->dd_data = job + 1; + job->request_len = hdr->request_len; job->request = memdup_user(uptr64(hdr->request), hdr->request_len); if (IS_ERR(job->request)) { ret = PTR_ERR(job->request); - goto out_put_request; + goto out_free_rq; } if (hdr->dout_xfer_len && hdr->din_xfer_len) { - job->bidi_rq = blk_get_request(rq->q, REQ_OP_DRV_IN, 0); + job->bidi_rq = blk_mq_alloc_request(rq->q, REQ_OP_DRV_IN, 0); if (IS_ERR(job->bidi_rq)) { ret = PTR_ERR(job->bidi_rq); goto out_free_job_request; @@ -134,11 +141,11 @@ out_unmap_bidi_rq: blk_rq_unmap_user(job->bidi_bio); out_free_bidi_rq: if (job->bidi_rq) - blk_put_request(job->bidi_rq); + blk_mq_free_request(job->bidi_rq); out_free_job_request: kfree(job->request); -out_put_request: - blk_put_request(rq); +out_free_rq: + blk_mq_free_request(rq); return ret; } @@ -302,18 +309,6 @@ static int bsg_init_rq(struct blk_mq_tag_set *set, struct request *req, return 0; } -/* called right before the request is given to the request_queue user */ -static void bsg_initialize_rq(struct request *req) -{ - struct bsg_job *job = blk_mq_rq_to_pdu(req); - void *reply = job->reply; - - memset(job, 0, sizeof(*job)); - job->reply = reply; - job->reply_len = SCSI_SENSE_BUFFERSIZE; - job->dd_data = job + 1; -} - static void bsg_exit_rq(struct blk_mq_tag_set *set, struct request *req, unsigned int hctx_idx) { @@ -350,7 +345,6 @@ static const struct blk_mq_ops bsg_mq_ops = { .queue_rq = bsg_queue_rq, .init_request = bsg_init_rq, .exit_request = bsg_exit_rq, - .initialize_rq_fn = bsg_initialize_rq, .complete = bsg_complete, .timeout = bsg_timeout, }; diff --git a/block/fops.c b/block/fops.c index 396537598e3e..4e22b0794c82 100644 --- a/block/fops.c +++ b/block/fops.c @@ -124,9 +124,8 @@ out: } enum { - DIO_MULTI_BIO = 1, - DIO_SHOULD_DIRTY = 2, - DIO_IS_SYNC = 4, + DIO_SHOULD_DIRTY = 1, + DIO_IS_SYNC = 2, }; struct blkdev_dio { @@ -150,7 +149,7 @@ static void blkdev_bio_end_io(struct bio *bio) if (bio->bi_status && !dio->bio.bi_status) dio->bio.bi_status = bio->bi_status; - if (!(dio->flags & DIO_MULTI_BIO) || atomic_dec_and_test(&dio->ref)) { + if (atomic_dec_and_test(&dio->ref)) { if (!(dio->flags & DIO_IS_SYNC)) { struct kiocb *iocb = dio->iocb; ssize_t ret; @@ -164,9 +163,8 @@ static void blkdev_bio_end_io(struct bio *bio) ret = blk_status_to_errno(dio->bio.bi_status); } - dio->iocb->ki_complete(iocb, ret, 0); - if (dio->flags & DIO_MULTI_BIO) - bio_put(&dio->bio); + dio->iocb->ki_complete(iocb, ret); + bio_put(&dio->bio); } else { struct task_struct *waiter = dio->waiter; @@ -190,7 +188,6 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, struct blk_plug plug; struct blkdev_dio *dio; struct bio *bio; - bool do_poll = (iocb->ki_flags & IOCB_HIPRI); bool is_read = (iov_iter_rw(iter) == READ), is_sync; loff_t pos = iocb->ki_pos; int ret = 0; @@ -202,11 +199,17 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool); dio = container_of(bio, struct blkdev_dio, bio); + atomic_set(&dio->ref, 1); + /* + * Grab an extra reference to ensure the dio structure which is embedded + * into the first bio stays around. 
+ */ + bio_get(bio); + is_sync = is_sync_kiocb(iocb); if (is_sync) { dio->flags = DIO_IS_SYNC; dio->waiter = current; - bio_get(bio); } else { dio->flags = 0; dio->iocb = iocb; @@ -216,12 +219,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, if (is_read && iter_is_iovec(iter)) dio->flags |= DIO_SHOULD_DIRTY; - /* - * Don't plug for HIPRI/polled IO, as those should go straight - * to issue - */ - if (!(iocb->ki_flags & IOCB_HIPRI)) - blk_start_plug(&plug); + blk_start_plug(&plug); for (;;) { bio_set_dev(bio, bdev); @@ -254,34 +252,15 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS); if (!nr_pages) { - if (do_poll) - bio_set_polled(bio, iocb); submit_bio(bio); - if (do_poll) - WRITE_ONCE(iocb->private, bio); break; } - if (!(dio->flags & DIO_MULTI_BIO)) { - /* - * AIO needs an extra reference to ensure the dio - * structure which is embedded into the first bio - * stays around. - */ - if (!is_sync) - bio_get(bio); - dio->flags |= DIO_MULTI_BIO; - atomic_set(&dio->ref, 2); - do_poll = false; - } else { - atomic_inc(&dio->ref); - } - + atomic_inc(&dio->ref); submit_bio(bio); bio = bio_alloc(GFP_KERNEL, nr_pages); } - if (!(iocb->ki_flags & IOCB_HIPRI)) - blk_finish_plug(&plug); + blk_finish_plug(&plug); if (!is_sync) return -EIOCBQUEUED; @@ -290,9 +269,7 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, set_current_state(TASK_UNINTERRUPTIBLE); if (!READ_ONCE(dio->waiter)) break; - - if (!do_poll || !bio_poll(bio, NULL, 0)) - blk_io_schedule(); + blk_io_schedule(); } __set_current_state(TASK_RUNNING); @@ -305,6 +282,94 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, return ret; } +static void blkdev_bio_end_io_async(struct bio *bio) +{ + struct blkdev_dio *dio = container_of(bio, struct blkdev_dio, bio); + struct kiocb *iocb = dio->iocb; + ssize_t ret; + + if (likely(!bio->bi_status)) { + ret = dio->size; + iocb->ki_pos += ret; + } else { + ret = blk_status_to_errno(bio->bi_status); + } + + iocb->ki_complete(iocb, ret); + + if (dio->flags & DIO_SHOULD_DIRTY) { + bio_check_pages_dirty(bio); + } else { + bio_release_pages(bio, false); + bio_put(bio); + } +} + +static ssize_t __blkdev_direct_IO_async(struct kiocb *iocb, + struct iov_iter *iter, + unsigned int nr_pages) +{ + struct block_device *bdev = iocb->ki_filp->private_data; + struct blkdev_dio *dio; + struct bio *bio; + loff_t pos = iocb->ki_pos; + int ret = 0; + + if ((pos | iov_iter_alignment(iter)) & + (bdev_logical_block_size(bdev) - 1)) + return -EINVAL; + + bio = bio_alloc_kiocb(iocb, nr_pages, &blkdev_dio_pool); + dio = container_of(bio, struct blkdev_dio, bio); + dio->flags = 0; + dio->iocb = iocb; + bio_set_dev(bio, bdev); + bio->bi_iter.bi_sector = pos >> SECTOR_SHIFT; + bio->bi_write_hint = iocb->ki_hint; + bio->bi_end_io = blkdev_bio_end_io_async; + bio->bi_ioprio = iocb->ki_ioprio; + + if (iov_iter_is_bvec(iter)) { + /* + * Users don't rely on the iterator being in any particular + * state for async I/O returning -EIOCBQUEUED, hence we can + * avoid expensive iov_iter_advance(). Bypass + * bio_iov_iter_get_pages() and set the bvec directly. 
+ */ + bio_iov_bvec_set(bio, iter); + } else { + ret = bio_iov_iter_get_pages(bio, iter); + if (unlikely(ret)) { + bio->bi_status = BLK_STS_IOERR; + bio_endio(bio); + return ret; + } + } + dio->size = bio->bi_iter.bi_size; + + if (iov_iter_rw(iter) == READ) { + bio->bi_opf = REQ_OP_READ; + if (iter_is_iovec(iter)) { + dio->flags |= DIO_SHOULD_DIRTY; + bio_set_pages_dirty(bio); + } + } else { + bio->bi_opf = dio_bio_write_op(iocb); + task_io_account_write(bio->bi_iter.bi_size); + } + + if (iocb->ki_flags & IOCB_HIPRI) { + bio->bi_opf |= REQ_POLLED | REQ_NOWAIT; + submit_bio(bio); + WRITE_ONCE(iocb->private, bio); + } else { + if (iocb->ki_flags & IOCB_NOWAIT) + bio->bi_opf |= REQ_NOWAIT; + submit_bio(bio); + } + return -EIOCBQUEUED; +} + static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) { unsigned int nr_pages; @@ -313,9 +378,11 @@ static ssize_t blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter) return 0; nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1); - if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS) - return __blkdev_direct_IO_simple(iocb, iter, nr_pages); - + if (likely(nr_pages <= BIO_MAX_VECS)) { + if (is_sync_kiocb(iocb)) + return __blkdev_direct_IO_simple(iocb, iter, nr_pages); + return __blkdev_direct_IO_async(iocb, iter, nr_pages); + } return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages)); } @@ -538,7 +605,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start, return -EOPNOTSUPP; /* Don't go off the end of the device. */ - isize = i_size_read(bdev->bd_inode); + isize = bdev_nr_bytes(bdev); if (start >= isize) return -EINVAL; if (end >= isize) { diff --git a/block/genhd.c b/block/genhd.c index 64f83c4aee99..febaaa55125a 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -58,6 +58,7 @@ void set_capacity(struct gendisk *disk, sector_t sectors) spin_lock(&bdev->bd_size_lock); i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); + bdev->bd_nr_sectors = sectors; spin_unlock(&bdev->bd_size_lock); } EXPORT_SYMBOL(set_capacity); @@ -589,16 +590,6 @@ void del_gendisk(struct gendisk *disk) * Prevent new I/O from crossing bio_queue_enter(). */ blk_queue_start_drain(q); - blk_mq_freeze_queue_wait(q); - - rq_qos_exit(q); - blk_sync_queue(q); - blk_flush_integrity(); - /* - * Allow using passthrough request again after the queue is torn down. - */ - blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q); - __blk_mq_unfreeze_queue(q, true); if (!(disk->flags & GENHD_FL_HIDDEN)) { sysfs_remove_link(&disk_to_dev(disk)->kobj, "bdi"); @@ -621,6 +612,18 @@ void del_gendisk(struct gendisk *disk) sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); device_del(disk_to_dev(disk)); + + blk_mq_freeze_queue_wait(q); + + rq_qos_exit(q); + blk_sync_queue(q); + blk_flush_integrity(); + /* + * Allow using passthrough request again after the queue is torn down. 
+ */ + blk_queue_flag_clear(QUEUE_FLAG_INIT_DONE, q); + __blk_mq_unfreeze_queue(q, true); + } EXPORT_SYMBOL(del_gendisk); diff --git a/block/ioctl.c b/block/ioctl.c index 77b1b2453f39..d6af0ac97e57 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -132,7 +132,7 @@ static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode, if (len & 511) return -EINVAL; - if (start + len > i_size_read(bdev->bd_inode)) + if (start + len > bdev_nr_bytes(bdev)) return -EINVAL; err = truncate_bdev_range(bdev, mode, start, start + len - 1); @@ -164,7 +164,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, fmode_t mode, return -EINVAL; if (len & 511) return -EINVAL; - if (end >= (uint64_t)i_size_read(bdev->bd_inode)) + if (end >= (uint64_t)bdev_nr_bytes(bdev)) return -EINVAL; if (end < start) return -EINVAL; @@ -543,7 +543,6 @@ long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) struct block_device *bdev = I_BDEV(file->f_mapping->host); void __user *argp = (void __user *)arg; fmode_t mode = file->f_mode; - loff_t size; int ret; /* @@ -570,10 +569,9 @@ long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) return put_long(argp, (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); case BLKGETSIZE: - size = i_size_read(bdev->bd_inode); - if ((size >> 9) > ~0UL) + if (bdev_nr_sectors(bdev) > ~0UL) return -EFBIG; - return put_ulong(argp, size >> 9); + return put_ulong(argp, bdev_nr_sectors(bdev)); /* The data is compatible, but the command number is different */ case BLKBSZGET: /* get block device soft block size (cf. BLKSSZGET) */ @@ -581,7 +579,7 @@ long blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKBSZSET: return blkdev_bszset(bdev, mode, argp); case BLKGETSIZE64: - return put_u64(argp, i_size_read(bdev->bd_inode)); + return put_u64(argp, bdev_nr_bytes(bdev)); /* Incompatible alignment on i386 */ case BLKTRACESETUP: @@ -615,7 +613,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) struct block_device *bdev = I_BDEV(file->f_mapping->host); struct gendisk *disk = bdev->bd_disk; fmode_t mode = file->f_mode; - loff_t size; /* * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have @@ -641,10 +638,9 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) return compat_put_long(argp, (bdev->bd_disk->bdi->ra_pages * PAGE_SIZE) / 512); case BLKGETSIZE: - size = i_size_read(bdev->bd_inode); - if ((size >> 9) > ~0UL) + if (bdev_nr_sectors(bdev) > ~0UL) return -EFBIG; - return compat_put_ulong(argp, size >> 9); + return compat_put_ulong(argp, bdev_nr_sectors(bdev)); /* The data is compatible, but the command number is different */ case BLKBSZGET_32: /* get the logical block size (cf. 
BLKSSZGET) */ @@ -652,7 +648,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKBSZSET_32: return blkdev_bszset(bdev, mode, argp); case BLKGETSIZE64_32: - return put_u64(argp, i_size_read(bdev->bd_inode)); + return put_u64(argp, bdev_nr_bytes(bdev)); /* Incompatible alignment on i386 */ case BLKTRACESETUP32: diff --git a/block/partitions/core.c b/block/partitions/core.c index 9dbddc355b40..334b72ef1d73 100644 --- a/block/partitions/core.c +++ b/block/partitions/core.c @@ -91,6 +91,7 @@ static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors) { spin_lock(&bdev->bd_size_lock); i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT); + bdev->bd_nr_sectors = sectors; spin_unlock(&bdev->bd_size_lock); } @@ -424,6 +425,7 @@ out_del: device_del(pdev); out_put: put_device(pdev); + return ERR_PTR(err); out_put_disk: put_disk(disk); return ERR_PTR(err); diff --git a/block/partitions/efi.c b/block/partitions/efi.c index 7ca5c4c374d4..5e9be13a56a8 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -133,7 +133,7 @@ efi_crc32(const void *buf, unsigned long len) */ static u64 last_lba(struct gendisk *disk) { - return div_u64(disk->part0->bd_inode->i_size, + return div_u64(bdev_nr_bytes(disk->part0), queue_logical_block_size(disk->queue)) - 1ULL; } diff --git a/block/partitions/ibm.c b/block/partitions/ibm.c index 9bca396aef4a..403756dbd50d 100644 --- a/block/partitions/ibm.c +++ b/block/partitions/ibm.c @@ -198,7 +198,7 @@ static int find_lnx1_partitions(struct parsed_partitions *state, char name[], union label_t *label, sector_t labelsect, - loff_t i_size, + sector_t nr_sectors, dasd_information2_t *info) { loff_t offset, geo_size, size; @@ -213,14 +213,14 @@ static int find_lnx1_partitions(struct parsed_partitions *state, } else { /* * Formated w/o large volume support. If the sanity check - * 'size based on geo == size based on i_size' is true, then + * 'size based on geo == size based on nr_sectors' is true, then * we can safely assume that we know the formatted size of * the disk, otherwise we need additional information * that we can only get from a real DASD device. 
*/ geo_size = geo->cylinders * geo->heads * geo->sectors * secperblk; - size = i_size >> 9; + size = nr_sectors; if (size != geo_size) { if (!info) { strlcat(state->pp_buf, "\n", PAGE_SIZE); @@ -229,7 +229,7 @@ static int find_lnx1_partitions(struct parsed_partitions *state, if (!strcmp(info->type, "ECKD")) if (geo_size < size) size = geo_size; - /* else keep size based on i_size */ + /* else keep size based on nr_sectors */ } } /* first and only partition starts in the first block after the label */ @@ -293,7 +293,8 @@ int ibm_partition(struct parsed_partitions *state) struct gendisk *disk = state->disk; struct block_device *bdev = disk->part0; int blocksize, res; - loff_t i_size, offset, size; + loff_t offset, size; + sector_t nr_sectors; dasd_information2_t *info; struct hd_geometry *geo; char type[5] = {0,}; @@ -308,8 +309,8 @@ int ibm_partition(struct parsed_partitions *state) blocksize = bdev_logical_block_size(bdev); if (blocksize <= 0) goto out_symbol; - i_size = i_size_read(bdev->bd_inode); - if (i_size == 0) + nr_sectors = bdev_nr_sectors(bdev); + if (nr_sectors == 0) goto out_symbol; info = kmalloc(sizeof(dasd_information2_t), GFP_KERNEL); if (info == NULL) @@ -336,7 +337,7 @@ int ibm_partition(struct parsed_partitions *state) label); } else if (!strncmp(type, "LNX1", 4)) { res = find_lnx1_partitions(state, geo, blocksize, name, - label, labelsect, i_size, + label, labelsect, nr_sectors, info); } else if (!strncmp(type, "CMS1", 4)) { res = find_cms1_partitions(state, geo, blocksize, name, @@ -353,7 +354,7 @@ int ibm_partition(struct parsed_partitions *state) res = 1; if (info->format == DASD_FORMAT_LDL) { strlcat(state->pp_buf, "(nonl)", PAGE_SIZE); - size = i_size >> 9; + size = nr_sectors; offset = (info->label_block + 1) * (blocksize >> 9); put_partition(state, 1, offset, size-offset); strlcat(state->pp_buf, "\n", PAGE_SIZE); |
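
Another recurring conversion above (fops.c, ioctl.c, partitions/efi.c, partitions/ibm.c) replaces open-coded i_size_read(bdev->bd_inode) arithmetic with the bdev_nr_bytes() and bdev_nr_sectors() helpers, backed by the bdev->bd_nr_sectors field that set_capacity() and bdev_set_nr_sectors() now maintain. A minimal sketch of the calling convention, with an illustrative range check modeled on the blk_ioctl_discard() hunk:

#include <linux/blkdev.h>

/* Illustrative only: bounds-check a byte range against a block device. */
static int example_check_range(struct block_device *bdev,
			       uint64_t start, uint64_t len)
{
	/* Previously: if (start + len > i_size_read(bdev->bd_inode)) */
	if (start + len > bdev_nr_bytes(bdev))
		return -EINVAL;

	/*
	 * Sector-granularity variant, as the BLKGETSIZE ioctl now does;
	 * bdev_nr_sectors() reports the same capacity in 512-byte units.
	 */
	if (bdev_nr_sectors(bdev) > ~0UL)
		return -EFBIG;

	return 0;
}
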