summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-11-09 11:20:07 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2021-11-09 11:20:07 -0800
commit3e28850cbd359bed841b832200f9fc208a9ef040 (patch)
treeef4e5b294f934f58fc08feb89d24291b71c01d4a
parent1dc1f92e24d6a5479ae8ceea3e2fac69f8d9dab7 (diff)
parent26af1cd00364ce20dbec66b93ef42f9d42dc6953 (diff)
Merge tag 'for-5.16/block-2021-11-09' of git://git.kernel.dk/linux-block
Pull block fixes from Jens Axboe: - Set of fixes for the batched tag allocation (Ming, me) - add_disk() error handling fix (Luis) - Nested queue quiesce fixes (Ming) - Shared tags init error handling fix (Ye) - Misc cleanups (Jean, Ming, me) * tag 'for-5.16/block-2021-11-09' of git://git.kernel.dk/linux-block: nvme: wait until quiesce is done scsi: make sure that request queue queiesce and unquiesce balanced scsi: avoid to quiesce sdev->request_queue two times blk-mq: add one API for waiting until quiesce is done blk-mq: don't free tags if the tag_set is used by other device in queue initialztion block: fix device_add_disk() kobject_create_and_add() error handling block: ensure cached plug request matches the current queue block: move queue enter logic into blk_mq_submit_bio() block: make bio_queue_enter() fast-path available inline block: split request allocation components into helpers block: have plug stored requests hold references to the queue blk-mq: update hctx->nr_active in blk_mq_end_request_batch() blk-mq: add RQF_ELV debug entry blk-mq: only try to run plug merge if request has same queue with incoming bio block: move RQF_ELV setting into allocators dm: don't stop request queue after the dm device is suspended block: replace always false argument with 'false' block: assign correct tag before doing prefetch of request blk-mq: fix redundant check of !e expression
-rw-r--r--block/blk-core.c61
-rw-r--r--block/blk-merge.c6
-rw-r--r--block/blk-mq-debugfs.c1
-rw-r--r--block/blk-mq-sched.c15
-rw-r--r--block/blk-mq.c187
-rw-r--r--block/blk-mq.h12
-rw-r--r--block/blk.h35
-rw-r--r--block/genhd.c8
-rw-r--r--drivers/md/dm.c10
-rw-r--r--drivers/nvme/host/core.c4
-rw-r--r--drivers/scsi/scsi_lib.c62
-rw-r--r--include/linux/blk-mq.h1
-rw-r--r--include/scsi/scsi_device.h1
13 files changed, 263 insertions, 140 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index ac1de7d73a45..b043de2baaac 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -386,30 +386,6 @@ void blk_cleanup_queue(struct request_queue *q)
}
EXPORT_SYMBOL(blk_cleanup_queue);
-static bool blk_try_enter_queue(struct request_queue *q, bool pm)
-{
- rcu_read_lock();
- if (!percpu_ref_tryget_live_rcu(&q->q_usage_counter))
- goto fail;
-
- /*
- * The code that increments the pm_only counter must ensure that the
- * counter is globally visible before the queue is unfrozen.
- */
- if (blk_queue_pm_only(q) &&
- (!pm || queue_rpm_status(q) == RPM_SUSPENDED))
- goto fail_put;
-
- rcu_read_unlock();
- return true;
-
-fail_put:
- blk_queue_exit(q);
-fail:
- rcu_read_unlock();
- return false;
-}
-
/**
* blk_queue_enter() - try to increase q->q_usage_counter
* @q: request queue pointer
@@ -442,10 +418,8 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
return 0;
}
-static inline int bio_queue_enter(struct bio *bio)
+int __bio_queue_enter(struct request_queue *q, struct bio *bio)
{
- struct request_queue *q = bdev_get_queue(bio->bi_bdev);
-
while (!blk_try_enter_queue(q, false)) {
struct gendisk *disk = bio->bi_bdev->bd_disk;
@@ -742,7 +716,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
return BLK_STS_OK;
}
-static noinline_for_stack bool submit_bio_checks(struct bio *bio)
+noinline_for_stack bool submit_bio_checks(struct bio *bio)
{
struct block_device *bdev = bio->bi_bdev;
struct request_queue *q = bdev_get_queue(bdev);
@@ -860,22 +834,23 @@ end_io:
return false;
}
-static void __submit_bio(struct bio *bio)
+static void __submit_bio_fops(struct gendisk *disk, struct bio *bio)
{
- struct gendisk *disk = bio->bi_bdev->bd_disk;
-
if (unlikely(bio_queue_enter(bio) != 0))
return;
+ if (submit_bio_checks(bio) && blk_crypto_bio_prep(&bio))
+ disk->fops->submit_bio(bio);
+ blk_queue_exit(disk->queue);
+}
- if (!submit_bio_checks(bio) || !blk_crypto_bio_prep(&bio))
- goto queue_exit;
- if (!disk->fops->submit_bio) {
+static void __submit_bio(struct bio *bio)
+{
+ struct gendisk *disk = bio->bi_bdev->bd_disk;
+
+ if (!disk->fops->submit_bio)
blk_mq_submit_bio(bio);
- return;
- }
- disk->fops->submit_bio(bio);
-queue_exit:
- blk_queue_exit(disk->queue);
+ else
+ __submit_bio_fops(disk, bio);
}
/*
@@ -1615,7 +1590,13 @@ void blk_flush_plug(struct blk_plug *plug, bool from_schedule)
flush_plug_callbacks(plug, from_schedule);
if (!rq_list_empty(plug->mq_list))
blk_mq_flush_plug_list(plug, from_schedule);
- if (unlikely(!from_schedule && plug->cached_rq))
+ /*
+ * Unconditionally flush out cached requests, even if the unplug
+ * event came from schedule. Since we know hold references to the
+ * queue for cached requests, we don't want a blocked task holding
+ * up a queue freeze/quiesce event.
+ */
+ if (unlikely(!rq_list_empty(plug->cached_rq)))
blk_mq_free_plug_rqs(plug);
}
diff --git a/block/blk-merge.c b/block/blk-merge.c
index df69f4bb7717..893c1a60b701 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -1101,9 +1101,11 @@ bool blk_attempt_plug_merge(struct request_queue *q, struct bio *bio,
* the same queue, there should be only one such rq in a queue
*/
*same_queue_rq = true;
+
+ if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) ==
+ BIO_MERGE_OK)
+ return true;
}
- if (blk_attempt_bio_merge(q, rq, bio, nr_segs, false) == BIO_MERGE_OK)
- return true;
return false;
}
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index f5076c173477..4f2cf8399f3d 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -308,6 +308,7 @@ static const char *const rqf_name[] = {
RQF_NAME(SPECIAL_PAYLOAD),
RQF_NAME(ZONE_WRITE_LOCKED),
RQF_NAME(MQ_POLL_SLEPT),
+ RQF_NAME(ELV),
};
#undef RQF_NAME
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index c62b966dfaba..4be652fa38e7 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -370,15 +370,20 @@ bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
bool ret = false;
enum hctx_type type;
- if (e && e->type->ops.bio_merge)
- return e->type->ops.bio_merge(q, bio, nr_segs);
+ if (bio_queue_enter(bio))
+ return false;
+
+ if (e && e->type->ops.bio_merge) {
+ ret = e->type->ops.bio_merge(q, bio, nr_segs);
+ goto out_put;
+ }
ctx = blk_mq_get_ctx(q);
hctx = blk_mq_map_queue(q, bio->bi_opf, ctx);
type = hctx->type;
if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE) ||
list_empty_careful(&ctx->rq_lists[type]))
- return false;
+ goto out_put;
/* default per sw-queue merge */
spin_lock(&ctx->lock);
@@ -391,6 +396,8 @@ bool blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
ret = true;
spin_unlock(&ctx->lock);
+out_put:
+ blk_queue_exit(q);
return ret;
}
@@ -497,7 +504,7 @@ void blk_mq_sched_insert_requests(struct blk_mq_hw_ctx *hctx,
* busy in case of 'none' scheduler, and this way may save
* us one extra enqueue & dequeue to sw queue.
*/
- if (!hctx->dispatch_busy && !e && !run_queue_async) {
+ if (!hctx->dispatch_busy && !run_queue_async) {
blk_mq_try_issue_list_directly(hctx, list);
if (list_empty(list))
goto out;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 07eb1412760b..629cf421417f 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -251,22 +251,18 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q)
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
/**
- * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
+ * blk_mq_wait_quiesce_done() - wait until in-progress quiesce is done
* @q: request queue.
*
- * Note: this function does not prevent that the struct request end_io()
- * callback function is invoked. Once this function is returned, we make
- * sure no dispatch can happen until the queue is unquiesced via
- * blk_mq_unquiesce_queue().
+ * Note: it is driver's responsibility for making sure that quiesce has
+ * been started.
*/
-void blk_mq_quiesce_queue(struct request_queue *q)
+void blk_mq_wait_quiesce_done(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
unsigned int i;
bool rcu = false;
- blk_mq_quiesce_queue_nowait(q);
-
queue_for_each_hw_ctx(q, hctx, i) {
if (hctx->flags & BLK_MQ_F_BLOCKING)
synchronize_srcu(hctx->srcu);
@@ -276,6 +272,22 @@ void blk_mq_quiesce_queue(struct request_queue *q)
if (rcu)
synchronize_rcu();
}
+EXPORT_SYMBOL_GPL(blk_mq_wait_quiesce_done);
+
+/**
+ * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
+ * @q: request queue.
+ *
+ * Note: this function does not prevent that the struct request end_io()
+ * callback function is invoked. Once this function is returned, we make
+ * sure no dispatch can happen until the queue is unquiesced via
+ * blk_mq_unquiesce_queue().
+ */
+void blk_mq_quiesce_queue(struct request_queue *q)
+{
+ blk_mq_quiesce_queue_nowait(q);
+ blk_mq_wait_quiesce_done(q);
+}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
/*
@@ -405,12 +417,15 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data,
for (i = 0; tag_mask; i++) {
if (!(tag_mask & (1UL << i)))
continue;
- prefetch(tags->static_rqs[tag]);
tag = tag_offset + i;
+ prefetch(tags->static_rqs[tag]);
tag_mask &= ~(1UL << i);
rq = blk_mq_rq_ctx_init(data, tags, tag, alloc_time_ns);
rq_list_add(data->cached_rq, rq);
+ nr++;
}
+ /* caller already holds a reference, add for remainder */
+ percpu_ref_get_many(&data->q->q_usage_counter, nr - 1);
data->nr_tags -= nr;
return rq_list_pop(data->cached_rq);
@@ -419,7 +434,6 @@ __blk_mq_alloc_requests_batch(struct blk_mq_alloc_data *data,
static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
{
struct request_queue *q = data->q;
- struct elevator_queue *e = q->elevator;
u64 alloc_time_ns = 0;
struct request *rq;
unsigned int tag;
@@ -431,7 +445,11 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
if (data->cmd_flags & REQ_NOWAIT)
data->flags |= BLK_MQ_REQ_NOWAIT;
- if (e) {
+ if (q->elevator) {
+ struct elevator_queue *e = q->elevator;
+
+ data->rq_flags |= RQF_ELV;
+
/*
* Flush/passthrough requests are special and go directly to the
* dispatch list. Don't include reserved tags in the
@@ -447,7 +465,7 @@ static struct request *__blk_mq_alloc_requests(struct blk_mq_alloc_data *data)
retry:
data->ctx = blk_mq_get_ctx(q);
data->hctx = blk_mq_map_queue(q, data->cmd_flags, data->ctx);
- if (!e)
+ if (!(data->rq_flags & RQF_ELV))
blk_mq_tag_busy(data->hctx);
/*
@@ -490,7 +508,6 @@ struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
.q = q,
.flags = flags,
.cmd_flags = op,
- .rq_flags = q->elevator ? RQF_ELV : 0,
.nr_tags = 1,
};
struct request *rq;
@@ -520,7 +537,6 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
.q = q,
.flags = flags,
.cmd_flags = op,
- .rq_flags = q->elevator ? RQF_ELV : 0,
.nr_tags = 1,
};
u64 alloc_time_ns = 0;
@@ -561,6 +577,8 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
if (!q->elevator)
blk_mq_tag_busy(data.hctx);
+ else
+ data.rq_flags |= RQF_ELV;
ret = -EWOULDBLOCK;
tag = blk_mq_get_tag(&data);
@@ -627,10 +645,8 @@ void blk_mq_free_plug_rqs(struct blk_plug *plug)
{
struct request *rq;
- while ((rq = rq_list_pop(&plug->cached_rq)) != NULL) {
- percpu_ref_get(&rq->q->q_usage_counter);
+ while ((rq = rq_list_pop(&plug->cached_rq)) != NULL)
blk_mq_free_request(rq);
- }
}
static void req_bio_endio(struct request *rq, struct bio *bio,
@@ -815,6 +831,13 @@ static inline void blk_mq_flush_tag_batch(struct blk_mq_hw_ctx *hctx,
{
struct request_queue *q = hctx->queue;
+ /*
+ * All requests should have been marked as RQF_MQ_INFLIGHT, so
+ * update hctx->nr_active in batch
+ */
+ if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
+ __blk_mq_sub_active_requests(hctx, nr_tags);
+
blk_mq_put_tags(hctx->tags, tag_array, nr_tags);
percpu_ref_put_many(&q->q_usage_counter, nr_tags);
}
@@ -2232,7 +2255,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule)
plug->rq_count = 0;
if (!plug->multiple_queues && !plug->has_elevator && !from_schedule) {
- blk_mq_plug_issue_direct(plug, from_schedule);
+ blk_mq_plug_issue_direct(plug, false);
if (rq_list_empty(plug->mq_list))
return;
}
@@ -2472,6 +2495,83 @@ static inline unsigned short blk_plug_max_rq_count(struct blk_plug *plug)
return BLK_MAX_REQUEST_COUNT;
}
+static bool blk_attempt_bio_merge(struct request_queue *q, struct bio *bio,
+ unsigned int nr_segs, bool *same_queue_rq)
+{
+ if (!blk_queue_nomerges(q) && bio_mergeable(bio)) {
+ if (blk_attempt_plug_merge(q, bio, nr_segs, same_queue_rq))
+ return true;
+ if (blk_mq_sched_bio_merge(q, bio, nr_segs))
+ return true;
+ }
+ return false;
+}
+
+static struct request *blk_mq_get_new_requests(struct request_queue *q,
+ struct blk_plug *plug,
+ struct bio *bio,
+ unsigned int nsegs,
+ bool *same_queue_rq)
+{
+ struct blk_mq_alloc_data data = {
+ .q = q,
+ .nr_tags = 1,
+ .cmd_flags = bio->bi_opf,
+ };
+ struct request *rq;
+
+ if (unlikely(bio_queue_enter(bio)))
+ return NULL;
+ if (unlikely(!submit_bio_checks(bio)))
+ goto put_exit;
+ if (blk_attempt_bio_merge(q, bio, nsegs, same_queue_rq))
+ goto put_exit;
+
+ rq_qos_throttle(q, bio);
+
+ if (plug) {
+ data.nr_tags = plug->nr_ios;
+ plug->nr_ios = 1;
+ data.cached_rq = &plug->cached_rq;
+ }
+
+ rq = __blk_mq_alloc_requests(&data);
+ if (rq)
+ return rq;
+
+ rq_qos_cleanup(q, bio);
+ if (bio->bi_opf & REQ_NOWAIT)
+ bio_wouldblock_error(bio);
+put_exit:
+ blk_queue_exit(q);
+ return NULL;
+}
+
+static inline struct request *blk_mq_get_request(struct request_queue *q,
+ struct blk_plug *plug,
+ struct bio *bio,
+ unsigned int nsegs,
+ bool *same_queue_rq)
+{
+ if (plug) {
+ struct request *rq;
+
+ rq = rq_list_peek(&plug->cached_rq);
+ if (rq && rq->q == q) {
+ if (unlikely(!submit_bio_checks(bio)))
+ return NULL;
+ if (blk_attempt_bio_merge(q, bio, nsegs, same_queue_rq))
+ return NULL;
+ plug->cached_rq = rq_list_next(rq);
+ INIT_LIST_HEAD(&rq->queuelist);
+ rq_qos_throttle(q, bio);
+ return rq;
+ }
+ }
+
+ return blk_mq_get_new_requests(q, plug, bio, nsegs, same_queue_rq);
+}
+
/**
* blk_mq_submit_bio - Create and send a request to block device.
* @bio: Bio pointer.
@@ -2495,47 +2595,20 @@ void blk_mq_submit_bio(struct bio *bio)
unsigned int nr_segs = 1;
blk_status_t ret;
+ if (unlikely(!blk_crypto_bio_prep(&bio)))
+ return;
+
blk_queue_bounce(q, &bio);
if (blk_may_split(q, bio))
__blk_queue_split(q, &bio, &nr_segs);
if (!bio_integrity_prep(bio))
- goto queue_exit;
-
- if (!blk_queue_nomerges(q) && bio_mergeable(bio)) {
- if (blk_attempt_plug_merge(q, bio, nr_segs, &same_queue_rq))
- goto queue_exit;
- if (blk_mq_sched_bio_merge(q, bio, nr_segs))
- goto queue_exit;
- }
-
- rq_qos_throttle(q, bio);
+ return;
plug = blk_mq_plug(q, bio);
- if (plug && plug->cached_rq) {
- rq = rq_list_pop(&plug->cached_rq);
- INIT_LIST_HEAD(&rq->queuelist);
- } else {
- struct blk_mq_alloc_data data = {
- .q = q,
- .nr_tags = 1,
- .cmd_flags = bio->bi_opf,
- .rq_flags = q->elevator ? RQF_ELV : 0,
- };
-
- if (plug) {
- data.nr_tags = plug->nr_ios;
- plug->nr_ios = 1;
- data.cached_rq = &plug->cached_rq;
- }
- rq = __blk_mq_alloc_requests(&data);
- if (unlikely(!rq)) {
- rq_qos_cleanup(q, bio);
- if (bio->bi_opf & REQ_NOWAIT)
- bio_wouldblock_error(bio);
- goto queue_exit;
- }
- }
+ rq = blk_mq_get_request(q, plug, bio, nr_segs, &same_queue_rq);
+ if (unlikely(!rq))
+ return;
trace_block_getrq(bio);
@@ -2616,10 +2689,6 @@ void blk_mq_submit_bio(struct bio *bio)
/* Default case. */
blk_mq_sched_insert_request(rq, false, true, true);
}
-
- return;
-queue_exit:
- blk_queue_exit(q);
}
static size_t order_to_size(unsigned int order)
@@ -3605,7 +3674,6 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
struct blk_mq_hw_ctx *hctx = hctxs[j];
if (hctx) {
- __blk_mq_free_map_and_rqs(set, j);
blk_mq_exit_hctx(q, set, hctx, j);
hctxs[j] = NULL;
}
@@ -4113,8 +4181,13 @@ fallback:
list_for_each_entry(q, &set->tag_list, tag_set_list) {
blk_mq_realloc_hw_ctxs(set, q);
if (q->nr_hw_queues != set->nr_hw_queues) {
+ int i = prev_nr_hw_queues;
+
pr_warn("Increasing nr_hw_queues to %d fails, fallback to %d\n",
nr_hw_queues, prev_nr_hw_queues);
+ for (; i < set->nr_hw_queues; i++)
+ __blk_mq_free_map_and_rqs(set, i);
+
set->nr_hw_queues = prev_nr_hw_queues;
blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
goto fallback;
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 28859fc5faee..cb0b5482ca5e 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -225,12 +225,18 @@ static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx)
atomic_inc(&hctx->nr_active);
}
-static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx)
+static inline void __blk_mq_sub_active_requests(struct blk_mq_hw_ctx *hctx,
+ int val)
{
if (blk_mq_is_shared_tags(hctx->flags))
- atomic_dec(&hctx->queue->nr_active_requests_shared_tags);
+ atomic_sub(val, &hctx->queue->nr_active_requests_shared_tags);
else
- atomic_dec(&hctx->nr_active);
+ atomic_sub(val, &hctx->nr_active);
+}
+
+static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx)
+{
+ __blk_mq_sub_active_requests(hctx, 1);
}
static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx)
diff --git a/block/blk.h b/block/blk.h
index 7afffd548daf..b4fed2033e48 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -55,6 +55,41 @@ void blk_free_flush_queue(struct blk_flush_queue *q);
void blk_freeze_queue(struct request_queue *q);
void __blk_mq_unfreeze_queue(struct request_queue *q, bool force_atomic);
void blk_queue_start_drain(struct request_queue *q);
+int __bio_queue_enter(struct request_queue *q, struct bio *bio);
+bool submit_bio_checks(struct bio *bio);
+
+static inline bool blk_try_enter_queue(struct request_queue *q, bool pm)
+{
+ rcu_read_lock();
+ if (!percpu_ref_tryget_live_rcu(&q->q_usage_counter))
+ goto fail;
+
+ /*
+ * The code that increments the pm_only counter must ensure that the
+ * counter is globally visible before the queue is unfrozen.
+ */
+ if (blk_queue_pm_only(q) &&
+ (!pm || queue_rpm_status(q) == RPM_SUSPENDED))
+ goto fail_put;
+
+ rcu_read_unlock();
+ return true;
+
+fail_put:
+ blk_queue_exit(q);
+fail:
+ rcu_read_unlock();
+ return false;
+}
+
+static inline int bio_queue_enter(struct bio *bio)
+{
+ struct request_queue *q = bdev_get_queue(bio->bi_bdev);
+
+ if (blk_try_enter_queue(q, false))
+ return 0;
+ return __bio_queue_enter(q, bio);
+}
#define BIO_INLINE_VECS 4
struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
diff --git a/block/genhd.c b/block/genhd.c
index febaaa55125a..a4e9e8ebd941 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -469,11 +469,15 @@ int device_add_disk(struct device *parent, struct gendisk *disk,
disk->part0->bd_holder_dir =
kobject_create_and_add("holders", &ddev->kobj);
- if (!disk->part0->bd_holder_dir)
+ if (!disk->part0->bd_holder_dir) {
+ ret = -ENOMEM;
goto out_del_integrity;
+ }
disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
- if (!disk->slave_dir)
+ if (!disk->slave_dir) {
+ ret = -ENOMEM;
goto out_put_holder_dir;
+ }
ret = bd_register_pending_holders(disk);
if (ret < 0)
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 8d3157241262..662742a310cb 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1927,16 +1927,6 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
dm_table_event_callback(t, event_callback, md);
- /*
- * The queue hasn't been stopped yet, if the old table type wasn't
- * for request-based during suspension. So stop it to prevent
- * I/O mapping before resume.
- * This must be done before setting the queue restrictions,
- * because request-based dm may be run just after the setting.
- */
- if (request_based)
- dm_stop_queue(q);
-
if (request_based) {
/*
* Leverage the fact that request-based DM targets are
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 838b5e2058be..4b5de8f5435a 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -4518,6 +4518,8 @@ static void nvme_stop_ns_queue(struct nvme_ns *ns)
{
if (!test_and_set_bit(NVME_NS_STOPPED, &ns->flags))
blk_mq_quiesce_queue(ns->queue);
+ else
+ blk_mq_wait_quiesce_done(ns->queue);
}
/*
@@ -4637,6 +4639,8 @@ void nvme_stop_admin_queue(struct nvme_ctrl *ctrl)
{
if (!test_and_set_bit(NVME_CTRL_ADMIN_Q_STOPPED, &ctrl->flags))
blk_mq_quiesce_queue(ctrl->admin_q);
+ else
+ blk_mq_wait_quiesce_done(ctrl->admin_q);
}
EXPORT_SYMBOL_GPL(nvme_stop_admin_queue);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 1344553afe70..b731c2983515 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2665,6 +2665,40 @@ scsi_target_resume(struct scsi_target *starget)
}
EXPORT_SYMBOL(scsi_target_resume);
+static int __scsi_internal_device_block_nowait(struct scsi_device *sdev)
+{
+ if (scsi_device_set_state(sdev, SDEV_BLOCK))
+ return scsi_device_set_state(sdev, SDEV_CREATED_BLOCK);
+
+ return 0;
+}
+
+void scsi_start_queue(struct scsi_device *sdev)
+{
+ if (cmpxchg(&sdev->queue_stopped, 1, 0))
+ blk_mq_unquiesce_queue(sdev->request_queue);
+}
+
+static void scsi_stop_queue(struct scsi_device *sdev, bool nowait)
+{
+ /*
+ * The atomic variable of ->queue_stopped covers that
+ * blk_mq_quiesce_queue* is balanced with blk_mq_unquiesce_queue.
+ *
+ * However, we still need to wait until quiesce is done
+ * in case that queue has been stopped.
+ */
+ if (!cmpxchg(&sdev->queue_stopped, 0, 1)) {
+ if (nowait)
+ blk_mq_quiesce_queue_nowait(sdev->request_queue);
+ else
+ blk_mq_quiesce_queue(sdev->request_queue);
+ } else {
+ if (!nowait)
+ blk_mq_wait_quiesce_done(sdev->request_queue);
+ }
+}
+
/**
* scsi_internal_device_block_nowait - try to transition to the SDEV_BLOCK state
* @sdev: device to block
@@ -2681,24 +2715,16 @@ EXPORT_SYMBOL(scsi_target_resume);
*/
int scsi_internal_device_block_nowait(struct scsi_device *sdev)
{
- struct request_queue *q = sdev->request_queue;
- int err = 0;
-
- err = scsi_device_set_state(sdev, SDEV_BLOCK);
- if (err) {
- err = scsi_device_set_state(sdev, SDEV_CREATED_BLOCK);
-
- if (err)
- return err;
- }
+ int ret = __scsi_internal_device_block_nowait(sdev);
/*
* The device has transitioned to SDEV_BLOCK. Stop the
* block layer from calling the midlayer with this device's
* request queue.
*/
- blk_mq_quiesce_queue_nowait(q);
- return 0;
+ if (!ret)
+ scsi_stop_queue(sdev, true);
+ return ret;
}
EXPORT_SYMBOL_GPL(scsi_internal_device_block_nowait);
@@ -2719,25 +2745,17 @@ EXPORT_SYMBOL_GPL(scsi_internal_device_block_nowait);
*/
static int scsi_internal_device_block(struct scsi_device *sdev)
{
- struct request_queue *q = sdev->request_queue;
int err;
mutex_lock(&sdev->state_mutex);
- err = scsi_internal_device_block_nowait(sdev);
+ err = __scsi_internal_device_block_nowait(sdev);
if (err == 0)
- blk_mq_quiesce_queue(q);
+ scsi_stop_queue(sdev, false);
mutex_unlock(&sdev->state_mutex);
return err;
}
-void scsi_start_queue(struct scsi_device *sdev)
-{
- struct request_queue *q = sdev->request_queue;
-
- blk_mq_unquiesce_queue(q);
-}
-
/**
* scsi_internal_device_unblock_nowait - resume a device after a block request
* @sdev: device to resume
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8682663e7368..2949d9ac7484 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -798,6 +798,7 @@ void blk_mq_start_hw_queues(struct request_queue *q);
void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
void blk_mq_quiesce_queue(struct request_queue *q);
+void blk_mq_wait_quiesce_done(struct request_queue *q);
void blk_mq_unquiesce_queue(struct request_queue *q);
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index 79c3045611fa..83a7890f1479 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -207,6 +207,7 @@ struct scsi_device {
* creation time */
unsigned ignore_media_change:1; /* Ignore MEDIA CHANGE on resume */
+ unsigned int queue_stopped; /* request queue is quiesced */
bool offline_already; /* Device offline message logged */
atomic_t disk_events_disable_depth; /* disable depth for disk events */