From 93b4e74789dbdefcffc7baac107069e74d98513c Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 17 Mar 2022 23:46:46 +0000 Subject: xen-blkback: remove redundant assignment to variable i MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Variable i is being assigned a value that is never read, it is being re-assigned later in a for-loop. The assignment is redundant and can be removed. Cleans up clang scan build warning: drivers/block/xen-blkback/blkback.c:934:14: warning: Although the value stored to 'i' is used in the enclosing expression, the value is never actually read from 'i' [deadcode.DeadStores] Signed-off-by: Colin Ian King Acked-by: Roger Pau Monné Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220317234646.78158-1-colin.i.king@gmail.com Signed-off-by: Jens Axboe --- drivers/block/xen-blkback/blkback.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c index d1e26461a64e..de42458195bc 100644 --- a/drivers/block/xen-blkback/blkback.c +++ b/drivers/block/xen-blkback/blkback.c @@ -931,7 +931,7 @@ static int xen_blkbk_parse_indirect(struct blkif_request *req, if (rc) goto unmap; - for (n = 0, i = 0; n < nseg; n++) { + for (n = 0; n < nseg; n++) { uint8_t first_sect, last_sect; if ((n % SEGS_PER_INDIRECT_FRAME) == 0) { -- cgit v1.2.3 From 08719dd9176b4c55f547bd11812fd6cc35907d37 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Thu, 17 Mar 2022 15:09:30 -0700 Subject: xen/blkfront: fix comment for need_copy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'need_copy' is set when rq_data_dir(req) returns WRITE, in order to copy the written data to persistent page. ".need_copy = rq_data_dir(req) && info->feature_persistent," Signed-off-by: Dongli Zhang Fixes: c004a6fe0c40 ('block/xen-blkfront: Make it running on 64KB page granularity') Acked-by: Roger Pau Monné Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220317220930.5698-1-dongli.zhang@oracle.com Signed-off-by: Jens Axboe --- drivers/block/xen-blkfront.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c index 5d04e480f36a..852df3f2c12a 100644 --- a/drivers/block/xen-blkfront.c +++ b/drivers/block/xen-blkfront.c @@ -576,7 +576,7 @@ struct setup_rw_req { struct blkif_request *ring_req; grant_ref_t gref_head; unsigned int id; - /* Only used when persistent grant is used and it's a read request */ + /* Only used when persistent grant is used and it's a write request */ bool need_copy; unsigned int bvec_off; char *bvec_data; -- cgit v1.2.3 From b2479de38d8fc7ef13d5c78ff5ded6e5a1a4eac0 Mon Sep 17 00:00:00 2001 From: Jackie Liu Date: Mon, 21 Mar 2022 15:12:16 +0800 Subject: n64cart: convert bi_disk to bi_bdev->bd_disk fix build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My kernel robot report below: drivers/block/n64cart.c: In function ‘n64cart_submit_bio’: drivers/block/n64cart.c:91:26: error: ‘struct bio’ has no member named ‘bi_disk’ 91 | struct device *dev = bio->bi_disk->private_data; | ^~ CC drivers/slimbus/qcom-ctrl.o CC drivers/auxdisplay/hd44780.o CC drivers/watchdog/watchdog_core.o CC drivers/nvme/host/fault_inject.o AR drivers/accessibility/braille/built-in.a make[2]: *** [scripts/Makefile.build:288: drivers/block/n64cart.o] Error 1 Fixes: 309dca309fc3 ("block: store a block_device pointer in struct bio"); Reported-by: k2ci Signed-off-by: Jackie Liu Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20220321071216.1549596-1-liu.yun@linux.dev Signed-off-by: Jens Axboe --- drivers/block/n64cart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c index 4db9a8c244af..e094d2b8b5a9 100644 --- a/drivers/block/n64cart.c +++ b/drivers/block/n64cart.c @@ -88,7 +88,7 @@ static void n64cart_submit_bio(struct bio *bio) { struct bio_vec bvec; struct bvec_iter iter; - struct device *dev = bio->bi_disk->private_data; + struct device *dev = bio->bi_bdev->bd_disk->private_data; u32 pos = bio->bi_iter.bi_sector << SECTOR_SHIFT; bio_for_each_segment(bvec, bio, iter) { -- cgit v1.2.3 From 726be2c72efc0a64c206e854b8996ad3ab9c7507 Mon Sep 17 00:00:00 2001 From: Pankaj Raghav Date: Tue, 22 Mar 2022 10:20:48 +0100 Subject: nvme: fix the read-only state for zoned namespaces with unsupposed features commit 2f4c9ba23b88 ("nvme: export zoned namespaces without Zone Append support read-only") marks zoned namespaces without append support read-only. It does iso by setting NVME_NS_FORCE_RO in ns->flags in nvme_update_zone_info and checking for that flag later in nvme_update_disk_info to mark the disk as read-only. But commit 73d90386b559 ("nvme: cleanup zone information initialization") rearranged nvme_update_disk_info to be called before nvme_update_zone_info and thus not marking the disk as read-only. The call order cannot be just reverted because nvme_update_zone_info sets certain queue parameters such as zone_write_granularity that depend on the prior call to nvme_update_disk_info. Remove the call to set_disk_ro in nvme_update_disk_info. and call set_disk_ro after nvme_update_zone_info and nvme_update_disk_info to set the permission for ZNS drives correctly. The same applies to the multipath disk path. Fixes: 73d90386b559 ("nvme: cleanup zone information initialization") Signed-off-by: Pankaj Raghav Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3c0461129bca..ccc5877d514b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1867,9 +1867,6 @@ static void nvme_update_disk_info(struct gendisk *disk, nvme_config_discard(disk, ns); blk_queue_max_write_zeroes_sectors(disk->queue, ns->ctrl->max_zeroes_sectors); - - set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) || - test_bit(NVME_NS_FORCE_RO, &ns->flags)); } static inline bool nvme_first_scan(struct gendisk *disk) @@ -1930,6 +1927,8 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) goto out_unfreeze; } + set_disk_ro(ns->disk, (id->nsattr & NVME_NS_ATTR_RO) || + test_bit(NVME_NS_FORCE_RO, &ns->flags)); set_bit(NVME_NS_READY, &ns->flags); blk_mq_unfreeze_queue(ns->disk->queue); @@ -1942,6 +1941,9 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_id_ns *id) if (nvme_ns_head_multipath(ns->head)) { blk_mq_freeze_queue(ns->head->disk->queue); nvme_update_disk_info(ns->head->disk, ns, id); + set_disk_ro(ns->head->disk, + (id->nsattr & NVME_NS_ATTR_RO) || + test_bit(NVME_NS_FORCE_RO, &ns->flags)); nvme_mpath_revalidate_paths(ns); blk_stack_limits(&ns->head->disk->queue->limits, &ns->queue->limits, 0); -- cgit v1.2.3 From 2e21e4454bd3435ef6e3b84492dcbfaaf9d8769c Mon Sep 17 00:00:00 2001 From: Xin Hao Date: Tue, 22 Mar 2022 10:35:12 +0800 Subject: nvme-pci: expose use_threaded_interrupts read-only in sysfs Allow reading /sys/module/nvme/parameters/use_threaded_interrupts to see if the use_threaded_interrupts module parameter is in use. Signed-off-by: Xin Hao Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9f4f3884fefe..9f3c392fe7a1 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -44,7 +44,7 @@ #define NVME_MAX_SEGS 127 static int use_threaded_interrupts; -module_param(use_threaded_interrupts, int, 0); +module_param(use_threaded_interrupts, int, 0444); static bool use_cmb_sqes = true; module_param(use_cmb_sqes, bool, 0444); -- cgit v1.2.3 From bc360b0b1611566e1bd47384daf49af6a1c51837 Mon Sep 17 00:00:00 2001 From: Monish Kumar R Date: Wed, 16 Mar 2022 13:24:49 +0530 Subject: nvme-pci: add quirks for Samsung X5 SSDs Add quirks to not fail the initialization and to have quick resume latency after cold/warm reboot. Signed-off-by: Monish Kumar R Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 9f3c392fe7a1..66f1eee2509a 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -3466,7 +3466,10 @@ static const struct pci_device_id nvme_id_table[] = { NVME_QUIRK_128_BYTES_SQES | NVME_QUIRK_SHARED_TAGS | NVME_QUIRK_SKIP_CID_GEN }, - + { PCI_DEVICE(0x144d, 0xa808), /* Samsung X5 */ + .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY| + NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_IGNORE_DEV_SUBNQN, }, { PCI_DEVICE_CLASS(PCI_CLASS_STORAGE_EXPRESS, 0xffffff) }, { 0, } }; -- cgit v1.2.3 From 8832cf922151e9dfa2821736beb0ae2dd3968b6e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 21 Mar 2022 13:57:27 +0200 Subject: nvmet: use a private workqueue instead of the system workqueue Any attempt to flush kernel-global WQs has possibility of deadlock so we should simply stop using them, instead introduce nvmet_wq which is the generic nvmet workqueue for work elements that don't explicitly require a dedicated workqueue (by the mere fact that they are using the system_wq). Changes were done using the following replaces: - s/schedule_work(/queue_work(nvmet_wq, /g - s/schedule_delayed_work(/queue_delayed_work(nvmet_wq, /g - s/flush_scheduled_work()/flush_workqueue(nvmet_wq)/g Reported-by: Tetsuo Handa Signed-off-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 2 +- drivers/nvme/target/configfs.c | 2 +- drivers/nvme/target/core.c | 24 ++++++++++++++++++------ drivers/nvme/target/fc.c | 8 ++++---- drivers/nvme/target/fcloop.c | 16 ++++++++-------- drivers/nvme/target/io-cmd-file.c | 6 +++--- drivers/nvme/target/loop.c | 4 ++-- drivers/nvme/target/nvmet.h | 1 + drivers/nvme/target/passthru.c | 2 +- drivers/nvme/target/rdma.c | 12 ++++++------ drivers/nvme/target/tcp.c | 10 +++++----- 11 files changed, 50 insertions(+), 37 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 46d0dab686dd..397daaf51f1b 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -988,7 +988,7 @@ void nvmet_execute_async_event(struct nvmet_req *req) ctrl->async_event_cmds[ctrl->nr_async_event_cmds++] = req; mutex_unlock(&ctrl->lock); - schedule_work(&ctrl->async_event_work); + queue_work(nvmet_wq, &ctrl->async_event_work); } void nvmet_execute_keep_alive(struct nvmet_req *req) diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index b650312dd4ae..c542f8457b1e 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -1593,7 +1593,7 @@ static void nvmet_port_release(struct config_item *item) struct nvmet_port *port = to_nvmet_port(item); /* Let inflight controllers teardown complete */ - flush_scheduled_work(); + flush_workqueue(nvmet_wq); list_del(&port->global_entry); kfree(port->ana_state); diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 92af37d33ec3..cd833b1dd47c 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -20,6 +20,9 @@ struct workqueue_struct *zbd_wq; static const struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; static DEFINE_IDA(cntlid_ida); +struct workqueue_struct *nvmet_wq; +EXPORT_SYMBOL_GPL(nvmet_wq); + /* * This read/write semaphore is used to synchronize access to configuration * information on a target system that will result in discovery log page @@ -205,7 +208,7 @@ void nvmet_add_async_event(struct nvmet_ctrl *ctrl, u8 event_type, list_add_tail(&aen->entry, &ctrl->async_events); mutex_unlock(&ctrl->lock); - schedule_work(&ctrl->async_event_work); + queue_work(nvmet_wq, &ctrl->async_event_work); } static void nvmet_add_to_changed_ns_log(struct nvmet_ctrl *ctrl, __le32 nsid) @@ -385,7 +388,7 @@ static void nvmet_keep_alive_timer(struct work_struct *work) if (reset_tbkas) { pr_debug("ctrl %d reschedule traffic based keep-alive timer\n", ctrl->cntlid); - schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); + queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ); return; } @@ -403,7 +406,7 @@ void nvmet_start_keep_alive_timer(struct nvmet_ctrl *ctrl) pr_debug("ctrl %d start keep-alive timer for %d secs\n", ctrl->cntlid, ctrl->kato); - schedule_delayed_work(&ctrl->ka_work, ctrl->kato * HZ); + queue_delayed_work(nvmet_wq, &ctrl->ka_work, ctrl->kato * HZ); } void nvmet_stop_keep_alive_timer(struct nvmet_ctrl *ctrl) @@ -1478,7 +1481,7 @@ void nvmet_ctrl_fatal_error(struct nvmet_ctrl *ctrl) mutex_lock(&ctrl->lock); if (!(ctrl->csts & NVME_CSTS_CFS)) { ctrl->csts |= NVME_CSTS_CFS; - schedule_work(&ctrl->fatal_err_work); + queue_work(nvmet_wq, &ctrl->fatal_err_work); } mutex_unlock(&ctrl->lock); } @@ -1620,9 +1623,15 @@ static int __init nvmet_init(void) goto out_free_zbd_work_queue; } + nvmet_wq = alloc_workqueue("nvmet-wq", WQ_MEM_RECLAIM, 0); + if (!nvmet_wq) { + error = -ENOMEM; + goto out_free_buffered_work_queue; + } + error = nvmet_init_discovery(); if (error) - goto out_free_work_queue; + goto out_free_nvmet_work_queue; error = nvmet_init_configfs(); if (error) @@ -1631,7 +1640,9 @@ static int __init nvmet_init(void) out_exit_discovery: nvmet_exit_discovery(); -out_free_work_queue: +out_free_nvmet_work_queue: + destroy_workqueue(nvmet_wq); +out_free_buffered_work_queue: destroy_workqueue(buffered_io_wq); out_free_zbd_work_queue: destroy_workqueue(zbd_wq); @@ -1643,6 +1654,7 @@ static void __exit nvmet_exit(void) nvmet_exit_configfs(); nvmet_exit_discovery(); ida_destroy(&cntlid_ida); + destroy_workqueue(nvmet_wq); destroy_workqueue(buffered_io_wq); destroy_workqueue(zbd_wq); diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index de90001fc5c4..ab2627e17bb9 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -1491,7 +1491,7 @@ __nvmet_fc_free_assocs(struct nvmet_fc_tgtport *tgtport) list_for_each_entry_rcu(assoc, &tgtport->assoc_list, a_list) { if (!nvmet_fc_tgt_a_get(assoc)) continue; - if (!schedule_work(&assoc->del_work)) + if (!queue_work(nvmet_wq, &assoc->del_work)) /* already deleting - release local reference */ nvmet_fc_tgt_a_put(assoc); } @@ -1546,7 +1546,7 @@ nvmet_fc_invalidate_host(struct nvmet_fc_target_port *target_port, continue; assoc->hostport->invalid = 1; noassoc = false; - if (!schedule_work(&assoc->del_work)) + if (!queue_work(nvmet_wq, &assoc->del_work)) /* already deleting - release local reference */ nvmet_fc_tgt_a_put(assoc); } @@ -1592,7 +1592,7 @@ nvmet_fc_delete_ctrl(struct nvmet_ctrl *ctrl) nvmet_fc_tgtport_put(tgtport); if (found_ctrl) { - if (!schedule_work(&assoc->del_work)) + if (!queue_work(nvmet_wq, &assoc->del_work)) /* already deleting - release local reference */ nvmet_fc_tgt_a_put(assoc); return; @@ -2060,7 +2060,7 @@ nvmet_fc_rcv_ls_req(struct nvmet_fc_target_port *target_port, iod->rqstdatalen = lsreqbuf_len; iod->hosthandle = hosthandle; - schedule_work(&iod->work); + queue_work(nvmet_wq, &iod->work); return 0; } diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 54606f1872b4..5c16372f3b53 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -360,7 +360,7 @@ fcloop_h2t_ls_req(struct nvme_fc_local_port *localport, spin_lock(&rport->lock); list_add_tail(&rport->ls_list, &tls_req->ls_list); spin_unlock(&rport->lock); - schedule_work(&rport->ls_work); + queue_work(nvmet_wq, &rport->ls_work); return ret; } @@ -393,7 +393,7 @@ fcloop_h2t_xmt_ls_rsp(struct nvmet_fc_target_port *targetport, spin_lock(&rport->lock); list_add_tail(&rport->ls_list, &tls_req->ls_list); spin_unlock(&rport->lock); - schedule_work(&rport->ls_work); + queue_work(nvmet_wq, &rport->ls_work); } return 0; @@ -448,7 +448,7 @@ fcloop_t2h_ls_req(struct nvmet_fc_target_port *targetport, void *hosthandle, spin_lock(&tport->lock); list_add_tail(&tport->ls_list, &tls_req->ls_list); spin_unlock(&tport->lock); - schedule_work(&tport->ls_work); + queue_work(nvmet_wq, &tport->ls_work); return ret; } @@ -480,7 +480,7 @@ fcloop_t2h_xmt_ls_rsp(struct nvme_fc_local_port *localport, spin_lock(&tport->lock); list_add_tail(&tport->ls_list, &tls_req->ls_list); spin_unlock(&tport->lock); - schedule_work(&tport->ls_work); + queue_work(nvmet_wq, &tport->ls_work); } return 0; @@ -520,7 +520,7 @@ fcloop_tgt_discovery_evt(struct nvmet_fc_target_port *tgtport) tgt_rscn->tport = tgtport->private; INIT_WORK(&tgt_rscn->work, fcloop_tgt_rscn_work); - schedule_work(&tgt_rscn->work); + queue_work(nvmet_wq, &tgt_rscn->work); } static void @@ -739,7 +739,7 @@ fcloop_fcp_req(struct nvme_fc_local_port *localport, INIT_WORK(&tfcp_req->tio_done_work, fcloop_tgt_fcprqst_done_work); kref_init(&tfcp_req->ref); - schedule_work(&tfcp_req->fcp_rcv_work); + queue_work(nvmet_wq, &tfcp_req->fcp_rcv_work); return 0; } @@ -921,7 +921,7 @@ fcloop_fcp_req_release(struct nvmet_fc_target_port *tgtport, { struct fcloop_fcpreq *tfcp_req = tgt_fcp_req_to_fcpreq(tgt_fcpreq); - schedule_work(&tfcp_req->tio_done_work); + queue_work(nvmet_wq, &tfcp_req->tio_done_work); } static void @@ -976,7 +976,7 @@ fcloop_fcp_abort(struct nvme_fc_local_port *localport, if (abortio) /* leave the reference while the work item is scheduled */ - WARN_ON(!schedule_work(&tfcp_req->abort_rcv_work)); + WARN_ON(!queue_work(nvmet_wq, &tfcp_req->abort_rcv_work)); else { /* * as the io has already had the done callback made, diff --git a/drivers/nvme/target/io-cmd-file.c b/drivers/nvme/target/io-cmd-file.c index 6485dc8eb974..f3d58abf11e0 100644 --- a/drivers/nvme/target/io-cmd-file.c +++ b/drivers/nvme/target/io-cmd-file.c @@ -283,7 +283,7 @@ static void nvmet_file_execute_flush(struct nvmet_req *req) if (!nvmet_check_transfer_len(req, 0)) return; INIT_WORK(&req->f.work, nvmet_file_flush_work); - schedule_work(&req->f.work); + queue_work(nvmet_wq, &req->f.work); } static void nvmet_file_execute_discard(struct nvmet_req *req) @@ -343,7 +343,7 @@ static void nvmet_file_execute_dsm(struct nvmet_req *req) if (!nvmet_check_data_len_lte(req, nvmet_dsm_len(req))) return; INIT_WORK(&req->f.work, nvmet_file_dsm_work); - schedule_work(&req->f.work); + queue_work(nvmet_wq, &req->f.work); } static void nvmet_file_write_zeroes_work(struct work_struct *w) @@ -373,7 +373,7 @@ static void nvmet_file_execute_write_zeroes(struct nvmet_req *req) if (!nvmet_check_transfer_len(req, 0)) return; INIT_WORK(&req->f.work, nvmet_file_write_zeroes_work); - schedule_work(&req->f.work); + queue_work(nvmet_wq, &req->f.work); } u16 nvmet_file_parse_io_cmd(struct nvmet_req *req) diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 23f9d6f88804..59024af2da2e 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -166,7 +166,7 @@ static blk_status_t nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, iod->req.transfer_len = blk_rq_payload_bytes(req); } - schedule_work(&iod->work); + queue_work(nvmet_wq, &iod->work); return BLK_STS_OK; } @@ -187,7 +187,7 @@ static void nvme_loop_submit_async_event(struct nvme_ctrl *arg) return; } - schedule_work(&iod->work); + queue_work(nvmet_wq, &iod->work); } static int nvme_loop_init_iod(struct nvme_loop_ctrl *ctrl, diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h index d910c6aad4b6..69818752a33a 100644 --- a/drivers/nvme/target/nvmet.h +++ b/drivers/nvme/target/nvmet.h @@ -366,6 +366,7 @@ struct nvmet_req { extern struct workqueue_struct *buffered_io_wq; extern struct workqueue_struct *zbd_wq; +extern struct workqueue_struct *nvmet_wq; static inline void nvmet_set_result(struct nvmet_req *req, u32 result) { diff --git a/drivers/nvme/target/passthru.c b/drivers/nvme/target/passthru.c index a4de1e0d518b..5247c24538eb 100644 --- a/drivers/nvme/target/passthru.c +++ b/drivers/nvme/target/passthru.c @@ -283,7 +283,7 @@ static void nvmet_passthru_execute_cmd(struct nvmet_req *req) if (req->p.use_workqueue || effects) { INIT_WORK(&req->p.work, nvmet_passthru_execute_cmd_work); req->p.rq = rq; - schedule_work(&req->p.work); + queue_work(nvmet_wq, &req->p.work); } else { rq->end_io_data = req; blk_execute_rq_nowait(rq, false, nvmet_passthru_req_done); diff --git a/drivers/nvme/target/rdma.c b/drivers/nvme/target/rdma.c index 2446d0918a41..2fab0b219b25 100644 --- a/drivers/nvme/target/rdma.c +++ b/drivers/nvme/target/rdma.c @@ -1584,7 +1584,7 @@ static int nvmet_rdma_queue_connect(struct rdma_cm_id *cm_id, if (queue->host_qid == 0) { /* Let inflight controller teardown complete */ - flush_scheduled_work(); + flush_workqueue(nvmet_wq); } ret = nvmet_rdma_cm_accept(cm_id, queue, &event->param.conn); @@ -1669,7 +1669,7 @@ static void __nvmet_rdma_queue_disconnect(struct nvmet_rdma_queue *queue) if (disconnect) { rdma_disconnect(queue->cm_id); - schedule_work(&queue->release_work); + queue_work(nvmet_wq, &queue->release_work); } } @@ -1699,7 +1699,7 @@ static void nvmet_rdma_queue_connect_fail(struct rdma_cm_id *cm_id, mutex_unlock(&nvmet_rdma_queue_mutex); pr_err("failed to connect queue %d\n", queue->idx); - schedule_work(&queue->release_work); + queue_work(nvmet_wq, &queue->release_work); } /** @@ -1773,7 +1773,7 @@ static int nvmet_rdma_cm_handler(struct rdma_cm_id *cm_id, if (!queue) { struct nvmet_rdma_port *port = cm_id->context; - schedule_delayed_work(&port->repair_work, 0); + queue_delayed_work(nvmet_wq, &port->repair_work, 0); break; } fallthrough; @@ -1903,7 +1903,7 @@ static void nvmet_rdma_repair_port_work(struct work_struct *w) nvmet_rdma_disable_port(port); ret = nvmet_rdma_enable_port(port); if (ret) - schedule_delayed_work(&port->repair_work, 5 * HZ); + queue_delayed_work(nvmet_wq, &port->repair_work, 5 * HZ); } static int nvmet_rdma_add_port(struct nvmet_port *nport) @@ -2053,7 +2053,7 @@ static void nvmet_rdma_remove_one(struct ib_device *ib_device, void *client_data } mutex_unlock(&nvmet_rdma_queue_mutex); - flush_scheduled_work(); + flush_workqueue(nvmet_wq); } static struct ib_client nvmet_rdma_ib_client = { diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c index 83ca577f72be..2793554e622e 100644 --- a/drivers/nvme/target/tcp.c +++ b/drivers/nvme/target/tcp.c @@ -1269,7 +1269,7 @@ static void nvmet_tcp_schedule_release_queue(struct nvmet_tcp_queue *queue) spin_lock(&queue->state_lock); if (queue->state != NVMET_TCP_Q_DISCONNECTING) { queue->state = NVMET_TCP_Q_DISCONNECTING; - schedule_work(&queue->release_work); + queue_work(nvmet_wq, &queue->release_work); } spin_unlock(&queue->state_lock); } @@ -1684,7 +1684,7 @@ static void nvmet_tcp_listen_data_ready(struct sock *sk) goto out; if (sk->sk_state == TCP_LISTEN) - schedule_work(&port->accept_work); + queue_work(nvmet_wq, &port->accept_work); out: read_unlock_bh(&sk->sk_callback_lock); } @@ -1815,7 +1815,7 @@ static u16 nvmet_tcp_install_queue(struct nvmet_sq *sq) if (sq->qid == 0) { /* Let inflight controller teardown complete */ - flush_scheduled_work(); + flush_workqueue(nvmet_wq); } queue->nr_cmds = sq->size * 2; @@ -1876,12 +1876,12 @@ static void __exit nvmet_tcp_exit(void) nvmet_unregister_transport(&nvmet_tcp_ops); - flush_scheduled_work(); + flush_workqueue(nvmet_wq); mutex_lock(&nvmet_tcp_queue_mutex); list_for_each_entry(queue, &nvmet_tcp_queue_list, queue_list) kernel_sock_shutdown(queue->sock, SHUT_RDWR); mutex_unlock(&nvmet_tcp_queue_mutex); - flush_scheduled_work(); + flush_workqueue(nvmet_wq); destroy_workqueue(nvmet_tcp_wq); } -- cgit v1.2.3 From 63bc732c3aefaa0ad61e290fded9de0f5c8ccd0e Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Fri, 18 Mar 2022 01:30:14 +0000 Subject: nvmet: remove redundant assignment after left shift The left shift is followed by a re-assignment back to cc_css, the assignment is redundant. Fix this by replacing the "<<=" operator with "<<" instead. This cleans up the clang scan build warning: drivers/nvme/target/core.c:1124:10: warning: Although the value stored to 'cc_css' is used in the enclosing expression, the value is never actually read from 'cc_css' [deadcode.DeadStores] Signed-off-by: Colin Ian King Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index cd833b1dd47c..7b2b72cf4423 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -1123,7 +1123,7 @@ static inline u8 nvmet_cc_iocqes(u32 cc) static inline bool nvmet_css_supported(u8 cc_css) { - switch (cc_css <<= NVME_CC_CSS_SHIFT) { + switch (cc_css << NVME_CC_CSS_SHIFT) { case NVME_CC_CSS_NVM: case NVME_CC_CSS_CSI: return true; -- cgit v1.2.3 From 5974ea7ce0f9a5987fc8cf5e08ad6e3e70bb542e Mon Sep 17 00:00:00 2001 From: Sungup Moon Date: Mon, 14 Mar 2022 20:05:45 +0900 Subject: nvme: allow duplicate NSIDs for private namespaces A NVMe subsystem with multiple controller can have private namespaces that use the same NSID under some conditions: "If Namespace Management, ANA Reporting, or NVM Sets are supported, the NSIDs shall be unique within the NVM subsystem. If the Namespace Management, ANA Reporting, and NVM Sets are not supported, then NSIDs: a) for shared namespace shall be unique; and b) for private namespace are not required to be unique." Reference: Section 6.1.6 NSID and Namespace Usage; NVM Express 1.4c spec. Make sure this specific setup is supported in Linux. Fixes: 9ad1927a3bc2 ("nvme: always search for namespace head") Signed-off-by: Sungup Moon [hch: refactored and fixed the controller vs subsystem based naming conflict] Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg --- drivers/nvme/host/core.c | 15 ++++++++++----- drivers/nvme/host/multipath.c | 7 ++++--- drivers/nvme/host/nvme.h | 19 +++++++++++++++++++ include/linux/nvme.h | 1 + 4 files changed, 34 insertions(+), 8 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index ccc5877d514b..6dc05c1fa7db 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -3627,15 +3627,20 @@ static const struct attribute_group *nvme_dev_attr_groups[] = { NULL, }; -static struct nvme_ns_head *nvme_find_ns_head(struct nvme_subsystem *subsys, +static struct nvme_ns_head *nvme_find_ns_head(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns_head *h; - lockdep_assert_held(&subsys->lock); + lockdep_assert_held(&ctrl->subsys->lock); - list_for_each_entry(h, &subsys->nsheads, entry) { - if (h->ns_id != nsid) + list_for_each_entry(h, &ctrl->subsys->nsheads, entry) { + /* + * Private namespaces can share NSIDs under some conditions. + * In that case we can't use the same ns_head for namespaces + * with the same NSID. + */ + if (h->ns_id != nsid || !nvme_is_unique_nsid(ctrl, h)) continue; if (!list_empty(&h->list) && nvme_tryget_ns_head(h)) return h; @@ -3829,7 +3834,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, unsigned nsid, } mutex_lock(&ctrl->subsys->lock); - head = nvme_find_ns_head(ctrl->subsys, nsid); + head = nvme_find_ns_head(ctrl, nsid); if (!head) { ret = nvme_subsys_check_duplicate_ids(ctrl->subsys, ids); if (ret) { diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index c97d7f843977..ba90555124c4 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -482,10 +482,11 @@ int nvme_mpath_alloc_disk(struct nvme_ctrl *ctrl, struct nvme_ns_head *head) /* * Add a multipath node if the subsystems supports multiple controllers. - * We also do this for private namespaces as the namespace sharing data could - * change after a rescan. + * We also do this for private namespaces as the namespace sharing flag + * could change after a rescan. */ - if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || !multipath) + if (!(ctrl->subsys->cmic & NVME_CTRL_CMIC_MULTI_CTRL) || + !nvme_is_unique_nsid(ctrl, head) || !multipath) return 0; head->disk = blk_alloc_disk(ctrl->numa_node); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1ea908d43e17..1552a48719d6 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -722,6 +722,25 @@ static inline bool nvme_check_ready(struct nvme_ctrl *ctrl, struct request *rq, return queue_live; return __nvme_check_ready(ctrl, rq, queue_live); } + +/* + * NSID shall be unique for all shared namespaces, or if at least one of the + * following conditions is met: + * 1. Namespace Management is supported by the controller + * 2. ANA is supported by the controller + * 3. NVM Set are supported by the controller + * + * In other case, private namespace are not required to report a unique NSID. + */ +static inline bool nvme_is_unique_nsid(struct nvme_ctrl *ctrl, + struct nvme_ns_head *head) +{ + return head->shared || + (ctrl->oacs & NVME_CTRL_OACS_NS_MNGT_SUPP) || + (ctrl->subsys->cmic & NVME_CTRL_CMIC_ANA) || + (ctrl->ctratt & NVME_CTRL_CTRATT_NVM_SETS); +} + int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, void *buf, unsigned bufflen); int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 9dbc3ef4daf7..2dcee34d467d 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -345,6 +345,7 @@ enum { NVME_CTRL_ONCS_TIMESTAMP = 1 << 6, NVME_CTRL_VWC_PRESENT = 1 << 0, NVME_CTRL_OACS_SEC_SUPP = 1 << 0, + NVME_CTRL_OACS_NS_MNGT_SUPP = 1 << 3, NVME_CTRL_OACS_DIRECTIVES = 1 << 5, NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1, -- cgit v1.2.3 From d6d6742772d712ed2238f5071b96baf4924f5fad Mon Sep 17 00:00:00 2001 From: Chris Leech Date: Mon, 21 Mar 2022 15:43:04 -0700 Subject: nvme: fix RCU hole that allowed for endless looping in multipath round robin Make nvme_ns_remove match the assumptions elsewhere. 1) !NVME_NS_READY needs to be srcu synchronized to make sure nothing is running in __nvme_find_path or nvme_round_robin_path that will re-assign this ns to current_path. 2) Any matching current_path entries need to be cleared before removing from the siblings list, to prevent calling nvme_round_robin_path with an "old" ns that's off list. 3) Finally the list_del_rcu can happen, and then synchronize again before releasing any reference counts. Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 6dc05c1fa7db..a7d72ef01171 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4031,6 +4031,16 @@ static void nvme_ns_remove(struct nvme_ns *ns) set_capacity(ns->disk, 0); nvme_fault_inject_fini(&ns->fault_inject); + /* + * Ensure that !NVME_NS_READY is seen by other threads to prevent + * this ns going back into current_path. + */ + synchronize_srcu(&ns->head->srcu); + + /* wait for concurrent submissions */ + if (nvme_mpath_clear_current_path(ns)) + synchronize_srcu(&ns->head->srcu); + mutex_lock(&ns->ctrl->subsys->lock); list_del_rcu(&ns->siblings); if (list_empty(&ns->head->list)) { @@ -4042,10 +4052,6 @@ static void nvme_ns_remove(struct nvme_ns *ns) /* guarantee not available in head->list */ synchronize_rcu(); - /* wait for concurrent submissions */ - if (nvme_mpath_clear_current_path(ns)) - synchronize_srcu(&ns->head->srcu); - if (!nvme_ns_head_multipath(ns->head)) nvme_cdev_del(&ns->cdev, &ns->cdev_device); del_gendisk(ns->disk); -- cgit v1.2.3 From a4a6f3c8f61c3cfbda4998ad94596059ad7e4332 Mon Sep 17 00:00:00 2001 From: Anton Eidelman Date: Thu, 24 Mar 2022 13:05:11 -0600 Subject: nvme-multipath: fix hang when disk goes live over reconnect nvme_mpath_init_identify() invoked from nvme_init_identify() fetches a fresh ANA log from the ctrl. This is essential to have an up to date path states for both existing namespaces and for those scan_work may discover once the ctrl is up. This happens in the following cases: 1) A new ctrl is being connected. 2) An existing ctrl is successfully reconnected. 3) An existing ctrl is being reset. While in (1) ctrl->namespaces is empty, (2 & 3) may have namespaces, and nvme_read_ana_log() may call nvme_update_ns_ana_state(). This result in a hang when the ANA state of an existing namespace changes and makes the disk live: nvme_mpath_set_live() issues IO to the namespace through the ctrl, which does NOT have IO queues yet. See sample hang below. Solution: - nvme_update_ns_ana_state() to call set_live only if ctrl is live - nvme_read_ana_log() call from nvme_mpath_init_identify() therefore only fetches and parses the ANA log; any erros in this process will fail the ctrl setup as appropriate; - a separate function nvme_mpath_update() is called in nvme_start_ctrl(); this parses the ANA log without fetching it. At this point the ctrl is live, therefore, disks can be set live normally. Sample failure: nvme nvme0: starting error recovery nvme nvme0: Reconnecting in 10 seconds... block nvme0n6: no usable path - requeuing I/O INFO: task kworker/u8:3:312 blocked for more than 122 seconds. Tainted: G E 5.14.5-1.el7.elrepo.x86_64 #1 Workqueue: nvme-wq nvme_tcp_reconnect_ctrl_work [nvme_tcp] Call Trace: __schedule+0x2a2/0x7e0 schedule+0x4e/0xb0 io_schedule+0x16/0x40 wait_on_page_bit_common+0x15c/0x3e0 do_read_cache_page+0x1e0/0x410 read_cache_page+0x12/0x20 read_part_sector+0x46/0x100 read_lba+0x121/0x240 efi_partition+0x1d2/0x6a0 bdev_disk_changed.part.0+0x1df/0x430 bdev_disk_changed+0x18/0x20 blkdev_get_whole+0x77/0xe0 blkdev_get_by_dev+0xd2/0x3a0 __device_add_disk+0x1ed/0x310 device_add_disk+0x13/0x20 nvme_mpath_set_live+0x138/0x1b0 [nvme_core] nvme_update_ns_ana_state+0x2b/0x30 [nvme_core] nvme_update_ana_state+0xca/0xe0 [nvme_core] nvme_parse_ana_log+0xac/0x170 [nvme_core] nvme_read_ana_log+0x7d/0xe0 [nvme_core] nvme_mpath_init_identify+0x105/0x150 [nvme_core] nvme_init_identify+0x2df/0x4d0 [nvme_core] nvme_init_ctrl_finish+0x8d/0x3b0 [nvme_core] nvme_tcp_setup_ctrl+0x337/0x390 [nvme_tcp] nvme_tcp_reconnect_ctrl_work+0x24/0x40 [nvme_tcp] process_one_work+0x1bd/0x360 worker_thread+0x50/0x3d0 Signed-off-by: Anton Eidelman Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 1 + drivers/nvme/host/multipath.c | 25 +++++++++++++++++++++++-- drivers/nvme/host/nvme.h | 4 ++++ 3 files changed, 28 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a7d72ef01171..f204c6f78b5b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -4522,6 +4522,7 @@ void nvme_start_ctrl(struct nvme_ctrl *ctrl) if (ctrl->queue_count > 1) { nvme_queue_scan(ctrl); nvme_start_queues(ctrl); + nvme_mpath_update(ctrl); } nvme_change_uevent(ctrl, "NVME_EVENT=connected"); diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c index ba90555124c4..7fc58e1f6b09 100644 --- a/drivers/nvme/host/multipath.c +++ b/drivers/nvme/host/multipath.c @@ -613,8 +613,17 @@ static void nvme_update_ns_ana_state(struct nvme_ana_group_desc *desc, ns->ana_grpid = le32_to_cpu(desc->grpid); ns->ana_state = desc->state; clear_bit(NVME_NS_ANA_PENDING, &ns->flags); - - if (nvme_state_is_live(ns->ana_state)) + /* + * nvme_mpath_set_live() will trigger I/O to the multipath path device + * and in turn to this path device. However we cannot accept this I/O + * if the controller is not live. This may deadlock if called from + * nvme_mpath_init_identify() and the ctrl will never complete + * initialization, preventing I/O from completing. For this case we + * will reprocess the ANA log page in nvme_mpath_update() once the + * controller is ready. + */ + if (nvme_state_is_live(ns->ana_state) && + ns->ctrl->state == NVME_CTRL_LIVE) nvme_mpath_set_live(ns); } @@ -701,6 +710,18 @@ static void nvme_ana_work(struct work_struct *work) nvme_read_ana_log(ctrl); } +void nvme_mpath_update(struct nvme_ctrl *ctrl) +{ + u32 nr_change_groups = 0; + + if (!ctrl->ana_log_buf) + return; + + mutex_lock(&ctrl->ana_lock); + nvme_parse_ana_log(ctrl, &nr_change_groups, nvme_update_ana_state); + mutex_unlock(&ctrl->ana_lock); +} + static void nvme_anatt_timeout(struct timer_list *t) { struct nvme_ctrl *ctrl = from_timer(ctrl, t, anatt_timer); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 1552a48719d6..a1f8403ffd78 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -800,6 +800,7 @@ void nvme_mpath_add_disk(struct nvme_ns *ns, struct nvme_id_ns *id); void nvme_mpath_remove_disk(struct nvme_ns_head *head); int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, struct nvme_id_ctrl *id); void nvme_mpath_init_ctrl(struct nvme_ctrl *ctrl); +void nvme_mpath_update(struct nvme_ctrl *ctrl); void nvme_mpath_uninit(struct nvme_ctrl *ctrl); void nvme_mpath_stop(struct nvme_ctrl *ctrl); bool nvme_mpath_clear_current_path(struct nvme_ns *ns); @@ -871,6 +872,9 @@ static inline int nvme_mpath_init_identify(struct nvme_ctrl *ctrl, "Please enable CONFIG_NVME_MULTIPATH for full support of multi-port devices.\n"); return 0; } +static inline void nvme_mpath_update(struct nvme_ctrl *ctrl) +{ +} static inline void nvme_mpath_uninit(struct nvme_ctrl *ctrl) { } -- cgit v1.2.3 From f941c51eeac7ebe0f8ec30943bf78e7f60aad039 Mon Sep 17 00:00:00 2001 From: Carlos Llamas Date: Tue, 29 Mar 2022 20:18:15 +0000 Subject: loop: fix ioctl calls using compat_loop_info Support for cryptoloop was deleted in commit 47e9624616c8 ("block: remove support for cryptoloop and the xor transfer"), making the usage of loop_info->lo_encrypt_type obsolete. However, this member was also removed from the compat_loop_info definition and this breaks userspace ioctl calls for 32-bit binaries and CONFIG_COMPAT=y. This patch restores the compat_loop_info->lo_encrypt_type member and marks it obsolete as well as in the uapi header definitions. Fixes: 47e9624616c8 ("block: remove support for cryptoloop and the xor transfer") Signed-off-by: Carlos Llamas Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20220329201815.1347500-1-cmllamas@google.com Signed-off-by: Jens Axboe --- drivers/block/loop.c | 1 + include/uapi/linux/loop.h | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index e733c48de2e9..39e7650f89b1 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1592,6 +1592,7 @@ struct compat_loop_info { compat_ulong_t lo_inode; /* ioctl r/o */ compat_dev_t lo_rdevice; /* ioctl r/o */ compat_int_t lo_offset; + compat_int_t lo_encrypt_type; /* obsolete, ignored */ compat_int_t lo_encrypt_key_size; /* ioctl w/o */ compat_int_t lo_flags; /* ioctl r/o */ char lo_name[LO_NAME_SIZE]; diff --git a/include/uapi/linux/loop.h b/include/uapi/linux/loop.h index 24a1c45bd1ae..98e60801195e 100644 --- a/include/uapi/linux/loop.h +++ b/include/uapi/linux/loop.h @@ -45,7 +45,7 @@ struct loop_info { unsigned long lo_inode; /* ioctl r/o */ __kernel_old_dev_t lo_rdevice; /* ioctl r/o */ int lo_offset; - int lo_encrypt_type; + int lo_encrypt_type; /* obsolete, ignored */ int lo_encrypt_key_size; /* ioctl w/o */ int lo_flags; char lo_name[LO_NAME_SIZE]; @@ -61,7 +61,7 @@ struct loop_info64 { __u64 lo_offset; __u64 lo_sizelimit;/* bytes, 0 == max available */ __u32 lo_number; /* ioctl r/o */ - __u32 lo_encrypt_type; + __u32 lo_encrypt_type; /* obsolete, ignored */ __u32 lo_encrypt_key_size; /* ioctl w/o */ __u32 lo_flags; __u8 lo_file_name[LO_NAME_SIZE]; -- cgit v1.2.3 From f4329d1f848ac35757d9cc5487669d19dfc5979c Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 30 Mar 2022 20:55:51 +0200 Subject: drbd: fix potential silent data corruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Scenario: --------- bio chain generated by blk_queue_split(). Some split bio fails and propagates its error status to the "parent" bio. But then the (last part of the) parent bio itself completes without error. We would clobber the already recorded error status with BLK_STS_OK, causing silent data corruption. Reproducer: ----------- How to trigger this in the real world within seconds: DRBD on top of degraded parity raid, small stripe_cache_size, large read_ahead setting. Drop page cache (sysctl vm.drop_caches=1, fadvise "DONTNEED", umount and mount again, "reboot"). Cause significant read ahead. Large read ahead request is split by blk_queue_split(). Parts of the read ahead that are already in the stripe cache, or find an available stripe cache to use, can be serviced. Parts of the read ahead that would need "too much work", would need to wait for a "stripe_head" to become available, are rejected immediately. For larger read ahead requests that are split in many pieces, it is very likely that some "splits" will be serviced, but then the stripe cache is exhausted/busy, and the remaining ones will be rejected. Signed-off-by: Lars Ellenberg Signed-off-by: Christoph Böhmwalder Cc: # 4.13.x Link: https://lore.kernel.org/r/20220330185551.3553196-1-christoph.boehmwalder@linbit.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index c00ae8619519..ebe0e5c8d0ac 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -181,7 +181,8 @@ void start_new_tl_epoch(struct drbd_connection *connection) void complete_master_bio(struct drbd_device *device, struct bio_and_error *m) { - m->bio->bi_status = errno_to_blk_status(m->error); + if (unlikely(m->error)) + m->bio->bi_status = errno_to_blk_status(m->error); bio_endio(m->bio); dec_ap_bio(device); } -- cgit v1.2.3 From 6d35d04a9e18990040e87d2bbf72689252669d54 Mon Sep 17 00:00:00 2001 From: Zhang Wensheng Date: Thu, 10 Mar 2022 17:32:24 +0800 Subject: nbd: fix possible overflow on 'first_minor' in nbd_dev_add() When 'index' is a big numbers, it may become negative which forced to 'int'. then 'index << part_shift' might overflow to a positive value that is not greater than '0xfffff', then sysfs might complains about duplicate creation. Because of this, move the 'index' judgment to the front will fix it and be better. Fixes: b0d9111a2d53 ("nbd: use an idr to keep track of nbd devices") Fixes: 940c264984fd ("nbd: fix possible overflow for 'first_minor' in nbd_dev_add()") Signed-off-by: Zhang Wensheng Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20220310093224.4002895-1-zhangwensheng5@huawei.com Signed-off-by: Jens Axboe --- drivers/block/nbd.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers') diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index 5a1f98494ddd..b3cdfc0ffb98 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -1800,17 +1800,6 @@ static struct nbd_device *nbd_dev_add(int index, unsigned int refs) refcount_set(&nbd->refs, 0); INIT_LIST_HEAD(&nbd->list); disk->major = NBD_MAJOR; - - /* Too big first_minor can cause duplicate creation of - * sysfs files/links, since index << part_shift might overflow, or - * MKDEV() expect that the max bits of first_minor is 20. - */ - disk->first_minor = index << part_shift; - if (disk->first_minor < index || disk->first_minor > MINORMASK) { - err = -EINVAL; - goto out_free_work; - } - disk->minors = 1 << part_shift; disk->fops = &nbd_fops; disk->private_data = nbd; @@ -1915,8 +1904,19 @@ static int nbd_genl_connect(struct sk_buff *skb, struct genl_info *info) if (!netlink_capable(skb, CAP_SYS_ADMIN)) return -EPERM; - if (info->attrs[NBD_ATTR_INDEX]) + if (info->attrs[NBD_ATTR_INDEX]) { index = nla_get_u32(info->attrs[NBD_ATTR_INDEX]); + + /* + * Too big first_minor can cause duplicate creation of + * sysfs files/links, since index << part_shift might overflow, or + * MKDEV() expect that the max bits of first_minor is 20. + */ + if (index < 0 || index > MINORMASK >> part_shift) { + printk(KERN_ERR "nbd: illegal input index %d\n", index); + return -EINVAL; + } + } if (!info->attrs[NBD_ATTR_SOCKETS]) { printk(KERN_ERR "nbd: must specify at least one socket\n"); return -EINVAL; -- cgit v1.2.3 From 901aeda62efa21f2eae937bccb71b49ae531be06 Mon Sep 17 00:00:00 2001 From: Jakob Koschel Date: Fri, 1 Apr 2022 00:03:48 +0200 Subject: drbd: remove usage of list iterator variable after loop In preparation to limit the scope of a list iterator to the list traversal loop, use a dedicated pointer to iterate through the list [1]. Since that variable should not be used past the loop iteration, a separate variable is used to 'remember the current location within the loop'. To either continue iterating from that position or skip the iteration (if the previous iteration was complete) list_prepare_entry() is used. Link: https://lore.kernel.org/all/CAHk-=wgRr_D8CB-D9Kg-c=EHreAsk5SqXPwr9Y7k9sA6cWXJ6w@mail.gmail.com/ [1] Signed-off-by: Jakob Koschel Link: https://lore.kernel.org/r/20220331220349.885126-1-jakobkoschel@gmail.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_main.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 6f450816c4fa..6831ddbae49d 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -171,7 +171,7 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr, unsigned int set_size) { struct drbd_request *r; - struct drbd_request *req = NULL; + struct drbd_request *req = NULL, *tmp = NULL; int expect_epoch = 0; int expect_size = 0; @@ -225,8 +225,11 @@ void tl_release(struct drbd_connection *connection, unsigned int barrier_nr, * to catch requests being barrier-acked "unexpectedly". * It usually should find the same req again, or some READ preceding it. */ list_for_each_entry(req, &connection->transfer_log, tl_requests) - if (req->epoch == expect_epoch) + if (req->epoch == expect_epoch) { + tmp = req; break; + } + req = list_prepare_entry(tmp, &connection->transfer_log, tl_requests); list_for_each_entry_safe_from(req, r, &connection->transfer_log, tl_requests) { if (req->epoch != expect_epoch) break; -- cgit v1.2.3 From 2651ee5ae43241831ca63d7158bb2b151a6a0e1f Mon Sep 17 00:00:00 2001 From: Jakob Koschel Date: Fri, 1 Apr 2022 00:03:49 +0200 Subject: drbd: remove check of list iterator against head past the loop body MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When list_for_each_entry() completes the iteration over the whole list without breaking the loop, the iterator value will be a bogus pointer computed based on the head element. While it is safe to use the pointer to determine if it was computed based on the head element, either with list_entry_is_head() or &pos->member == head, using the iterator variable after the loop should be avoided. In preparation to limit the scope of a list iterator to the list traversal loop, use a dedicated pointer to point to the found element [1]. Link: https://lore.kernel.org/all/CAHk-=wgRr_D8CB-D9Kg-c=EHreAsk5SqXPwr9Y7k9sA6cWXJ6w@mail.gmail.com/ [1] Signed-off-by: Jakob Koschel Reviewed-by: Christoph Böhmwalder Link: https://lore.kernel.org/r/20220331220349.885126-2-jakobkoschel@gmail.com Signed-off-by: Jens Axboe --- drivers/block/drbd/drbd_req.c | 42 +++++++++++++++++++++++++++--------------- 1 file changed, 27 insertions(+), 15 deletions(-) (limited to 'drivers') diff --git a/drivers/block/drbd/drbd_req.c b/drivers/block/drbd/drbd_req.c index ebe0e5c8d0ac..5df2a6063f6b 100644 --- a/drivers/block/drbd/drbd_req.c +++ b/drivers/block/drbd/drbd_req.c @@ -334,17 +334,21 @@ static void set_if_null_req_next(struct drbd_peer_device *peer_device, struct dr static void advance_conn_req_next(struct drbd_peer_device *peer_device, struct drbd_request *req) { struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; + struct drbd_request *iter = req; if (!connection) return; if (connection->req_next != req) return; - list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) { - const unsigned s = req->rq_state; - if (s & RQ_NET_QUEUED) + + req = NULL; + list_for_each_entry_continue(iter, &connection->transfer_log, tl_requests) { + const unsigned int s = iter->rq_state; + + if (s & RQ_NET_QUEUED) { + req = iter; break; + } } - if (&req->tl_requests == &connection->transfer_log) - req = NULL; connection->req_next = req; } @@ -360,17 +364,21 @@ static void set_if_null_req_ack_pending(struct drbd_peer_device *peer_device, st static void advance_conn_req_ack_pending(struct drbd_peer_device *peer_device, struct drbd_request *req) { struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; + struct drbd_request *iter = req; if (!connection) return; if (connection->req_ack_pending != req) return; - list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) { - const unsigned s = req->rq_state; - if ((s & RQ_NET_SENT) && (s & RQ_NET_PENDING)) + + req = NULL; + list_for_each_entry_continue(iter, &connection->transfer_log, tl_requests) { + const unsigned int s = iter->rq_state; + + if ((s & RQ_NET_SENT) && (s & RQ_NET_PENDING)) { + req = iter; break; + } } - if (&req->tl_requests == &connection->transfer_log) - req = NULL; connection->req_ack_pending = req; } @@ -386,17 +394,21 @@ static void set_if_null_req_not_net_done(struct drbd_peer_device *peer_device, s static void advance_conn_req_not_net_done(struct drbd_peer_device *peer_device, struct drbd_request *req) { struct drbd_connection *connection = peer_device ? peer_device->connection : NULL; + struct drbd_request *iter = req; if (!connection) return; if (connection->req_not_net_done != req) return; - list_for_each_entry_continue(req, &connection->transfer_log, tl_requests) { - const unsigned s = req->rq_state; - if ((s & RQ_NET_SENT) && !(s & RQ_NET_DONE)) + + req = NULL; + list_for_each_entry_continue(iter, &connection->transfer_log, tl_requests) { + const unsigned int s = iter->rq_state; + + if ((s & RQ_NET_SENT) && !(s & RQ_NET_DONE)) { + req = iter; break; + } } - if (&req->tl_requests == &connection->transfer_log) - req = NULL; connection->req_not_net_done = req; } -- cgit v1.2.3