summary | refs | log | tree | commit | diff
path: root/drivers/nvme/host
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host')
-rw-r--r-- drivers/nvme/host/apple.c | 6
-rw-r--r-- drivers/nvme/host/core.c | 77
-rw-r--r-- drivers/nvme/host/fabrics.h | 7
-rw-r--r-- drivers/nvme/host/fc.c | 4
-rw-r--r-- drivers/nvme/host/ioctl.c | 15
-rw-r--r-- drivers/nvme/host/multipath.c | 3
-rw-r--r-- drivers/nvme/host/nvme.h | 38
-rw-r--r-- drivers/nvme/host/pci.c | 25
-rw-r--r-- drivers/nvme/host/pr.c | 3
-rw-r--r-- drivers/nvme/host/sysfs.c | 3
-rw-r--r-- drivers/nvme/host/tcp.c | 31
-rw-r--r-- drivers/nvme/host/trace.c | 105
-rw-r--r-- drivers/nvme/host/zns.c | 33
13 files changed, 262 insertions, 88 deletions
diff --git a/drivers/nvme/host/apple.c b/drivers/nvme/host/apple.c
index a480cdeac288..dd6ec0865141 100644
--- a/drivers/nvme/host/apple.c
+++ b/drivers/nvme/host/apple.c
@@ -1532,7 +1532,7 @@ put_dev:
return ret;
}
-static int apple_nvme_remove(struct platform_device *pdev)
+static void apple_nvme_remove(struct platform_device *pdev)
{
struct apple_nvme *anv = platform_get_drvdata(pdev);
@@ -1547,8 +1547,6 @@ static int apple_nvme_remove(struct platform_device *pdev)
apple_rtkit_shutdown(anv->rtk);
apple_nvme_detach_genpd(anv);
-
- return 0;
}
static void apple_nvme_shutdown(struct platform_device *pdev)
@@ -1598,7 +1596,7 @@ static struct platform_driver apple_nvme_driver = {
.pm = pm_sleep_ptr(&apple_nvme_pm_ops),
},
.probe = apple_nvme_probe,
- .remove = apple_nvme_remove,
+ .remove_new = apple_nvme_remove,
.shutdown = apple_nvme_shutdown,
};
module_platform_driver(apple_nvme_driver);
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 00864a634470..bf7615cb36ee 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -629,27 +629,6 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
EXPORT_SYMBOL_GPL(nvme_change_ctrl_state);
/*
- * Returns true for sink states that can't ever transition back to live.
- */
-static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
-{
- switch (nvme_ctrl_state(ctrl)) {
- case NVME_CTRL_NEW:
- case NVME_CTRL_LIVE:
- case NVME_CTRL_RESETTING:
- case NVME_CTRL_CONNECTING:
- return false;
- case NVME_CTRL_DELETING:
- case NVME_CTRL_DELETING_NOIO:
- case NVME_CTRL_DEAD:
- return true;
- default:
- WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state);
- return true;
- }
-}
-
-/*
* Waits for the controller state to be resetting, or returns false if it is
* not possible to ever transition to that state.
*/
@@ -1807,9 +1786,6 @@ static void nvme_config_discard(struct nvme_ns *ns, struct queue_limits *lim)
{
struct nvme_ctrl *ctrl = ns->ctrl;
- BUILD_BUG_ON(PAGE_SIZE / sizeof(struct nvme_dsm_range) <
- NVME_DSM_MAX_RANGES);
-
if (ctrl->dmrsl && ctrl->dmrsl <= nvme_sect_to_lba(ns->head, UINT_MAX))
lim->max_hw_discard_sectors =
nvme_lba_to_sect(ns->head, ctrl->dmrsl);
@@ -2079,6 +2055,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
bool vwc = ns->ctrl->vwc & NVME_CTRL_VWC_PRESENT;
struct queue_limits lim;
struct nvme_id_ns_nvm *nvm = NULL;
+ struct nvme_zone_info zi = {};
struct nvme_id_ns *id;
sector_t capacity;
unsigned lbaf;
@@ -2091,9 +2068,10 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
if (id->ncap == 0) {
/* namespace not allocated or attached */
info->is_removed = true;
- ret = -ENODEV;
+ ret = -ENXIO;
goto out;
}
+ lbaf = nvme_lbaf_index(id->flbas);
if (ns->ctrl->ctratt & NVME_CTRL_ATTR_ELBAS) {
ret = nvme_identify_ns_nvm(ns->ctrl, info->nsid, &nvm);
@@ -2101,8 +2079,14 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
goto out;
}
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
+ ns->head->ids.csi == NVME_CSI_ZNS) {
+ ret = nvme_query_zone_info(ns, lbaf, &zi);
+ if (ret < 0)
+ goto out;
+ }
+
blk_mq_freeze_queue(ns->disk->queue);
- lbaf = nvme_lbaf_index(id->flbas);
ns->head->lba_shift = id->lbaf[lbaf].ds;
ns->head->nuse = le64_to_cpu(id->nuse);
capacity = nvme_lba_to_sect(ns->head, le64_to_cpu(id->nsze));
@@ -2115,13 +2099,8 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
capacity = 0;
nvme_config_discard(ns, &lim);
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
- ns->head->ids.csi == NVME_CSI_ZNS) {
- ret = nvme_update_zone_info(ns, lbaf, &lim);
- if (ret) {
- blk_mq_unfreeze_queue(ns->disk->queue);
- goto out;
- }
- }
+ ns->head->ids.csi == NVME_CSI_ZNS)
+ nvme_update_zone_info(ns, &lim, &zi);
ret = queue_limits_commit_update(ns->disk->queue, &lim);
if (ret) {
blk_mq_unfreeze_queue(ns->disk->queue);
@@ -2153,7 +2132,7 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
blk_mq_unfreeze_queue(ns->disk->queue);
if (blk_queue_is_zoned(ns->queue)) {
- ret = blk_revalidate_disk_zones(ns->disk, NULL);
+ ret = blk_revalidate_disk_zones(ns->disk);
if (ret && !nvme_first_scan(ns->disk))
goto out;
}
@@ -2204,6 +2183,7 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
}
if (!ret && nvme_ns_head_multipath(ns->head)) {
+ struct queue_limits *ns_lim = &ns->disk->queue->limits;
struct queue_limits lim;
blk_mq_freeze_queue(ns->head->disk->queue);
@@ -2215,7 +2195,26 @@ static int nvme_update_ns_info(struct nvme_ns *ns, struct nvme_ns_info *info)
set_disk_ro(ns->head->disk, nvme_ns_is_readonly(ns, info));
nvme_mpath_revalidate_paths(ns);
+ /*
+ * queue_limits mixes values that are the hardware limitations
+ * for bio splitting with what is the device configuration.
+ *
+ * For NVMe the device configuration can change after e.g. a
+ * Format command, and we really want to pick up the new format
+ * value here. But we must still stack the queue limits to the
+ * least common denominator for multipathing to split the bios
+ * properly.
+ *
+ * To work around this, we explicitly set the device
+ * configuration to those that we just queried, but only stack
+ * the splitting limits in to make sure we still obey possibly
+ * lower limitations of other controllers.
+ */
lim = queue_limits_start_update(ns->head->disk->queue);
+ lim.logical_block_size = ns_lim->logical_block_size;
+ lim.physical_block_size = ns_lim->physical_block_size;
+ lim.io_min = ns_lim->io_min;
+ lim.io_opt = ns_lim->io_opt;
queue_limits_stack_bdev(&lim, ns->disk->part0, 0,
ns->head->disk->disk_name);
ret = queue_limits_commit_update(ns->head->disk->queue, &lim);
@@ -3237,7 +3236,7 @@ static int nvme_init_identify(struct nvme_ctrl *ctrl)
if (ctrl->shutdown_timeout != shutdown_timeout)
dev_info(ctrl->device,
- "Shutdown timeout set to %u seconds\n",
+ "D3 entry latency set to %u seconds\n",
ctrl->shutdown_timeout);
} else
ctrl->shutdown_timeout = shutdown_timeout;
@@ -3661,7 +3660,7 @@ static int nvme_init_ns_head(struct nvme_ns *ns, struct nvme_ns_info *info)
"Found shared namespace %d, but multipathing not supported.\n",
info->nsid);
dev_warn_once(ctrl->device,
- "Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0\n.");
+ "Support for shared namespaces without CONFIG_NVME_MULTIPATH is deprecated and will be removed in Linux 6.0.\n");
}
}
@@ -4391,7 +4390,8 @@ int nvme_alloc_admin_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
set->ops = ops;
set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
if (ctrl->ops->flags & NVME_F_FABRICS)
- set->reserved_tags = NVMF_RESERVED_TAGS;
+ /* Reserved for fabric connect and keep alive */
+ set->reserved_tags = 2;
set->numa_node = ctrl->numa_node;
set->flags = BLK_MQ_F_NO_SCHED;
if (ctrl->ops->flags & NVME_F_BLOCKING)
@@ -4460,7 +4460,8 @@ int nvme_alloc_io_tag_set(struct nvme_ctrl *ctrl, struct blk_mq_tag_set *set,
if (ctrl->quirks & NVME_QUIRK_SHARED_TAGS)
set->reserved_tags = NVME_AQ_DEPTH;
else if (ctrl->ops->flags & NVME_F_FABRICS)
- set->reserved_tags = NVMF_RESERVED_TAGS;
+ /* Reserved for fabric connect */
+ set->reserved_tags = 1;
set->numa_node = ctrl->numa_node;
set->flags = BLK_MQ_F_SHOULD_MERGE;
if (ctrl->ops->flags & NVME_F_BLOCKING)
diff --git a/drivers/nvme/host/fabrics.h b/drivers/nvme/host/fabrics.h
index 06cc54851b1b..37c974c38dcb 100644
--- a/drivers/nvme/host/fabrics.h
+++ b/drivers/nvme/host/fabrics.h
@@ -19,13 +19,6 @@
#define NVMF_DEF_FAIL_FAST_TMO -1
/*
- * Reserved one command for internal usage. This command is used for sending
- * the connect command, as well as for the keep alive command on the admin
- * queue once live.
- */
-#define NVMF_RESERVED_TAGS 1
-
-/*
* Define a host as seen by the target. We allocate one at boot, but also
* allow the override it when creating controllers. This is both to provide
* persistence of the Host NQN over multiple boots, and to allow using
diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c
index 68a5d971657b..a5b29e9ad342 100644
--- a/drivers/nvme/host/fc.c
+++ b/drivers/nvme/host/fc.c
@@ -2428,7 +2428,7 @@ nvme_fc_ctrl_get(struct nvme_fc_ctrl *ctrl)
* controller. Called after last nvme_put_ctrl() call
*/
static void
-nvme_fc_nvme_ctrl_freed(struct nvme_ctrl *nctrl)
+nvme_fc_free_ctrl(struct nvme_ctrl *nctrl)
{
struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
@@ -3384,7 +3384,7 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
- .free_ctrl = nvme_fc_nvme_ctrl_freed,
+ .free_ctrl = nvme_fc_free_ctrl,
.submit_async_event = nvme_fc_submit_async_event,
.delete_ctrl = nvme_fc_delete_ctrl,
.get_address = nvmf_get_address,
diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
index 3dfd5ae99ae0..499a8bb7cac7 100644
--- a/drivers/nvme/host/ioctl.c
+++ b/drivers/nvme/host/ioctl.c
@@ -423,13 +423,20 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
pdu->result = le64_to_cpu(nvme_req(req)->result.u64);
/*
- * For iopoll, complete it directly.
+ * For iopoll, complete it directly. Note that using the uring_cmd
+ * helper for this is safe only because we check blk_rq_is_poll().
+ * As that returns false if we're NOT on a polled queue, then it's
+ * safe to use the polled completion helper.
+ *
* Otherwise, move the completion to task work.
*/
- if (blk_rq_is_poll(req))
- nvme_uring_task_cb(ioucmd, IO_URING_F_UNLOCKED);
- else
+ if (blk_rq_is_poll(req)) {
+ if (pdu->bio)
+ blk_rq_unmap_user(pdu->bio);
+ io_uring_cmd_iopoll_done(ioucmd, pdu->result, pdu->status);
+ } else {
io_uring_cmd_do_in_task_lazy(ioucmd, nvme_uring_task_cb);
+ }
return RQ_END_IO_FREE;
}
diff --git a/drivers/nvme/host/multipath.c b/drivers/nvme/host/multipath.c
index 5397fb428b24..d16e976ae1a4 100644
--- a/drivers/nvme/host/multipath.c
+++ b/drivers/nvme/host/multipath.c
@@ -247,7 +247,8 @@ static struct nvme_ns *__nvme_find_path(struct nvme_ns_head *head, int node)
if (nvme_path_is_disabled(ns))
continue;
- if (READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA)
+ if (ns->ctrl->numa_node != NUMA_NO_NODE &&
+ READ_ONCE(head->subsys->iopolicy) == NVME_IOPOLICY_NUMA)
distance = node_distance(node, ns->ctrl->numa_node);
else
distance = LOCAL_DISTANCE;
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index 24193fcb8bd5..05532c281177 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -162,6 +162,11 @@ enum nvme_quirks {
* Disables simple suspend/resume path.
*/
NVME_QUIRK_FORCE_NO_SIMPLE_SUSPEND = (1 << 20),
+
+ /*
+ * MSI (but not MSI-X) interrupts are broken and never fire.
+ */
+ NVME_QUIRK_BROKEN_MSI = (1 << 21),
};
/*
@@ -741,6 +746,27 @@ static inline bool nvme_is_aen_req(u16 qid, __u16 command_id)
nvme_tag_from_cid(command_id) >= NVME_AQ_BLK_MQ_DEPTH;
}
+/*
+ * Returns true for sink states that can't ever transition back to live.
+ */
+static inline bool nvme_state_terminal(struct nvme_ctrl *ctrl)
+{
+ switch (nvme_ctrl_state(ctrl)) {
+ case NVME_CTRL_NEW:
+ case NVME_CTRL_LIVE:
+ case NVME_CTRL_RESETTING:
+ case NVME_CTRL_CONNECTING:
+ return false;
+ case NVME_CTRL_DELETING:
+ case NVME_CTRL_DELETING_NOIO:
+ case NVME_CTRL_DEAD:
+ return true;
+ default:
+ WARN_ONCE(1, "Unhandled ctrl state:%d", ctrl->state);
+ return true;
+ }
+}
+
void nvme_complete_rq(struct request *req);
void nvme_complete_batch_req(struct request *req);
@@ -1036,10 +1062,18 @@ static inline bool nvme_disk_is_ns_head(struct gendisk *disk)
}
#endif /* CONFIG_NVME_MULTIPATH */
+struct nvme_zone_info {
+ u64 zone_size;
+ unsigned int max_open_zones;
+ unsigned int max_active_zones;
+};
+
int nvme_ns_report_zones(struct nvme_ns *ns, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
-int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf,
- struct queue_limits *lim);
+int nvme_query_zone_info(struct nvme_ns *ns, unsigned lbaf,
+ struct nvme_zone_info *zi);
+void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim,
+ struct nvme_zone_info *zi);
#ifdef CONFIG_BLK_DEV_ZONED
blk_status_t nvme_setup_zone_mgmt_send(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmnd,
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index e6267a6aa380..710043086dff 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -1286,6 +1286,9 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
u32 csts = readl(dev->bar + NVME_REG_CSTS);
u8 opcode;
+ if (nvme_state_terminal(&dev->ctrl))
+ goto disable;
+
/* If PCI error recovery process is happening, we cannot reset or
* the recovery mechanism will surely fail.
*/
@@ -1390,8 +1393,11 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req)
return BLK_EH_RESET_TIMER;
disable:
- if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING))
+ if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_RESETTING)) {
+ if (nvme_state_terminal(&dev->ctrl))
+ nvme_dev_disable(dev, true);
return BLK_EH_DONE;
+ }
nvme_dev_disable(dev, false);
if (nvme_try_sched_reset(&dev->ctrl))
@@ -2218,6 +2224,7 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
.priv = dev,
};
unsigned int irq_queues, poll_queues;
+ unsigned int flags = PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY;
/*
* Poll queues don't need interrupts, but we need at least one I/O queue
@@ -2241,8 +2248,10 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
irq_queues = 1;
if (!(dev->ctrl.quirks & NVME_QUIRK_SINGLE_VECTOR))
irq_queues += (nr_io_queues - poll_queues);
- return pci_alloc_irq_vectors_affinity(pdev, 1, irq_queues,
- PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY, &affd);
+ if (dev->ctrl.quirks & NVME_QUIRK_BROKEN_MSI)
+ flags &= ~PCI_IRQ_MSI;
+ return pci_alloc_irq_vectors_affinity(pdev, 1, irq_queues, flags,
+ &affd);
}
static unsigned int nvme_max_io_queues(struct nvme_dev *dev)
@@ -2471,6 +2480,7 @@ static int nvme_pci_enable(struct nvme_dev *dev)
{
int result = -ENOMEM;
struct pci_dev *pdev = to_pci_dev(dev->dev);
+ unsigned int flags = PCI_IRQ_ALL_TYPES;
if (pci_enable_device_mem(pdev))
return result;
@@ -2487,7 +2497,9 @@ static int nvme_pci_enable(struct nvme_dev *dev)
* interrupts. Pre-enable a single MSIX or MSI vec for setup. We'll
* adjust this later.
*/
- result = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+ if (dev->ctrl.quirks & NVME_QUIRK_BROKEN_MSI)
+ flags &= ~PCI_IRQ_MSI;
+ result = pci_alloc_irq_vectors(pdev, 1, 1, flags);
if (result < 0)
goto disable;
@@ -3363,6 +3375,9 @@ static const struct pci_device_id nvme_id_table[] = {
NVME_QUIRK_BOGUS_NID, },
{ PCI_VDEVICE(REDHAT, 0x0010), /* Qemu emulated controller */
.driver_data = NVME_QUIRK_BOGUS_NID, },
+ { PCI_DEVICE(0x126f, 0x2262), /* Silicon Motion generic */
+ .driver_data = NVME_QUIRK_NO_DEEPEST_PS |
+ NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x126f, 0x2263), /* Silicon Motion unidentified */
.driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
NVME_QUIRK_BOGUS_NID, },
@@ -3381,6 +3396,8 @@ static const struct pci_device_id nvme_id_table[] = {
.driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
NVME_QUIRK_DISABLE_WRITE_ZEROES|
NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+ { PCI_DEVICE(0x15b7, 0x5008), /* Sandisk SN530 */
+ .driver_data = NVME_QUIRK_BROKEN_MSI },
{ PCI_DEVICE(0x1987, 0x5012), /* Phison E12 */
.driver_data = NVME_QUIRK_BOGUS_NID, },
{ PCI_DEVICE(0x1987, 0x5016), /* Phison E16 */
diff --git a/drivers/nvme/host/pr.c b/drivers/nvme/host/pr.c
index fc3eed00f9ff..e05571b2a1b0 100644
--- a/drivers/nvme/host/pr.c
+++ b/drivers/nvme/host/pr.c
@@ -97,8 +97,7 @@ static int nvme_sc_to_pr_err(int nvme_sc)
static int nvme_send_pr_command(struct block_device *bdev,
struct nvme_command *c, void *data, unsigned int data_len)
{
- if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
- nvme_disk_is_ns_head(bdev->bd_disk))
+ if (nvme_disk_is_ns_head(bdev->bd_disk))
return nvme_send_ns_head_pr_command(bdev, c, data, data_len);
return nvme_send_ns_pr_command(bdev->bd_disk->private_data, c, data,
diff --git a/drivers/nvme/host/sysfs.c b/drivers/nvme/host/sysfs.c
index 09fcaa519e5b..3c55f7edd181 100644
--- a/drivers/nvme/host/sysfs.c
+++ b/drivers/nvme/host/sysfs.c
@@ -236,8 +236,7 @@ static ssize_t nuse_show(struct device *dev, struct device_attribute *attr,
struct block_device *bdev = disk->part0;
int ret;
- if (IS_ENABLED(CONFIG_NVME_MULTIPATH) &&
- bdev->bd_disk->fops == &nvme_ns_head_ops)
+ if (nvme_disk_is_ns_head(bdev->bd_disk))
ret = ns_head_update_nuse(head);
else
ret = ns_update_nuse(bdev->bd_disk->private_data);
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 3692b56cb58d..28bc2f373cfa 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -37,6 +37,14 @@ module_param(so_priority, int, 0644);
MODULE_PARM_DESC(so_priority, "nvme tcp socket optimize priority");
/*
+ * Use the unbound workqueue for nvme_tcp_wq, then we can set the cpu affinity
+ * from sysfs.
+ */
+static bool wq_unbound;
+module_param(wq_unbound, bool, 0644);
+MODULE_PARM_DESC(wq_unbound, "Use unbound workqueue for nvme-tcp IO context (default false)");
+
+/*
* TLS handshake timeout
*/
static int tls_handshake_timeout = 10;
@@ -352,12 +360,18 @@ static inline void nvme_tcp_send_all(struct nvme_tcp_queue *queue)
} while (ret > 0);
}
-static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
+static inline bool nvme_tcp_queue_has_pending(struct nvme_tcp_queue *queue)
{
return !list_empty(&queue->send_list) ||
!llist_empty(&queue->req_list);
}
+static inline bool nvme_tcp_queue_more(struct nvme_tcp_queue *queue)
+{
+ return !nvme_tcp_tls(&queue->ctrl->ctrl) &&
+ nvme_tcp_queue_has_pending(queue);
+}
+
static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
bool sync, bool last)
{
@@ -378,7 +392,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
mutex_unlock(&queue->send_mutex);
}
- if (last && nvme_tcp_queue_more(queue))
+ if (last && nvme_tcp_queue_has_pending(queue))
queue_work_on(queue->io_cpu, nvme_tcp_wq, &queue->io_work);
}
@@ -1546,7 +1560,10 @@ static void nvme_tcp_set_queue_io_cpu(struct nvme_tcp_queue *queue)
else if (nvme_tcp_poll_queue(queue))
n = qid - ctrl->io_queues[HCTX_TYPE_DEFAULT] -
ctrl->io_queues[HCTX_TYPE_READ] - 1;
- queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
+ if (wq_unbound)
+ queue->io_cpu = WORK_CPU_UNBOUND;
+ else
+ queue->io_cpu = cpumask_next_wrap(n - 1, cpu_online_mask, -1, false);
}
static void nvme_tcp_tls_done(void *data, int status, key_serial_t pskid)
@@ -2785,6 +2802,8 @@ static struct nvmf_transport_ops nvme_tcp_transport = {
static int __init nvme_tcp_init_module(void)
{
+ unsigned int wq_flags = WQ_MEM_RECLAIM | WQ_HIGHPRI | WQ_SYSFS;
+
BUILD_BUG_ON(sizeof(struct nvme_tcp_hdr) != 8);
BUILD_BUG_ON(sizeof(struct nvme_tcp_cmd_pdu) != 72);
BUILD_BUG_ON(sizeof(struct nvme_tcp_data_pdu) != 24);
@@ -2794,8 +2813,10 @@ static int __init nvme_tcp_init_module(void)
BUILD_BUG_ON(sizeof(struct nvme_tcp_icresp_pdu) != 128);
BUILD_BUG_ON(sizeof(struct nvme_tcp_term_pdu) != 24);
- nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq",
- WQ_MEM_RECLAIM | WQ_HIGHPRI, 0);
+ if (wq_unbound)
+ wq_flags |= WQ_UNBOUND;
+
+ nvme_tcp_wq = alloc_workqueue("nvme_tcp_wq", wq_flags, 0);
if (!nvme_tcp_wq)
return -ENOMEM;
diff --git a/drivers/nvme/host/trace.c b/drivers/nvme/host/trace.c
index 1c36fcedea20..0288315f0050 100644
--- a/drivers/nvme/host/trace.c
+++ b/drivers/nvme/host/trace.c
@@ -119,7 +119,10 @@ static const char *nvme_trace_get_lba_status(struct trace_seq *p,
static const char *nvme_trace_admin_format_nvm(struct trace_seq *p, u8 *cdw10)
{
const char *ret = trace_seq_buffer_ptr(p);
- u8 lbaf = cdw10[0] & 0xF;
+ /*
+ * lbafu(bit 13:12) is already in the upper 4 bits, lbafl: bit 03:00.
+ */
+ u8 lbaf = (cdw10[1] & 0x30) | (cdw10[0] & 0xF);
u8 mset = (cdw10[0] >> 4) & 0x1;
u8 pi = (cdw10[0] >> 5) & 0x7;
u8 pil = cdw10[1] & 0x1;
@@ -164,12 +167,27 @@ static const char *nvme_trace_dsm(struct trace_seq *p, u8 *cdw10)
static const char *nvme_trace_zone_mgmt_send(struct trace_seq *p, u8 *cdw10)
{
+ static const char * const zsa_strs[] = {
+ [0x01] = "close zone",
+ [0x02] = "finish zone",
+ [0x03] = "open zone",
+ [0x04] = "reset zone",
+ [0x05] = "offline zone",
+ [0x10] = "set zone descriptor extension"
+ };
const char *ret = trace_seq_buffer_ptr(p);
u64 slba = get_unaligned_le64(cdw10);
+ const char *zsa_str;
u8 zsa = cdw10[12];
u8 all = cdw10[13];
- trace_seq_printf(p, "slba=%llu, zsa=%u, all=%u", slba, zsa, all);
+ if (zsa < ARRAY_SIZE(zsa_strs) && zsa_strs[zsa])
+ zsa_str = zsa_strs[zsa];
+ else
+ zsa_str = "reserved";
+
+ trace_seq_printf(p, "slba=%llu, zsa=%u:%s, all=%u",
+ slba, zsa, zsa_str, all);
trace_seq_putc(p, 0);
return ret;
@@ -177,15 +195,86 @@ static const char *nvme_trace_zone_mgmt_send(struct trace_seq *p, u8 *cdw10)
static const char *nvme_trace_zone_mgmt_recv(struct trace_seq *p, u8 *cdw10)
{
+ static const char * const zrasf_strs[] = {
+ [0x00] = "list all zones",
+ [0x01] = "list the zones in the ZSE: Empty state",
+ [0x02] = "list the zones in the ZSIO: Implicitly Opened state",
+ [0x03] = "list the zones in the ZSEO: Explicitly Opened state",
+ [0x04] = "list the zones in the ZSC: Closed state",
+ [0x05] = "list the zones in the ZSF: Full state",
+ [0x06] = "list the zones in the ZSRO: Read Only state",
+ [0x07] = "list the zones in the ZSO: Offline state",
+ [0x09] = "list the zones that have the zone attribute"
+ };
const char *ret = trace_seq_buffer_ptr(p);
u64 slba = get_unaligned_le64(cdw10);
u32 numd = get_unaligned_le32(cdw10 + 8);
u8 zra = cdw10[12];
u8 zrasf = cdw10[13];
+ const char *zrasf_str;
u8 pr = cdw10[14];
- trace_seq_printf(p, "slba=%llu, numd=%u, zra=%u, zrasf=%u, pr=%u",
- slba, numd, zra, zrasf, pr);
+ if (zrasf < ARRAY_SIZE(zrasf_strs) && zrasf_strs[zrasf])
+ zrasf_str = zrasf_strs[zrasf];
+ else
+ zrasf_str = "reserved";
+
+ trace_seq_printf(p, "slba=%llu, numd=%u, zra=%u, zrasf=%u:%s, pr=%u",
+ slba, numd, zra, zrasf, zrasf_str, pr);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvme_trace_resv_reg(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u8 rrega = cdw10[0] & 0x7;
+ u8 iekey = (cdw10[0] >> 3) & 0x1;
+ u8 ptpl = (cdw10[3] >> 6) & 0x3;
+
+ trace_seq_printf(p, "rrega=%u, iekey=%u, ptpl=%u",
+ rrega, iekey, ptpl);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvme_trace_resv_acq(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u8 racqa = cdw10[0] & 0x7;
+ u8 iekey = (cdw10[0] >> 3) & 0x1;
+ u8 rtype = cdw10[1];
+
+ trace_seq_printf(p, "racqa=%u, iekey=%u, rtype=%u",
+ racqa, iekey, rtype);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvme_trace_resv_rel(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u8 rrela = cdw10[0] & 0x7;
+ u8 iekey = (cdw10[0] >> 3) & 0x1;
+ u8 rtype = cdw10[1];
+
+ trace_seq_printf(p, "rrela=%u, iekey=%u, rtype=%u",
+ rrela, iekey, rtype);
+ trace_seq_putc(p, 0);
+
+ return ret;
+}
+
+static const char *nvme_trace_resv_report(struct trace_seq *p, u8 *cdw10)
+{
+ const char *ret = trace_seq_buffer_ptr(p);
+ u32 numd = get_unaligned_le32(cdw10);
+ u8 eds = cdw10[4] & 0x1;
+
+ trace_seq_printf(p, "numd=%u, eds=%u", numd, eds);
trace_seq_putc(p, 0);
return ret;
@@ -243,6 +332,14 @@ const char *nvme_trace_parse_nvm_cmd(struct trace_seq *p,
return nvme_trace_zone_mgmt_send(p, cdw10);
case nvme_cmd_zone_mgmt_recv:
return nvme_trace_zone_mgmt_recv(p, cdw10);
+ case nvme_cmd_resv_register:
+ return nvme_trace_resv_reg(p, cdw10);
+ case nvme_cmd_resv_acquire:
+ return nvme_trace_resv_acq(p, cdw10);
+ case nvme_cmd_resv_release:
+ return nvme_trace_resv_rel(p, cdw10);
+ case nvme_cmd_resv_report:
+ return nvme_trace_resv_report(p, cdw10);
default:
return nvme_trace_common(p, cdw10);
}
diff --git a/drivers/nvme/host/zns.c b/drivers/nvme/host/zns.c
index 722384bcc765..77aa0f440a6d 100644
--- a/drivers/nvme/host/zns.c
+++ b/drivers/nvme/host/zns.c
@@ -35,8 +35,8 @@ static int nvme_set_max_append(struct nvme_ctrl *ctrl)
return 0;
}
-int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf,
- struct queue_limits *lim)
+int nvme_query_zone_info(struct nvme_ns *ns, unsigned lbaf,
+ struct nvme_zone_info *zi)
{
struct nvme_effects_log *log = ns->head->effects;
struct nvme_command c = { };
@@ -89,27 +89,34 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf,
goto free_data;
}
- ns->head->zsze =
- nvme_lba_to_sect(ns->head, le64_to_cpu(id->lbafe[lbaf].zsze));
- if (!is_power_of_2(ns->head->zsze)) {
+ zi->zone_size = le64_to_cpu(id->lbafe[lbaf].zsze);
+ if (!is_power_of_2(zi->zone_size)) {
dev_warn(ns->ctrl->device,
- "invalid zone size:%llu for namespace:%u\n",
- ns->head->zsze, ns->head->ns_id);
+ "invalid zone size: %llu for namespace: %u\n",
+ zi->zone_size, ns->head->ns_id);
status = -ENODEV;
goto free_data;
}
+ zi->max_open_zones = le32_to_cpu(id->mor) + 1;
+ zi->max_active_zones = le32_to_cpu(id->mar) + 1;
- blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue);
- lim->zoned = 1;
- lim->max_open_zones = le32_to_cpu(id->mor) + 1;
- lim->max_active_zones = le32_to_cpu(id->mar) + 1;
- lim->chunk_sectors = ns->head->zsze;
- lim->max_zone_append_sectors = ns->ctrl->max_zone_append;
free_data:
kfree(id);
return status;
}
+void nvme_update_zone_info(struct nvme_ns *ns, struct queue_limits *lim,
+ struct nvme_zone_info *zi)
+{
+ lim->zoned = 1;
+ lim->max_open_zones = zi->max_open_zones;
+ lim->max_active_zones = zi->max_active_zones;
+ lim->max_zone_append_sectors = ns->ctrl->max_zone_append;
+ lim->chunk_sectors = ns->head->zsze =
+ nvme_lba_to_sect(ns->head, zi->zone_size);
+ blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, ns->queue);
+}
+
static void *nvme_zns_alloc_report_buffer(struct nvme_ns *ns,
unsigned int nr_zones, size_t *buflen)
{