Diffstat (limited to 'drivers/nvme/host/core.c')
-rw-r--r--   drivers/nvme/host/core.c   294
1 file changed, 175 insertions(+), 119 deletions(-)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 3d00ea4e7146..05aa568a60af 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -366,6 +366,16 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl,
break;
}
break;
+ case NVME_CTRL_DELETING_NOIO:
+ switch (old_state) {
+ case NVME_CTRL_DELETING:
+ case NVME_CTRL_DEAD:
+ changed = true;
+ /* FALLTHRU */
+ default:
+ break;
+ }
+ break;
case NVME_CTRL_DEAD:
switch (old_state) {
case NVME_CTRL_DELETING:
@@ -403,6 +413,7 @@ static bool nvme_state_terminal(struct nvme_ctrl *ctrl)
case NVME_CTRL_CONNECTING:
return false;
case NVME_CTRL_DELETING:
+ case NVME_CTRL_DELETING_NOIO:
case NVME_CTRL_DEAD:
return true;
default:
@@ -454,10 +465,11 @@ static void nvme_free_ns(struct kref *kref)
kfree(ns);
}
-static void nvme_put_ns(struct nvme_ns *ns)
+void nvme_put_ns(struct nvme_ns *ns)
{
kref_put(&ns->kref, nvme_free_ns);
}
+EXPORT_SYMBOL_NS_GPL(nvme_put_ns, NVME_TARGET_PASSTHRU);
static inline void nvme_clear_nvme_request(struct request *req)
{
@@ -593,6 +605,14 @@ static void nvme_assign_write_stream(struct nvme_ctrl *ctrl,
req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9;
}
+static void nvme_setup_passthrough(struct request *req,
+ struct nvme_command *cmd)
+{
+ memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd));
+ /* passthru commands should let the driver set the SGL flags */
+ cmd->common.flags &= ~NVME_CMD_SGL_ALL;
+}
+
static inline void nvme_setup_flush(struct nvme_ns *ns,
struct nvme_command *cmnd)
{
@@ -758,7 +778,7 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req,
switch (req_op(req)) {
case REQ_OP_DRV_IN:
case REQ_OP_DRV_OUT:
- memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd));
+ nvme_setup_passthrough(req, cmd);
break;
case REQ_OP_FLUSH:
nvme_setup_flush(ns, cmd);
@@ -909,6 +929,120 @@ out:
return ERR_PTR(ret);
}
+static u32 nvme_known_admin_effects(u8 opcode)
+{
+ switch (opcode) {
+ case nvme_admin_format_nvm:
+ return NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
+ NVME_CMD_EFFECTS_CSE_MASK;
+ case nvme_admin_sanitize_nvm:
+ return NVME_CMD_EFFECTS_CSE_MASK;
+ default:
+ break;
+ }
+ return 0;
+}
+
+u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode)
+{
+ u32 effects = 0;
+
+ if (ns) {
+ if (ns->head->effects)
+ effects = le32_to_cpu(ns->head->effects->iocs[opcode]);
+ if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))
+ dev_warn(ctrl->device,
+ "IO command:%02x has unhandled effects:%08x\n",
+ opcode, effects);
+ return 0;
+ }
+
+ if (ctrl->effects)
+ effects = le32_to_cpu(ctrl->effects->acs[opcode]);
+ effects |= nvme_known_admin_effects(opcode);
+
+ return effects;
+}
+EXPORT_SYMBOL_NS_GPL(nvme_command_effects, NVME_TARGET_PASSTHRU);
+
+static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
+ u8 opcode)
+{
+ u32 effects = nvme_command_effects(ctrl, ns, opcode);
+
+ /*
+ * For simplicity, IO to all namespaces is quiesced even if the command
+ * effects say only one namespace is affected.
+ */
+ if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
+ mutex_lock(&ctrl->scan_lock);
+ mutex_lock(&ctrl->subsys->lock);
+ nvme_mpath_start_freeze(ctrl->subsys);
+ nvme_mpath_wait_freeze(ctrl->subsys);
+ nvme_start_freeze(ctrl);
+ nvme_wait_freeze(ctrl);
+ }
+ return effects;
+}
+
+static void nvme_update_formats(struct nvme_ctrl *ctrl, u32 *effects)
+{
+ struct nvme_ns *ns;
+
+ down_read(&ctrl->namespaces_rwsem);
+ list_for_each_entry(ns, &ctrl->namespaces, list)
+ if (_nvme_revalidate_disk(ns->disk))
+ nvme_set_queue_dying(ns);
+ else if (blk_queue_is_zoned(ns->disk->queue)) {
+ /*
+ * IO commands are required to fully revalidate a zoned
+ * device. Force the command effects to trigger rescan
+ * work so report zones can run in a context with
+ * unfrozen IO queues.
+ */
+ *effects |= NVME_CMD_EFFECTS_NCC;
+ }
+ up_read(&ctrl->namespaces_rwsem);
+}
+
+static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
+{
+ /*
+ * Revalidate LBA changes prior to unfreezing. This is necessary to
+ * prevent memory corruption if a logical block size was changed by
+ * this command.
+ */
+ if (effects & NVME_CMD_EFFECTS_LBCC)
+ nvme_update_formats(ctrl, &effects);
+ if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
+ nvme_unfreeze(ctrl);
+ nvme_mpath_unfreeze(ctrl->subsys);
+ mutex_unlock(&ctrl->subsys->lock);
+ nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
+ mutex_unlock(&ctrl->scan_lock);
+ }
+ if (effects & NVME_CMD_EFFECTS_CCC)
+ nvme_init_identify(ctrl);
+ if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) {
+ nvme_queue_scan(ctrl);
+ flush_work(&ctrl->scan_work);
+ }
+}
+
+void nvme_execute_passthru_rq(struct request *rq)
+{
+ struct nvme_command *cmd = nvme_req(rq)->cmd;
+ struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl;
+ struct nvme_ns *ns = rq->q->queuedata;
+ struct gendisk *disk = ns ? ns->disk : NULL;
+ u32 effects;
+
+ effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode);
+ blk_execute_rq(rq->q, disk, rq, 0);
+ nvme_passthru_end(ctrl, effects);
+}
+EXPORT_SYMBOL_NS_GPL(nvme_execute_passthru_rq, NVME_TARGET_PASSTHRU);
+
static int nvme_submit_user_cmd(struct request_queue *q,
struct nvme_command *cmd, void __user *ubuffer,
unsigned bufflen, void __user *meta_buffer, unsigned meta_len,
@@ -947,7 +1081,7 @@ static int nvme_submit_user_cmd(struct request_queue *q,
}
}
- blk_execute_rq(req->q, disk, req, 0);
+ nvme_execute_passthru_rq(req);
if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
ret = -EINTR;
else
@@ -1375,105 +1509,12 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio)
metadata, meta_len, lower_32_bits(io.slba), NULL, 0);
}
-static u32 nvme_known_admin_effects(u8 opcode)
-{
- switch (opcode) {
- case nvme_admin_format_nvm:
- return NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC |
- NVME_CMD_EFFECTS_CSE_MASK;
- case nvme_admin_sanitize_nvm:
- return NVME_CMD_EFFECTS_CSE_MASK;
- default:
- break;
- }
- return 0;
-}
-
-static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
- u8 opcode)
-{
- u32 effects = 0;
-
- if (ns) {
- if (ns->head->effects)
- effects = le32_to_cpu(ns->head->effects->iocs[opcode]);
- if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC))
- dev_warn(ctrl->device,
- "IO command:%02x has unhandled effects:%08x\n",
- opcode, effects);
- return 0;
- }
-
- if (ctrl->effects)
- effects = le32_to_cpu(ctrl->effects->acs[opcode]);
- effects |= nvme_known_admin_effects(opcode);
-
- /*
- * For simplicity, IO to all namespaces is quiesced even if the command
- * effects say only one namespace is affected.
- */
- if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
- mutex_lock(&ctrl->scan_lock);
- mutex_lock(&ctrl->subsys->lock);
- nvme_mpath_start_freeze(ctrl->subsys);
- nvme_mpath_wait_freeze(ctrl->subsys);
- nvme_start_freeze(ctrl);
- nvme_wait_freeze(ctrl);
- }
- return effects;
-}
-
-static void nvme_update_formats(struct nvme_ctrl *ctrl, u32 *effects)
-{
- struct nvme_ns *ns;
-
- down_read(&ctrl->namespaces_rwsem);
- list_for_each_entry(ns, &ctrl->namespaces, list)
- if (_nvme_revalidate_disk(ns->disk))
- nvme_set_queue_dying(ns);
- else if (blk_queue_is_zoned(ns->disk->queue)) {
- /*
- * IO commands are required to fully revalidate a zoned
- * device. Force the command effects to trigger rescan
- * work so report zones can run in a context with
- * unfrozen IO queues.
- */
- *effects |= NVME_CMD_EFFECTS_NCC;
- }
- up_read(&ctrl->namespaces_rwsem);
-}
-
-static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects)
-{
- /*
- * Revalidate LBA changes prior to unfreezing. This is necessary to
- * prevent memory corruption if a logical block size was changed by
- * this command.
- */
- if (effects & NVME_CMD_EFFECTS_LBCC)
- nvme_update_formats(ctrl, &effects);
- if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) {
- nvme_unfreeze(ctrl);
- nvme_mpath_unfreeze(ctrl->subsys);
- mutex_unlock(&ctrl->subsys->lock);
- nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL);
- mutex_unlock(&ctrl->scan_lock);
- }
- if (effects & NVME_CMD_EFFECTS_CCC)
- nvme_init_identify(ctrl);
- if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) {
- nvme_queue_scan(ctrl);
- flush_work(&ctrl->scan_work);
- }
-}
-
static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct nvme_passthru_cmd __user *ucmd)
{
struct nvme_passthru_cmd cmd;
struct nvme_command c;
unsigned timeout = 0;
- u32 effects;
u64 result;
int status;
@@ -1500,12 +1541,10 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
if (cmd.timeout_ms)
timeout = msecs_to_jiffies(cmd.timeout_ms);
- effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
nvme_to_user_ptr(cmd.addr), cmd.data_len,
nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
0, &result, timeout);
- nvme_passthru_end(ctrl, effects);
if (status >= 0) {
if (put_user(result, &ucmd->result))
@@ -1521,7 +1560,6 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
struct nvme_passthru_cmd64 cmd;
struct nvme_command c;
unsigned timeout = 0;
- u32 effects;
int status;
if (!capable(CAP_SYS_ADMIN))
@@ -1547,12 +1585,10 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns,
if (cmd.timeout_ms)
timeout = msecs_to_jiffies(cmd.timeout_ms);
- effects = nvme_passthru_start(ctrl, ns, cmd.opcode);
status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c,
nvme_to_user_ptr(cmd.addr), cmd.data_len,
nvme_to_user_ptr(cmd.metadata), cmd.metadata_len,
0, &cmd.result, timeout);
- nvme_passthru_end(ctrl, effects);
if (status >= 0) {
if (put_user(cmd.result, &ucmd->result))
@@ -2345,12 +2381,7 @@ EXPORT_SYMBOL_GPL(nvme_disable_ctrl);
int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
{
- /*
- * Default to a 4K page size, with the intention to update this
- * path in the future to accomodate architectures with differing
- * kernel and IO page sizes.
- */
- unsigned dev_page_min, page_shift = 12;
+ unsigned dev_page_min;
int ret;
ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap);
@@ -2360,20 +2391,18 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl)
}
dev_page_min = NVME_CAP_MPSMIN(ctrl->cap) + 12;
- if (page_shift < dev_page_min) {
+ if (NVME_CTRL_PAGE_SHIFT < dev_page_min) {
dev_err(ctrl->device,
"Minimum device page size %u too large for host (%u)\n",
- 1 << dev_page_min, 1 << page_shift);
+ 1 << dev_page_min, 1 << NVME_CTRL_PAGE_SHIFT);
return -ENODEV;
}
- ctrl->page_size = 1 << page_shift;
-
if (NVME_CAP_CSS(ctrl->cap) & NVME_CAP_CSS_CSI)
ctrl->ctrl_config = NVME_CC_CSS_CSI;
else
ctrl->ctrl_config = NVME_CC_CSS_NVM;
- ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT;
+ ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT;
ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE;
ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES;
ctrl->ctrl_config |= NVME_CC_ENABLE;
@@ -2423,13 +2452,13 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl,
if (ctrl->max_hw_sectors) {
u32 max_segments =
- (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1;
+ (ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> 9)) + 1;
max_segments = min_not_zero(max_segments, ctrl->max_segments);
blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors);
blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX));
}
- blk_queue_virt_boundary(q, ctrl->page_size - 1);
+ blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1);
blk_queue_dma_alignment(q, 7);
if (ctrl->vwc & NVME_CTRL_VWC_PRESENT)
vwc = true;
@@ -3483,6 +3512,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev,
[NVME_CTRL_RESETTING] = "resetting",
[NVME_CTRL_CONNECTING] = "connecting",
[NVME_CTRL_DELETING] = "deleting",
+ [NVME_CTRL_DELETING_NOIO]= "deleting (no IO)",
[NVME_CTRL_DEAD] = "dead",
};
@@ -3586,8 +3616,8 @@ static ssize_t nvme_ctrl_reconnect_delay_store(struct device *dev,
int err;
err = kstrtou32(buf, 10, &v);
- if (err || v > UINT_MAX)
- return -EINVAL;
+ if (err)
+ return err;
ctrl->opts->reconnect_delay = v;
return count;
@@ -3798,7 +3828,7 @@ static int ns_cmp(void *priv, struct list_head *a, struct list_head *b)
return nsa->head->ns_id - nsb->head->ns_id;
}
-static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
+struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
struct nvme_ns *ns, *ret = NULL;
@@ -3816,6 +3846,7 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid)
up_read(&ctrl->namespaces_rwsem);
return ret;
}
+EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU);
static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
{
@@ -4119,6 +4150,9 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl)
if (ctrl->state == NVME_CTRL_DEAD)
nvme_kill_queues(ctrl);
+ /* this is a no-op when called from the controller reset handler */
+ nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING_NOIO);
+
down_write(&ctrl->namespaces_rwsem);
list_splice_init(&ctrl->namespaces, &ns_list);
up_write(&ctrl->namespaces_rwsem);
@@ -4313,8 +4347,7 @@ EXPORT_SYMBOL_GPL(nvme_stop_ctrl);
void nvme_start_ctrl(struct nvme_ctrl *ctrl)
{
- if (ctrl->kato)
- nvme_start_keep_alive(ctrl);
+ nvme_start_keep_alive(ctrl);
nvme_enable_aen(ctrl);
@@ -4559,6 +4592,29 @@ void nvme_sync_queues(struct nvme_ctrl *ctrl)
}
EXPORT_SYMBOL_GPL(nvme_sync_queues);
+struct nvme_ctrl *nvme_ctrl_get_by_path(const char *path)
+{
+ struct nvme_ctrl *ctrl;
+ struct file *f;
+
+ f = filp_open(path, O_RDWR, 0);
+ if (IS_ERR(f))
+ return ERR_CAST(f);
+
+ if (f->f_op != &nvme_dev_fops) {
+ ctrl = ERR_PTR(-EINVAL);
+ goto out_close;
+ }
+
+ ctrl = f->private_data;
+ nvme_get_ctrl(ctrl);
+
+out_close:
+ filp_close(f, NULL);
+ return ctrl;
+}
+EXPORT_SYMBOL_NS_GPL(nvme_ctrl_get_by_path, NVME_TARGET_PASSTHRU);
+
/*
* Check we didn't inadvertently grow the command structure sizes:
*/