diff options
Diffstat (limited to 'drivers/nvme/host/core.c')
-rw-r--r-- | drivers/nvme/host/core.c | 294 |
1 files changed, 175 insertions, 119 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 3d00ea4e7146..05aa568a60af 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -366,6 +366,16 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, break; } break; + case NVME_CTRL_DELETING_NOIO: + switch (old_state) { + case NVME_CTRL_DELETING: + case NVME_CTRL_DEAD: + changed = true; + /* FALLTHRU */ + default: + break; + } + break; case NVME_CTRL_DEAD: switch (old_state) { case NVME_CTRL_DELETING: @@ -403,6 +413,7 @@ static bool nvme_state_terminal(struct nvme_ctrl *ctrl) case NVME_CTRL_CONNECTING: return false; case NVME_CTRL_DELETING: + case NVME_CTRL_DELETING_NOIO: case NVME_CTRL_DEAD: return true; default: @@ -454,10 +465,11 @@ static void nvme_free_ns(struct kref *kref) kfree(ns); } -static void nvme_put_ns(struct nvme_ns *ns) +void nvme_put_ns(struct nvme_ns *ns) { kref_put(&ns->kref, nvme_free_ns); } +EXPORT_SYMBOL_NS_GPL(nvme_put_ns, NVME_TARGET_PASSTHRU); static inline void nvme_clear_nvme_request(struct request *req) { @@ -593,6 +605,14 @@ static void nvme_assign_write_stream(struct nvme_ctrl *ctrl, req->q->write_hints[streamid] += blk_rq_bytes(req) >> 9; } +static void nvme_setup_passthrough(struct request *req, + struct nvme_command *cmd) +{ + memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd)); + /* passthru commands should let the driver set the SGL flags */ + cmd->common.flags &= ~NVME_CMD_SGL_ALL; +} + static inline void nvme_setup_flush(struct nvme_ns *ns, struct nvme_command *cmnd) { @@ -758,7 +778,7 @@ blk_status_t nvme_setup_cmd(struct nvme_ns *ns, struct request *req, switch (req_op(req)) { case REQ_OP_DRV_IN: case REQ_OP_DRV_OUT: - memcpy(cmd, nvme_req(req)->cmd, sizeof(*cmd)); + nvme_setup_passthrough(req, cmd); break; case REQ_OP_FLUSH: nvme_setup_flush(ns, cmd); @@ -909,6 +929,120 @@ out: return ERR_PTR(ret); } +static u32 nvme_known_admin_effects(u8 opcode) +{ + switch (opcode) { + case nvme_admin_format_nvm: + return NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | + NVME_CMD_EFFECTS_CSE_MASK; + case nvme_admin_sanitize_nvm: + return NVME_CMD_EFFECTS_CSE_MASK; + default: + break; + } + return 0; +} + +u32 nvme_command_effects(struct nvme_ctrl *ctrl, struct nvme_ns *ns, u8 opcode) +{ + u32 effects = 0; + + if (ns) { + if (ns->head->effects) + effects = le32_to_cpu(ns->head->effects->iocs[opcode]); + if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC)) + dev_warn(ctrl->device, + "IO command:%02x has unhandled effects:%08x\n", + opcode, effects); + return 0; + } + + if (ctrl->effects) + effects = le32_to_cpu(ctrl->effects->acs[opcode]); + effects |= nvme_known_admin_effects(opcode); + + return effects; +} +EXPORT_SYMBOL_NS_GPL(nvme_command_effects, NVME_TARGET_PASSTHRU); + +static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, + u8 opcode) +{ + u32 effects = nvme_command_effects(ctrl, ns, opcode); + + /* + * For simplicity, IO to all namespaces is quiesced even if the command + * effects say only one namespace is affected. + */ + if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { + mutex_lock(&ctrl->scan_lock); + mutex_lock(&ctrl->subsys->lock); + nvme_mpath_start_freeze(ctrl->subsys); + nvme_mpath_wait_freeze(ctrl->subsys); + nvme_start_freeze(ctrl); + nvme_wait_freeze(ctrl); + } + return effects; +} + +static void nvme_update_formats(struct nvme_ctrl *ctrl, u32 *effects) +{ + struct nvme_ns *ns; + + down_read(&ctrl->namespaces_rwsem); + list_for_each_entry(ns, &ctrl->namespaces, list) + if (_nvme_revalidate_disk(ns->disk)) + nvme_set_queue_dying(ns); + else if (blk_queue_is_zoned(ns->disk->queue)) { + /* + * IO commands are required to fully revalidate a zoned + * device. Force the command effects to trigger rescan + * work so report zones can run in a context with + * unfrozen IO queues. + */ + *effects |= NVME_CMD_EFFECTS_NCC; + } + up_read(&ctrl->namespaces_rwsem); +} + +static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) +{ + /* + * Revalidate LBA changes prior to unfreezing. This is necessary to + * prevent memory corruption if a logical block size was changed by + * this command. + */ + if (effects & NVME_CMD_EFFECTS_LBCC) + nvme_update_formats(ctrl, &effects); + if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { + nvme_unfreeze(ctrl); + nvme_mpath_unfreeze(ctrl->subsys); + mutex_unlock(&ctrl->subsys->lock); + nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL); + mutex_unlock(&ctrl->scan_lock); + } + if (effects & NVME_CMD_EFFECTS_CCC) + nvme_init_identify(ctrl); + if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) { + nvme_queue_scan(ctrl); + flush_work(&ctrl->scan_work); + } +} + +void nvme_execute_passthru_rq(struct request *rq) +{ + struct nvme_command *cmd = nvme_req(rq)->cmd; + struct nvme_ctrl *ctrl = nvme_req(rq)->ctrl; + struct nvme_ns *ns = rq->q->queuedata; + struct gendisk *disk = ns ? ns->disk : NULL; + u32 effects; + + effects = nvme_passthru_start(ctrl, ns, cmd->common.opcode); + blk_execute_rq(rq->q, disk, rq, 0); + nvme_passthru_end(ctrl, effects); +} +EXPORT_SYMBOL_NS_GPL(nvme_execute_passthru_rq, NVME_TARGET_PASSTHRU); + static int nvme_submit_user_cmd(struct request_queue *q, struct nvme_command *cmd, void __user *ubuffer, unsigned bufflen, void __user *meta_buffer, unsigned meta_len, @@ -947,7 +1081,7 @@ static int nvme_submit_user_cmd(struct request_queue *q, } } - blk_execute_rq(req->q, disk, req, 0); + nvme_execute_passthru_rq(req); if (nvme_req(req)->flags & NVME_REQ_CANCELLED) ret = -EINTR; else @@ -1375,105 +1509,12 @@ static int nvme_submit_io(struct nvme_ns *ns, struct nvme_user_io __user *uio) metadata, meta_len, lower_32_bits(io.slba), NULL, 0); } -static u32 nvme_known_admin_effects(u8 opcode) -{ - switch (opcode) { - case nvme_admin_format_nvm: - return NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC | - NVME_CMD_EFFECTS_CSE_MASK; - case nvme_admin_sanitize_nvm: - return NVME_CMD_EFFECTS_CSE_MASK; - default: - break; - } - return 0; -} - -static u32 nvme_passthru_start(struct nvme_ctrl *ctrl, struct nvme_ns *ns, - u8 opcode) -{ - u32 effects = 0; - - if (ns) { - if (ns->head->effects) - effects = le32_to_cpu(ns->head->effects->iocs[opcode]); - if (effects & ~(NVME_CMD_EFFECTS_CSUPP | NVME_CMD_EFFECTS_LBCC)) - dev_warn(ctrl->device, - "IO command:%02x has unhandled effects:%08x\n", - opcode, effects); - return 0; - } - - if (ctrl->effects) - effects = le32_to_cpu(ctrl->effects->acs[opcode]); - effects |= nvme_known_admin_effects(opcode); - - /* - * For simplicity, IO to all namespaces is quiesced even if the command - * effects say only one namespace is affected. - */ - if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { - mutex_lock(&ctrl->scan_lock); - mutex_lock(&ctrl->subsys->lock); - nvme_mpath_start_freeze(ctrl->subsys); - nvme_mpath_wait_freeze(ctrl->subsys); - nvme_start_freeze(ctrl); - nvme_wait_freeze(ctrl); - } - return effects; -} - -static void nvme_update_formats(struct nvme_ctrl *ctrl, u32 *effects) -{ - struct nvme_ns *ns; - - down_read(&ctrl->namespaces_rwsem); - list_for_each_entry(ns, &ctrl->namespaces, list) - if (_nvme_revalidate_disk(ns->disk)) - nvme_set_queue_dying(ns); - else if (blk_queue_is_zoned(ns->disk->queue)) { - /* - * IO commands are required to fully revalidate a zoned - * device. Force the command effects to trigger rescan - * work so report zones can run in a context with - * unfrozen IO queues. - */ - *effects |= NVME_CMD_EFFECTS_NCC; - } - up_read(&ctrl->namespaces_rwsem); -} - -static void nvme_passthru_end(struct nvme_ctrl *ctrl, u32 effects) -{ - /* - * Revalidate LBA changes prior to unfreezing. This is necessary to - * prevent memory corruption if a logical block size was changed by - * this command. - */ - if (effects & NVME_CMD_EFFECTS_LBCC) - nvme_update_formats(ctrl, &effects); - if (effects & (NVME_CMD_EFFECTS_LBCC | NVME_CMD_EFFECTS_CSE_MASK)) { - nvme_unfreeze(ctrl); - nvme_mpath_unfreeze(ctrl->subsys); - mutex_unlock(&ctrl->subsys->lock); - nvme_remove_invalid_namespaces(ctrl, NVME_NSID_ALL); - mutex_unlock(&ctrl->scan_lock); - } - if (effects & NVME_CMD_EFFECTS_CCC) - nvme_init_identify(ctrl); - if (effects & (NVME_CMD_EFFECTS_NIC | NVME_CMD_EFFECTS_NCC)) { - nvme_queue_scan(ctrl); - flush_work(&ctrl->scan_work); - } -} - static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, struct nvme_passthru_cmd __user *ucmd) { struct nvme_passthru_cmd cmd; struct nvme_command c; unsigned timeout = 0; - u32 effects; u64 result; int status; @@ -1500,12 +1541,10 @@ static int nvme_user_cmd(struct nvme_ctrl *ctrl, struct nvme_ns *ns, if (cmd.timeout_ms) timeout = msecs_to_jiffies(cmd.timeout_ms); - effects = nvme_passthru_start(ctrl, ns, cmd.opcode); status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, nvme_to_user_ptr(cmd.addr), cmd.data_len, nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, 0, &result, timeout); - nvme_passthru_end(ctrl, effects); if (status >= 0) { if (put_user(result, &ucmd->result)) @@ -1521,7 +1560,6 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, struct nvme_passthru_cmd64 cmd; struct nvme_command c; unsigned timeout = 0; - u32 effects; int status; if (!capable(CAP_SYS_ADMIN)) @@ -1547,12 +1585,10 @@ static int nvme_user_cmd64(struct nvme_ctrl *ctrl, struct nvme_ns *ns, if (cmd.timeout_ms) timeout = msecs_to_jiffies(cmd.timeout_ms); - effects = nvme_passthru_start(ctrl, ns, cmd.opcode); status = nvme_submit_user_cmd(ns ? ns->queue : ctrl->admin_q, &c, nvme_to_user_ptr(cmd.addr), cmd.data_len, nvme_to_user_ptr(cmd.metadata), cmd.metadata_len, 0, &cmd.result, timeout); - nvme_passthru_end(ctrl, effects); if (status >= 0) { if (put_user(cmd.result, &ucmd->result)) @@ -2345,12 +2381,7 @@ EXPORT_SYMBOL_GPL(nvme_disable_ctrl); int nvme_enable_ctrl(struct nvme_ctrl *ctrl) { - /* - * Default to a 4K page size, with the intention to update this - * path in the future to accomodate architectures with differing - * kernel and IO page sizes. - */ - unsigned dev_page_min, page_shift = 12; + unsigned dev_page_min; int ret; ret = ctrl->ops->reg_read64(ctrl, NVME_REG_CAP, &ctrl->cap); @@ -2360,20 +2391,18 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl) } dev_page_min = NVME_CAP_MPSMIN(ctrl->cap) + 12; - if (page_shift < dev_page_min) { + if (NVME_CTRL_PAGE_SHIFT < dev_page_min) { dev_err(ctrl->device, "Minimum device page size %u too large for host (%u)\n", - 1 << dev_page_min, 1 << page_shift); + 1 << dev_page_min, 1 << NVME_CTRL_PAGE_SHIFT); return -ENODEV; } - ctrl->page_size = 1 << page_shift; - if (NVME_CAP_CSS(ctrl->cap) & NVME_CAP_CSS_CSI) ctrl->ctrl_config = NVME_CC_CSS_CSI; else ctrl->ctrl_config = NVME_CC_CSS_NVM; - ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT; + ctrl->ctrl_config |= (NVME_CTRL_PAGE_SHIFT - 12) << NVME_CC_MPS_SHIFT; ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE; ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; ctrl->ctrl_config |= NVME_CC_ENABLE; @@ -2423,13 +2452,13 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, if (ctrl->max_hw_sectors) { u32 max_segments = - (ctrl->max_hw_sectors / (ctrl->page_size >> 9)) + 1; + (ctrl->max_hw_sectors / (NVME_CTRL_PAGE_SIZE >> 9)) + 1; max_segments = min_not_zero(max_segments, ctrl->max_segments); blk_queue_max_hw_sectors(q, ctrl->max_hw_sectors); blk_queue_max_segments(q, min_t(u32, max_segments, USHRT_MAX)); } - blk_queue_virt_boundary(q, ctrl->page_size - 1); + blk_queue_virt_boundary(q, NVME_CTRL_PAGE_SIZE - 1); blk_queue_dma_alignment(q, 7); if (ctrl->vwc & NVME_CTRL_VWC_PRESENT) vwc = true; @@ -3483,6 +3512,7 @@ static ssize_t nvme_sysfs_show_state(struct device *dev, [NVME_CTRL_RESETTING] = "resetting", [NVME_CTRL_CONNECTING] = "connecting", [NVME_CTRL_DELETING] = "deleting", + [NVME_CTRL_DELETING_NOIO]= "deleting (no IO)", [NVME_CTRL_DEAD] = "dead", }; @@ -3586,8 +3616,8 @@ static ssize_t nvme_ctrl_reconnect_delay_store(struct device *dev, int err; err = kstrtou32(buf, 10, &v); - if (err || v > UINT_MAX) - return -EINVAL; + if (err) + return err; ctrl->opts->reconnect_delay = v; return count; @@ -3798,7 +3828,7 @@ static int ns_cmp(void *priv, struct list_head *a, struct list_head *b) return nsa->head->ns_id - nsb->head->ns_id; } -static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) +struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) { struct nvme_ns *ns, *ret = NULL; @@ -3816,6 +3846,7 @@ static struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) up_read(&ctrl->namespaces_rwsem); return ret; } +EXPORT_SYMBOL_NS_GPL(nvme_find_get_ns, NVME_TARGET_PASSTHRU); static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) { @@ -4119,6 +4150,9 @@ void nvme_remove_namespaces(struct nvme_ctrl *ctrl) if (ctrl->state == NVME_CTRL_DEAD) nvme_kill_queues(ctrl); + /* this is a no-op when called from the controller reset handler */ + nvme_change_ctrl_state(ctrl, NVME_CTRL_DELETING_NOIO); + down_write(&ctrl->namespaces_rwsem); list_splice_init(&ctrl->namespaces, &ns_list); up_write(&ctrl->namespaces_rwsem); @@ -4313,8 +4347,7 @@ EXPORT_SYMBOL_GPL(nvme_stop_ctrl); void nvme_start_ctrl(struct nvme_ctrl *ctrl) { - if (ctrl->kato) - nvme_start_keep_alive(ctrl); + nvme_start_keep_alive(ctrl); nvme_enable_aen(ctrl); @@ -4559,6 +4592,29 @@ void nvme_sync_queues(struct nvme_ctrl *ctrl) } EXPORT_SYMBOL_GPL(nvme_sync_queues); +struct nvme_ctrl *nvme_ctrl_get_by_path(const char *path) +{ + struct nvme_ctrl *ctrl; + struct file *f; + + f = filp_open(path, O_RDWR, 0); + if (IS_ERR(f)) + return ERR_CAST(f); + + if (f->f_op != &nvme_dev_fops) { + ctrl = ERR_PTR(-EINVAL); + goto out_close; + } + + ctrl = f->private_data; + nvme_get_ctrl(ctrl); + +out_close: + filp_close(f, NULL); + return ctrl; +} +EXPORT_SYMBOL_NS_GPL(nvme_ctrl_get_by_path, NVME_TARGET_PASSTHRU); + /* * Check we didn't inadvertently grow the command structure sizes: */ |