diff options
Diffstat (limited to 'drivers/nvme/host/core.c')
-rw-r--r-- | drivers/nvme/host/core.c | 77 |
1 files changed, 56 insertions, 21 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 84cb859a911d..855b42c92284 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -92,6 +92,17 @@ MODULE_PARM_DESC(apst_secondary_latency_tol_us, "secondary APST latency tolerance in us"); /* + * Older kernels didn't enable protection information if it was at an offset. + * Newer kernels do, so it breaks reads on the upgrade if such formats were + * used in prior kernels since the metadata written did not contain a valid + * checksum. + */ +static bool disable_pi_offsets = false; +module_param(disable_pi_offsets, bool, 0444); +MODULE_PARM_DESC(disable_pi_offsets, + "disable protection information if it has an offset"); + +/* * nvme_wq - hosts nvme related works that are not reset or delete * nvme_reset_wq - hosts nvme reset works * nvme_delete_wq - hosts nvme delete works @@ -1390,17 +1401,30 @@ static void nvme_update_keep_alive(struct nvme_ctrl *ctrl, nvme_start_keep_alive(ctrl); } -/* - * In NVMe 1.0 the CNS field was just a binary controller or namespace - * flag, thus sending any new CNS opcodes has a big chance of not working. - * Qemu unfortunately had that bug after reporting a 1.1 version compliance - * (but not for any later version). - */ -static bool nvme_ctrl_limited_cns(struct nvme_ctrl *ctrl) +static bool nvme_id_cns_ok(struct nvme_ctrl *ctrl, u8 cns) { - if (ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS) - return ctrl->vs < NVME_VS(1, 2, 0); - return ctrl->vs < NVME_VS(1, 1, 0); + /* + * The CNS field occupies a full byte starting with NVMe 1.2 + */ + if (ctrl->vs >= NVME_VS(1, 2, 0)) + return true; + + /* + * NVMe 1.1 expanded the CNS value to two bits, which means values + * larger than that could get truncated and treated as an incorrect + * value. + * + * Qemu implemented 1.0 behavior for controllers claiming 1.1 + * compliance, so they need to be quirked here. + */ + if (ctrl->vs >= NVME_VS(1, 1, 0) && + !(ctrl->quirks & NVME_QUIRK_IDENTIFY_CNS)) + return cns <= 3; + + /* + * NVMe 1.0 used a single bit for the CNS value. + */ + return cns <= 1; } static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) @@ -1913,8 +1937,12 @@ static void nvme_configure_metadata(struct nvme_ctrl *ctrl, if (head->pi_size && head->ms >= head->pi_size) head->pi_type = id->dps & NVME_NS_DPS_PI_MASK; - if (!(id->dps & NVME_NS_DPS_PI_FIRST)) - info->pi_offset = head->ms - head->pi_size; + if (!(id->dps & NVME_NS_DPS_PI_FIRST)) { + if (disable_pi_offsets) + head->pi_type = 0; + else + info->pi_offset = head->ms - head->pi_size; + } if (ctrl->ops->flags & NVME_F_FABRICS) { /* @@ -3104,7 +3132,7 @@ static int nvme_init_non_mdts_limits(struct nvme_ctrl *ctrl) ctrl->max_zeroes_sectors = 0; if (ctrl->subsys->subtype != NVME_NQN_NVME || - nvme_ctrl_limited_cns(ctrl) || + !nvme_id_cns_ok(ctrl, NVME_ID_CNS_CS_CTRL) || test_bit(NVME_CTRL_SKIP_ID_CNS_CS, &ctrl->flags)) return 0; @@ -3767,7 +3795,8 @@ struct nvme_ns *nvme_find_get_ns(struct nvme_ctrl *ctrl, unsigned nsid) int srcu_idx; srcu_idx = srcu_read_lock(&ctrl->srcu); - list_for_each_entry_rcu(ns, &ctrl->namespaces, list) { + list_for_each_entry_srcu(ns, &ctrl->namespaces, list, + srcu_read_lock_held(&ctrl->srcu)) { if (ns->head->ns_id == nsid) { if (!nvme_get_ns(ns)) continue; @@ -4200,7 +4229,7 @@ static void nvme_scan_work(struct work_struct *work) } mutex_lock(&ctrl->scan_lock); - if (nvme_ctrl_limited_cns(ctrl)) { + if (!nvme_id_cns_ok(ctrl, NVME_ID_CNS_NS_ACTIVE_LIST)) { nvme_scan_ns_sequential(ctrl); } else { /* @@ -4851,7 +4880,8 @@ void nvme_mark_namespaces_dead(struct nvme_ctrl *ctrl) int srcu_idx; srcu_idx = srcu_read_lock(&ctrl->srcu); - list_for_each_entry_rcu(ns, &ctrl->namespaces, list) + list_for_each_entry_srcu(ns, &ctrl->namespaces, list, + srcu_read_lock_held(&ctrl->srcu)) blk_mark_disk_dead(ns->disk); srcu_read_unlock(&ctrl->srcu, srcu_idx); } @@ -4863,7 +4893,8 @@ void nvme_unfreeze(struct nvme_ctrl *ctrl) int srcu_idx; srcu_idx = srcu_read_lock(&ctrl->srcu); - list_for_each_entry_rcu(ns, &ctrl->namespaces, list) + list_for_each_entry_srcu(ns, &ctrl->namespaces, list, + srcu_read_lock_held(&ctrl->srcu)) blk_mq_unfreeze_queue(ns->queue); srcu_read_unlock(&ctrl->srcu, srcu_idx); clear_bit(NVME_CTRL_FROZEN, &ctrl->flags); @@ -4876,7 +4907,8 @@ int nvme_wait_freeze_timeout(struct nvme_ctrl *ctrl, long timeout) int srcu_idx; srcu_idx = srcu_read_lock(&ctrl->srcu); - list_for_each_entry_rcu(ns, &ctrl->namespaces, list) { + list_for_each_entry_srcu(ns, &ctrl->namespaces, list, + srcu_read_lock_held(&ctrl->srcu)) { timeout = blk_mq_freeze_queue_wait_timeout(ns->queue, timeout); if (timeout <= 0) break; @@ -4892,7 +4924,8 @@ void nvme_wait_freeze(struct nvme_ctrl *ctrl) int srcu_idx; srcu_idx = srcu_read_lock(&ctrl->srcu); - list_for_each_entry_rcu(ns, &ctrl->namespaces, list) + list_for_each_entry_srcu(ns, &ctrl->namespaces, list, + srcu_read_lock_held(&ctrl->srcu)) blk_mq_freeze_queue_wait(ns->queue); srcu_read_unlock(&ctrl->srcu, srcu_idx); } @@ -4905,7 +4938,8 @@ void nvme_start_freeze(struct nvme_ctrl *ctrl) set_bit(NVME_CTRL_FROZEN, &ctrl->flags); srcu_idx = srcu_read_lock(&ctrl->srcu); - list_for_each_entry_rcu(ns, &ctrl->namespaces, list) + list_for_each_entry_srcu(ns, &ctrl->namespaces, list, + srcu_read_lock_held(&ctrl->srcu)) blk_freeze_queue_start(ns->queue); srcu_read_unlock(&ctrl->srcu, srcu_idx); } @@ -4953,7 +4987,8 @@ void nvme_sync_io_queues(struct nvme_ctrl *ctrl) int srcu_idx; srcu_idx = srcu_read_lock(&ctrl->srcu); - list_for_each_entry_rcu(ns, &ctrl->namespaces, list) + list_for_each_entry_srcu(ns, &ctrl->namespaces, list, + srcu_read_lock_held(&ctrl->srcu)) blk_sync_queue(ns->queue); srcu_read_unlock(&ctrl->srcu, srcu_idx); } |