summaryrefslogtreecommitdiff
path: root/drivers/nvme/host/pci.c
diff options
context:
space:
mode:
authorJens Axboe <axboe@kernel.dk>2020-07-29 11:22:40 -0600
committerJens Axboe <axboe@kernel.dk>2020-07-29 11:22:40 -0600
commita9e8e18aaf914bf273ed63480c4c3d8dd626c95f (patch)
tree4193e82a579b4c627ad5139f1b87258bd9d2e44d /drivers/nvme/host/pci.c
parentc5be1f2c5bab1538aa29cd42e226d6b80391e3ff (diff)
parentb6cec06d19d90db5dbcc50034fb33983f6259b8b (diff)
Merge branch 'nvme-5.9' of git://git.infradead.org/nvme into for-5.9/drivers
Pull NVMe updates from Christoph. * 'nvme-5.9' of git://git.infradead.org/nvme: (30 commits) nvme-loop: remove extra variable in create ctrl nvme-loop: set ctrl state connecting after init nvme-multipath: do not fall back to __nvme_find_path() for non-optimized paths nvme-multipath: fix logic for non-optimized paths nvme-rdma: fix controller reset hang during traffic nvme-tcp: fix controller reset hang during traffic nvmet: introduce the passthru Kconfig option nvmet: introduce the passthru configfs interface nvmet: Add passthru enable/disable helpers nvmet: add passthru code to process commands nvme: export nvme_find_get_ns() and nvme_put_ns() nvme: introduce nvme_ctrl_get_by_path() nvme: introduce nvme_execute_passthru_rq to call nvme_passthru_[start|end]() nvme: create helper function to obtain command effects nvme: clear any SGL flags in passthru commands nvmet-fc: remove redundant del_work_active flag nvmet-fc: check successful reference in nvmet_fc_find_target_assoc nvme-fc: set max_segments to lldd max value nvme-fc: drop a duplicated word in a comment nvme-hwmon: log the controller device name ...
Diffstat (limited to 'drivers/nvme/host/pci.c')
-rw-r--r--drivers/nvme/host/pci.c133
1 files changed, 95 insertions, 38 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 45e94f016ec2..61e612d52d61 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -4,6 +4,7 @@
* Copyright (c) 2011-2014, Intel Corporation.
*/
+#include <linux/acpi.h>
#include <linux/aer.h>
#include <linux/async.h>
#include <linux/blkdev.h>
@@ -94,6 +95,10 @@ static unsigned int poll_queues;
module_param_cb(poll_queues, &io_queue_count_ops, &poll_queues, 0644);
MODULE_PARM_DESC(poll_queues, "Number of queues to use for polled IO.");
+static bool noacpi;
+module_param(noacpi, bool, 0444);
+MODULE_PARM_DESC(noacpi, "disable acpi bios quirks");
+
struct nvme_dev;
struct nvme_queue;
@@ -346,10 +351,10 @@ static bool nvme_dbbuf_update_and_check_event(u16 value, u32 *dbbuf_db,
* as it only leads to a small amount of wasted memory for the lifetime of
* the I/O.
*/
-static int nvme_npages(unsigned size, struct nvme_dev *dev)
+static int nvme_pci_npages_prp(void)
{
- unsigned nprps = DIV_ROUND_UP(size + dev->ctrl.page_size,
- dev->ctrl.page_size);
+ unsigned nprps = DIV_ROUND_UP(NVME_MAX_KB_SZ + NVME_CTRL_PAGE_SIZE,
+ NVME_CTRL_PAGE_SIZE);
return DIV_ROUND_UP(8 * nprps, PAGE_SIZE - 8);
}
@@ -357,22 +362,18 @@ static int nvme_npages(unsigned size, struct nvme_dev *dev)
* Calculates the number of pages needed for the SGL segments. For example a 4k
* page can accommodate 256 SGL descriptors.
*/
-static int nvme_pci_npages_sgl(unsigned int num_seg)
+static int nvme_pci_npages_sgl(void)
{
- return DIV_ROUND_UP(num_seg * sizeof(struct nvme_sgl_desc), PAGE_SIZE);
+ return DIV_ROUND_UP(NVME_MAX_SEGS * sizeof(struct nvme_sgl_desc),
+ PAGE_SIZE);
}
-static size_t nvme_pci_iod_alloc_size(struct nvme_dev *dev,
- unsigned int size, unsigned int nseg, bool use_sgl)
+static size_t nvme_pci_iod_alloc_size(void)
{
- size_t alloc_size;
-
- if (use_sgl)
- alloc_size = sizeof(__le64 *) * nvme_pci_npages_sgl(nseg);
- else
- alloc_size = sizeof(__le64 *) * nvme_npages(size, dev);
+ size_t npages = max(nvme_pci_npages_prp(), nvme_pci_npages_sgl());
- return alloc_size + sizeof(struct scatterlist) * nseg;
+ return sizeof(__le64 *) * npages +
+ sizeof(struct scatterlist) * NVME_MAX_SEGS;
}
static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
@@ -515,7 +516,7 @@ static inline bool nvme_pci_use_sgls(struct nvme_dev *dev, struct request *req)
static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- const int last_prp = dev->ctrl.page_size / sizeof(__le64) - 1;
+ const int last_prp = NVME_CTRL_PAGE_SIZE / sizeof(__le64) - 1;
dma_addr_t dma_addr = iod->first_dma, next_dma_addr;
int i;
@@ -582,34 +583,33 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
struct scatterlist *sg = iod->sg;
int dma_len = sg_dma_len(sg);
u64 dma_addr = sg_dma_address(sg);
- u32 page_size = dev->ctrl.page_size;
- int offset = dma_addr & (page_size - 1);
+ int offset = dma_addr & (NVME_CTRL_PAGE_SIZE - 1);
__le64 *prp_list;
void **list = nvme_pci_iod_list(req);
dma_addr_t prp_dma;
int nprps, i;
- length -= (page_size - offset);
+ length -= (NVME_CTRL_PAGE_SIZE - offset);
if (length <= 0) {
iod->first_dma = 0;
goto done;
}
- dma_len -= (page_size - offset);
+ dma_len -= (NVME_CTRL_PAGE_SIZE - offset);
if (dma_len) {
- dma_addr += (page_size - offset);
+ dma_addr += (NVME_CTRL_PAGE_SIZE - offset);
} else {
sg = sg_next(sg);
dma_addr = sg_dma_address(sg);
dma_len = sg_dma_len(sg);
}
- if (length <= page_size) {
+ if (length <= NVME_CTRL_PAGE_SIZE) {
iod->first_dma = dma_addr;
goto done;
}
- nprps = DIV_ROUND_UP(length, page_size);
+ nprps = DIV_ROUND_UP(length, NVME_CTRL_PAGE_SIZE);
if (nprps <= (256 / 8)) {
pool = dev->prp_small_pool;
iod->npages = 0;
@@ -628,7 +628,7 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
iod->first_dma = prp_dma;
i = 0;
for (;;) {
- if (i == page_size >> 3) {
+ if (i == NVME_CTRL_PAGE_SIZE >> 3) {
__le64 *old_prp_list = prp_list;
prp_list = dma_pool_alloc(pool, GFP_ATOMIC, &prp_dma);
if (!prp_list)
@@ -639,9 +639,9 @@ static blk_status_t nvme_pci_setup_prps(struct nvme_dev *dev,
i = 1;
}
prp_list[i++] = cpu_to_le64(dma_addr);
- dma_len -= page_size;
- dma_addr += page_size;
- length -= page_size;
+ dma_len -= NVME_CTRL_PAGE_SIZE;
+ dma_addr += NVME_CTRL_PAGE_SIZE;
+ length -= NVME_CTRL_PAGE_SIZE;
if (length <= 0)
break;
if (dma_len > 0)
@@ -751,8 +751,8 @@ static blk_status_t nvme_setup_prp_simple(struct nvme_dev *dev,
struct bio_vec *bv)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- unsigned int offset = bv->bv_offset & (dev->ctrl.page_size - 1);
- unsigned int first_prp_len = dev->ctrl.page_size - offset;
+ unsigned int offset = bv->bv_offset & (NVME_CTRL_PAGE_SIZE - 1);
+ unsigned int first_prp_len = NVME_CTRL_PAGE_SIZE - offset;
iod->first_dma = dma_map_bvec(dev->dev, bv, rq_dma_dir(req), 0);
if (dma_mapping_error(dev->dev, iod->first_dma))
@@ -794,7 +794,7 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
struct bio_vec bv = req_bvec(req);
if (!is_pci_p2pdma_page(bv.bv_page)) {
- if (bv.bv_offset + bv.bv_len <= dev->ctrl.page_size * 2)
+ if (bv.bv_offset + bv.bv_len <= NVME_CTRL_PAGE_SIZE * 2)
return nvme_setup_prp_simple(dev, req,
&cmnd->rw, &bv);
@@ -1396,12 +1396,12 @@ static int nvme_cmb_qdepth(struct nvme_dev *dev, int nr_io_queues,
{
int q_depth = dev->q_depth;
unsigned q_size_aligned = roundup(q_depth * entry_size,
- dev->ctrl.page_size);
+ NVME_CTRL_PAGE_SIZE);
if (q_size_aligned * nr_io_queues > dev->cmb_size) {
u64 mem_per_q = div_u64(dev->cmb_size, nr_io_queues);
- mem_per_q = round_down(mem_per_q, dev->ctrl.page_size);
+ mem_per_q = round_down(mem_per_q, NVME_CTRL_PAGE_SIZE);
q_depth = div_u64(mem_per_q, entry_size);
/*
@@ -1816,6 +1816,7 @@ static inline void nvme_release_cmb(struct nvme_dev *dev)
static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
{
+ u32 host_mem_size = dev->host_mem_size >> NVME_CTRL_PAGE_SHIFT;
u64 dma_addr = dev->host_mem_descs_dma;
struct nvme_command c;
int ret;
@@ -1824,8 +1825,7 @@ static int nvme_set_host_mem(struct nvme_dev *dev, u32 bits)
c.features.opcode = nvme_admin_set_features;
c.features.fid = cpu_to_le32(NVME_FEAT_HOST_MEM_BUF);
c.features.dword11 = cpu_to_le32(bits);
- c.features.dword12 = cpu_to_le32(dev->host_mem_size >>
- ilog2(dev->ctrl.page_size));
+ c.features.dword12 = cpu_to_le32(host_mem_size);
c.features.dword13 = cpu_to_le32(lower_32_bits(dma_addr));
c.features.dword14 = cpu_to_le32(upper_32_bits(dma_addr));
c.features.dword15 = cpu_to_le32(dev->nr_host_mem_descs);
@@ -1845,7 +1845,7 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
for (i = 0; i < dev->nr_host_mem_descs; i++) {
struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i];
- size_t size = le32_to_cpu(desc->size) * dev->ctrl.page_size;
+ size_t size = le32_to_cpu(desc->size) * NVME_CTRL_PAGE_SIZE;
dma_free_attrs(dev->dev, size, dev->host_mem_desc_bufs[i],
le64_to_cpu(desc->addr),
@@ -1897,7 +1897,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
break;
descs[i].addr = cpu_to_le64(dma_addr);
- descs[i].size = cpu_to_le32(len / dev->ctrl.page_size);
+ descs[i].size = cpu_to_le32(len / NVME_CTRL_PAGE_SIZE);
i++;
}
@@ -1913,7 +1913,7 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
out_free_bufs:
while (--i >= 0) {
- size_t size = le32_to_cpu(descs[i].size) * dev->ctrl.page_size;
+ size_t size = le32_to_cpu(descs[i].size) * NVME_CTRL_PAGE_SIZE;
dma_free_attrs(dev->dev, size, bufs[i],
le64_to_cpu(descs[i].addr),
@@ -2759,6 +2759,54 @@ static unsigned long check_vendor_combination_bug(struct pci_dev *pdev)
return 0;
}
+#ifdef CONFIG_ACPI
+static bool nvme_acpi_storage_d3(struct pci_dev *dev)
+{
+ struct acpi_device *adev;
+ struct pci_dev *root;
+ acpi_handle handle;
+ acpi_status status;
+ u8 val;
+
+ /*
+ * Look for _DSD property specifying that the storage device on the port
+ * must use D3 to support deep platform power savings during
+ * suspend-to-idle.
+ */
+ root = pcie_find_root_port(dev);
+ if (!root)
+ return false;
+
+ adev = ACPI_COMPANION(&root->dev);
+ if (!adev)
+ return false;
+
+ /*
+ * The property is defined in the PXSX device for South complex ports
+ * and in the PEGP device for North complex ports.
+ */
+ status = acpi_get_handle(adev->handle, "PXSX", &handle);
+ if (ACPI_FAILURE(status)) {
+ status = acpi_get_handle(adev->handle, "PEGP", &handle);
+ if (ACPI_FAILURE(status))
+ return false;
+ }
+
+ if (acpi_bus_get_device(handle, &adev))
+ return false;
+
+ if (fwnode_property_read_u8(acpi_fwnode_handle(adev), "StorageD3Enable",
+ &val))
+ return false;
+ return val == 1;
+}
+#else
+static inline bool nvme_acpi_storage_d3(struct pci_dev *dev)
+{
+ return false;
+}
+#endif /* CONFIG_ACPI */
+
static void nvme_async_probe(void *data, async_cookie_t cookie)
{
struct nvme_dev *dev = data;
@@ -2808,12 +2856,21 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
quirks |= check_vendor_combination_bug(pdev);
+ if (!noacpi && nvme_acpi_storage_d3(pdev)) {
+ /*
+ * Some systems use a bios work around to ask for D3 on
+ * platforms that support kernel managed suspend.
+ */
+ dev_info(&pdev->dev,
+ "platform quirk: setting simple suspend\n");
+ quirks |= NVME_QUIRK_SIMPLE_SUSPEND;
+ }
+
/*
* Double check that our mempool alloc size will cover the biggest
* command we support.
*/
- alloc_size = nvme_pci_iod_alloc_size(dev, NVME_MAX_KB_SZ,
- NVME_MAX_SEGS, true);
+ alloc_size = nvme_pci_iod_alloc_size();
WARN_ON_ONCE(alloc_size > PAGE_SIZE);
dev->iod_mempool = mempool_create_node(1, mempool_kmalloc,