From b814735f5ca477a6d4f446f00d16da4a18e33bf5 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Fri, 2 Mar 2018 13:20:49 +0100
Subject: acpi, nfit: remove redundant __func__ in dev_dbg

Dynamic debug can be instructed to add the function name to the debug
output using the +f switch, so there is no need for the nfit module to
do it again. If a user decides to add the +f switch for nfit's dynamic
debug this results in double prints of the function name like the
following:

[ 2391.935383] acpi_nfit_ctl: nfit ACPI0012:00: acpi_nfit_ctl:nmem8 cmd: 10: func: 1 input length: 0

Thus remove the stray __func__ printing.

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c | 75 +++++++++++++++++++++++-------------------------
 drivers/acpi/nfit/mce.c  |  5 ++--
 2 files changed, 38 insertions(+), 42 deletions(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index bbe48ad20886..39ad06143e78 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -476,8 +476,8 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 		in_buf.buffer.length = call_pkg->nd_size_in;
 	}
 
-	dev_dbg(dev, "%s:%s cmd: %d: func: %d input length: %d\n",
-			__func__, dimm_name, cmd, func, in_buf.buffer.length);
+	dev_dbg(dev, "%s cmd: %d: func: %d input length: %d\n",
+		dimm_name, cmd, func, in_buf.buffer.length);
 	print_hex_dump_debug("nvdimm in  ", DUMP_PREFIX_OFFSET, 4, 4,
 			in_buf.buffer.pointer,
 			min_t(u32, 256, in_buf.buffer.length), true);
@@ -506,8 +506,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 	}
 
 	if (!out_obj) {
-		dev_dbg(dev, "%s:%s _DSM failed cmd: %s\n", __func__, dimm_name,
-				cmd_name);
+		dev_dbg(dev, "%s _DSM failed cmd: %s\n", dimm_name, cmd_name);
 		return -EINVAL;
 	}
 
@@ -528,13 +527,13 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 	}
 
 	if (out_obj->package.type != ACPI_TYPE_BUFFER) {
-		dev_dbg(dev, "%s:%s unexpected output object type cmd: %s type: %d\n",
-				__func__, dimm_name, cmd_name, out_obj->type);
+		dev_dbg(dev, "%s unexpected output object type cmd: %s type: %d\n",
+				dimm_name, cmd_name, out_obj->type);
 		rc = -EINVAL;
 		goto out;
 	}
 
-	dev_dbg(dev, "%s:%s cmd: %s output length: %d\n", __func__, dimm_name,
+	dev_dbg(dev, "%s cmd: %s output length: %d\n", dimm_name,
 			cmd_name, out_obj->buffer.length);
 	print_hex_dump_debug(cmd_name, DUMP_PREFIX_OFFSET, 4, 4,
 			out_obj->buffer.pointer,
@@ -546,14 +545,14 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 				out_obj->buffer.length - offset);
 
 		if (offset + out_size > out_obj->buffer.length) {
-			dev_dbg(dev, "%s:%s output object underflow cmd: %s field: %d\n",
-					__func__, dimm_name, cmd_name, i);
+			dev_dbg(dev, "%s output object underflow cmd: %s field: %d\n",
+					dimm_name, cmd_name, i);
 			break;
 		}
 
 		if (in_buf.buffer.length + offset + out_size > buf_len) {
-			dev_dbg(dev, "%s:%s output overrun cmd: %s field: %d\n",
-					__func__, dimm_name, cmd_name, i);
+			dev_dbg(dev, "%s output overrun cmd: %s field: %d\n",
+					dimm_name, cmd_name, i);
 			rc = -ENXIO;
 			goto out;
 		}
@@ -655,7 +654,7 @@ static bool add_spa(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_spa->list);
 	memcpy(nfit_spa->spa, spa, sizeof(*spa));
 	list_add_tail(&nfit_spa->list, &acpi_desc->spas);
-	dev_dbg(dev, "%s: spa index: %d type: %s\n", __func__,
+	dev_dbg(dev, "spa index: %d type: %s\n",
 			spa->range_index,
 			spa_type_name(nfit_spa_type(spa)));
 	return true;
@@ -684,8 +683,8 @@ static bool add_memdev(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_memdev->list);
 	memcpy(nfit_memdev->memdev, memdev, sizeof(*memdev));
 	list_add_tail(&nfit_memdev->list, &acpi_desc->memdevs);
-	dev_dbg(dev, "%s: memdev handle: %#x spa: %d dcr: %d flags: %#x\n",
-			__func__, memdev->device_handle, memdev->range_index,
+	dev_dbg(dev, "memdev handle: %#x spa: %d dcr: %d flags: %#x\n",
+			memdev->device_handle, memdev->range_index,
 			memdev->region_index, memdev->flags);
 	return true;
 }
@@ -727,7 +726,7 @@ static bool add_dcr(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_dcr->list);
 	memcpy(nfit_dcr->dcr, dcr, sizeof_dcr(dcr));
 	list_add_tail(&nfit_dcr->list, &acpi_desc->dcrs);
-	dev_dbg(dev, "%s: dcr index: %d windows: %d\n", __func__,
+	dev_dbg(dev, "dcr index: %d windows: %d\n",
 			dcr->region_index, dcr->windows);
 	return true;
 }
@@ -754,7 +753,7 @@ static bool add_bdw(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_bdw->list);
 	memcpy(nfit_bdw->bdw, bdw, sizeof(*bdw));
 	list_add_tail(&nfit_bdw->list, &acpi_desc->bdws);
-	dev_dbg(dev, "%s: bdw dcr: %d windows: %d\n", __func__,
+	dev_dbg(dev, "bdw dcr: %d windows: %d\n",
 			bdw->region_index, bdw->windows);
 	return true;
 }
@@ -793,7 +792,7 @@ static bool add_idt(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_idt->list);
 	memcpy(nfit_idt->idt, idt, sizeof_idt(idt));
 	list_add_tail(&nfit_idt->list, &acpi_desc->idts);
-	dev_dbg(dev, "%s: idt index: %d num_lines: %d\n", __func__,
+	dev_dbg(dev, "idt index: %d num_lines: %d\n",
 			idt->interleave_index, idt->line_count);
 	return true;
 }
@@ -833,7 +832,7 @@ static bool add_flush(struct acpi_nfit_desc *acpi_desc,
 	INIT_LIST_HEAD(&nfit_flush->list);
 	memcpy(nfit_flush->flush, flush, sizeof_flush(flush));
 	list_add_tail(&nfit_flush->list, &acpi_desc->flushes);
-	dev_dbg(dev, "%s: nfit_flush handle: %d hint_count: %d\n", __func__,
+	dev_dbg(dev, "nfit_flush handle: %d hint_count: %d\n",
 			flush->device_handle, flush->hint_count);
 	return true;
 }
@@ -846,7 +845,7 @@ static bool add_platform_cap(struct acpi_nfit_desc *acpi_desc,
 
 	mask = (1 << (pcap->highest_capability + 1)) - 1;
 	acpi_desc->platform_cap = pcap->capabilities & mask;
-	dev_dbg(dev, "%s: cap: %#x\n", __func__, acpi_desc->platform_cap);
+	dev_dbg(dev, "cap: %#x\n", acpi_desc->platform_cap);
 	return true;
 }
 
@@ -893,7 +892,7 @@ static void *add_table(struct acpi_nfit_desc *acpi_desc,
 			return err;
 		break;
 	case ACPI_NFIT_TYPE_SMBIOS:
-		dev_dbg(dev, "%s: smbios\n", __func__);
+		dev_dbg(dev, "smbios\n");
 		break;
 	case ACPI_NFIT_TYPE_CAPABILITIES:
 		if (!add_platform_cap(acpi_desc, table))
@@ -1621,7 +1620,7 @@ void __acpi_nvdimm_notify(struct device *dev, u32 event)
 	struct nfit_mem *nfit_mem;
 	struct acpi_nfit_desc *acpi_desc;
 
-	dev_dbg(dev->parent, "%s: %s: event: %d\n", dev_name(dev), __func__,
+	dev_dbg(dev->parent, "%s: event: %d\n", dev_name(dev),
 			event);
 
 	if (event != NFIT_NOTIFY_DIMM_HEALTH) {
@@ -2338,7 +2337,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	nvdimm = nd_blk_region_to_dimm(ndbr);
 	nfit_mem = nvdimm_provider_data(nvdimm);
 	if (!nfit_mem || !nfit_mem->dcr || !nfit_mem->bdw) {
-		dev_dbg(dev, "%s: missing%s%s%s\n", __func__,
+		dev_dbg(dev, "missing%s%s%s\n",
 				nfit_mem ? "" : " nfit_mem",
 				(nfit_mem && nfit_mem->dcr) ? "" : " dcr",
 				(nfit_mem && nfit_mem->bdw) ? "" : " bdw");
@@ -2357,7 +2356,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	mmio->addr.base = devm_nvdimm_memremap(dev, nfit_mem->spa_bdw->address,
                         nfit_mem->spa_bdw->length, nd_blk_memremap_flags(ndbr));
 	if (!mmio->addr.base) {
-		dev_dbg(dev, "%s: %s failed to map bdw\n", __func__,
+		dev_dbg(dev, "%s failed to map bdw\n",
 				nvdimm_name(nvdimm));
 		return -ENOMEM;
 	}
@@ -2368,8 +2367,8 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_bdw,
 			nfit_mem->memdev_bdw->interleave_ways);
 	if (rc) {
-		dev_dbg(dev, "%s: %s failed to init bdw interleave\n",
-				__func__, nvdimm_name(nvdimm));
+		dev_dbg(dev, "%s failed to init bdw interleave\n",
+				nvdimm_name(nvdimm));
 		return rc;
 	}
 
@@ -2380,7 +2379,7 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	mmio->addr.base = devm_nvdimm_ioremap(dev, nfit_mem->spa_dcr->address,
 			nfit_mem->spa_dcr->length);
 	if (!mmio->addr.base) {
-		dev_dbg(dev, "%s: %s failed to map dcr\n", __func__,
+		dev_dbg(dev, "%s failed to map dcr\n",
 				nvdimm_name(nvdimm));
 		return -ENOMEM;
 	}
@@ -2391,15 +2390,15 @@ static int acpi_nfit_blk_region_enable(struct nvdimm_bus *nvdimm_bus,
 	rc = nfit_blk_init_interleave(mmio, nfit_mem->idt_dcr,
 			nfit_mem->memdev_dcr->interleave_ways);
 	if (rc) {
-		dev_dbg(dev, "%s: %s failed to init dcr interleave\n",
-				__func__, nvdimm_name(nvdimm));
+		dev_dbg(dev, "%s failed to init dcr interleave\n",
+				nvdimm_name(nvdimm));
 		return rc;
 	}
 
 	rc = acpi_nfit_blk_get_flags(nd_desc, nvdimm, nfit_blk);
 	if (rc < 0) {
-		dev_dbg(dev, "%s: %s failed get DIMM flags\n",
-				__func__, nvdimm_name(nvdimm));
+		dev_dbg(dev, "%s failed get DIMM flags\n",
+				nvdimm_name(nvdimm));
 		return rc;
 	}
 
@@ -2655,8 +2654,7 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
 		return 0;
 
 	if (spa->range_index == 0 && !nfit_spa_is_virtual(spa)) {
-		dev_dbg(acpi_desc->dev, "%s: detected invalid spa index\n",
-				__func__);
+		dev_dbg(acpi_desc->dev, "detected invalid spa index\n");
 		return 0;
 	}
 
@@ -3142,8 +3140,7 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
 		data = add_table(acpi_desc, &prev, data, end);
 
 	if (IS_ERR(data)) {
-		dev_dbg(dev, "%s: nfit table parsing error: %ld\n", __func__,
-				PTR_ERR(data));
+		dev_dbg(dev, "nfit table parsing error: %ld\n",	PTR_ERR(data));
 		rc = PTR_ERR(data);
 		goto out_unlock;
 	}
@@ -3259,7 +3256,7 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags)
 	}
 	acpi_desc->ars_start_flags = flags;
 	queue_work(nfit_wq, &acpi_desc->work);
-	dev_dbg(dev, "%s: ars_scan triggered\n", __func__);
+	dev_dbg(dev, "ars_scan triggered\n");
 	mutex_unlock(&acpi_desc->init_mutex);
 
 	return 0;
@@ -3366,8 +3363,8 @@ static int acpi_nfit_add(struct acpi_device *adev)
 			rc = acpi_nfit_init(acpi_desc, obj->buffer.pointer,
 					obj->buffer.length);
 		else
-			dev_dbg(dev, "%s invalid type %d, ignoring _FIT\n",
-				 __func__, (int) obj->type);
+			dev_dbg(dev, "invalid type %d, ignoring _FIT\n",
+				(int) obj->type);
 		kfree(buf.pointer);
 	} else
 		/* skip over the lead-in header table */
@@ -3396,7 +3393,7 @@ static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle)
 
 	if (!dev->driver) {
 		/* dev->driver may be null if we're being removed */
-		dev_dbg(dev, "%s: no driver found for dev\n", __func__);
+		dev_dbg(dev, "no driver found for dev\n");
 		return;
 	}
 
@@ -3442,7 +3439,7 @@ static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle)
 
 void __acpi_nfit_notify(struct device *dev, acpi_handle handle, u32 event)
 {
-	dev_dbg(dev, "%s: event: 0x%x\n", __func__, event);
+	dev_dbg(dev, "event: 0x%x\n", event);
 
 	switch (event) {
 	case NFIT_NOTIFY_UPDATE:
diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c
index b92921439657..e9626bf6ca29 100644
--- a/drivers/acpi/nfit/mce.c
+++ b/drivers/acpi/nfit/mce.c
@@ -51,9 +51,8 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val,
 			if ((spa->address + spa->length - 1) < mce->addr)
 				continue;
 			found_match = 1;
-			dev_dbg(dev, "%s: addr in SPA %d (0x%llx, 0x%llx)\n",
-				__func__, spa->range_index, spa->address,
-				spa->length);
+			dev_dbg(dev, "addr in SPA %d (0x%llx, 0x%llx)\n",
+				spa->range_index, spa->address, spa->length);
 			/*
 			 * We can break at the first match because we're going
 			 * to rescan all the SPA ranges. There shouldn't be any
-- 
cgit v1.2.3


From 426824d63b77bde61422ed6e19ac7c62c1f06fca Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 5 Mar 2018 16:39:31 -0800
Subject: libnvdimm: remove redundant __func__ in dev_dbg

Dynamic debug can be instructed to add the function name to the debug
output using the +f switch, so there is no need for the libnvdimm
modules to do it again. If a user decides to add the +f switch for
libnvdimm's dynamic debug this results in double prints of the function
name.

Reported-by: Johannes Thumshirn <jthumshirn@suse.de>
Reported-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/btt_devs.c       | 21 ++++++++---------
 drivers/nvdimm/bus.c            | 13 +++++------
 drivers/nvdimm/claim.c          |  2 +-
 drivers/nvdimm/core.c           |  6 ++---
 drivers/nvdimm/dax_devs.c       |  5 ++--
 drivers/nvdimm/dimm_devs.c      |  7 +++---
 drivers/nvdimm/label.c          | 51 +++++++++++++++++++----------------------
 drivers/nvdimm/namespace_devs.c | 38 +++++++++++++-----------------
 drivers/nvdimm/pfn_devs.c       | 25 ++++++++++----------
 drivers/nvdimm/pmem.c           |  2 +-
 10 files changed, 77 insertions(+), 93 deletions(-)

diff --git a/drivers/nvdimm/btt_devs.c b/drivers/nvdimm/btt_devs.c
index d58925295aa7..795ad4ff35ca 100644
--- a/drivers/nvdimm/btt_devs.c
+++ b/drivers/nvdimm/btt_devs.c
@@ -26,7 +26,7 @@ static void nd_btt_release(struct device *dev)
 	struct nd_region *nd_region = to_nd_region(dev->parent);
 	struct nd_btt *nd_btt = to_nd_btt(dev);
 
-	dev_dbg(dev, "%s\n", __func__);
+	dev_dbg(dev, "trace\n");
 	nd_detach_ndns(&nd_btt->dev, &nd_btt->ndns);
 	ida_simple_remove(&nd_region->btt_ida, nd_btt->id);
 	kfree(nd_btt->uuid);
@@ -74,8 +74,8 @@ static ssize_t sector_size_store(struct device *dev,
 	nvdimm_bus_lock(dev);
 	rc = nd_size_select_store(dev, buf, &nd_btt->lbasize,
 			btt_lbasize_supported);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -101,8 +101,8 @@ static ssize_t uuid_store(struct device *dev,
 
 	device_lock(dev);
 	rc = nd_uuid_store(dev, &nd_btt->uuid, buf, len);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	device_unlock(dev);
 
 	return rc ? rc : len;
@@ -131,8 +131,8 @@ static ssize_t namespace_store(struct device *dev,
 	device_lock(dev);
 	nvdimm_bus_lock(dev);
 	rc = nd_namespace_store(dev, &nd_btt->ndns, buf, len);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -206,8 +206,8 @@ static struct device *__nd_btt_create(struct nd_region *nd_region,
 	dev->groups = nd_btt_attribute_groups;
 	device_initialize(&nd_btt->dev);
 	if (ndns && !__nd_attach_ndns(&nd_btt->dev, ndns, &nd_btt->ndns)) {
-		dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
-				__func__, dev_name(ndns->claim));
+		dev_dbg(&ndns->dev, "failed, already claimed by %s\n",
+				dev_name(ndns->claim));
 		put_device(dev);
 		return NULL;
 	}
@@ -346,8 +346,7 @@ int nd_btt_probe(struct device *dev, struct nd_namespace_common *ndns)
 		return -ENOMEM;
 	btt_sb = devm_kzalloc(dev, sizeof(*btt_sb), GFP_KERNEL);
 	rc = __nd_btt_probe(to_nd_btt(btt_dev), ndns, btt_sb);
-	dev_dbg(dev, "%s: btt: %s\n", __func__,
-			rc == 0 ? dev_name(btt_dev) : "<none>");
+	dev_dbg(dev, "btt: %s\n", rc == 0 ? dev_name(btt_dev) : "<none>");
 	if (rc < 0) {
 		struct nd_btt *nd_btt = to_nd_btt(btt_dev);
 
diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 78eabc3a1ab1..0ebc8c4dd0fa 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -984,8 +984,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 
 	if (cmd == ND_CMD_CALL) {
 		func = pkg.nd_command;
-		dev_dbg(dev, "%s:%s, idx: %llu, in: %u, out: %u, len %llu\n",
-				__func__, dimm_name, pkg.nd_command,
+		dev_dbg(dev, "%s, idx: %llu, in: %u, out: %u, len %llu\n",
+				dimm_name, pkg.nd_command,
 				in_len, out_len, buf_len);
 	}
 
@@ -996,8 +996,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 		u32 copy;
 
 		if (out_size == UINT_MAX) {
-			dev_dbg(dev, "%s:%s unknown output size cmd: %s field: %d\n",
-					__func__, dimm_name, cmd_name, i);
+			dev_dbg(dev, "%s unknown output size cmd: %s field: %d\n",
+					dimm_name, cmd_name, i);
 			return -EFAULT;
 		}
 		if (out_len < sizeof(out_env))
@@ -1012,9 +1012,8 @@ static int __nd_ioctl(struct nvdimm_bus *nvdimm_bus, struct nvdimm *nvdimm,
 
 	buf_len = (u64) out_len + (u64) in_len;
 	if (buf_len > ND_IOCTL_MAX_BUFLEN) {
-		dev_dbg(dev, "%s:%s cmd: %s buf_len: %llu > %d\n", __func__,
-				dimm_name, cmd_name, buf_len,
-				ND_IOCTL_MAX_BUFLEN);
+		dev_dbg(dev, "%s cmd: %s buf_len: %llu > %d\n", dimm_name,
+				cmd_name, buf_len, ND_IOCTL_MAX_BUFLEN);
 		return -EINVAL;
 	}
 
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index b2fc29b8279b..30852270484f 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -148,7 +148,7 @@ ssize_t nd_namespace_store(struct device *dev,
 	char *name;
 
 	if (dev->driver) {
-		dev_dbg(dev, "%s: -EBUSY\n", __func__);
+		dev_dbg(dev, "namespace already active\n");
 		return -EBUSY;
 	}
 
diff --git a/drivers/nvdimm/core.c b/drivers/nvdimm/core.c
index 1dc527660637..acce050856a8 100644
--- a/drivers/nvdimm/core.c
+++ b/drivers/nvdimm/core.c
@@ -134,7 +134,7 @@ static void nvdimm_map_release(struct kref *kref)
 	nvdimm_map = container_of(kref, struct nvdimm_map, kref);
 	nvdimm_bus = nvdimm_map->nvdimm_bus;
 
-	dev_dbg(&nvdimm_bus->dev, "%s: %pa\n", __func__, &nvdimm_map->offset);
+	dev_dbg(&nvdimm_bus->dev, "%pa\n", &nvdimm_map->offset);
 	list_del(&nvdimm_map->list);
 	if (nvdimm_map->flags)
 		memunmap(nvdimm_map->mem);
@@ -230,8 +230,8 @@ static int nd_uuid_parse(struct device *dev, u8 *uuid_out, const char *buf,
 
 	for (i = 0; i < 16; i++) {
 		if (!isxdigit(str[0]) || !isxdigit(str[1])) {
-			dev_dbg(dev, "%s: pos: %d buf[%zd]: %c buf[%zd]: %c\n",
-					__func__, i, str - buf, str[0],
+			dev_dbg(dev, "pos: %d buf[%zd]: %c buf[%zd]: %c\n",
+					i, str - buf, str[0],
 					str + 1 - buf, str[1]);
 			return -EINVAL;
 		}
diff --git a/drivers/nvdimm/dax_devs.c b/drivers/nvdimm/dax_devs.c
index 1bf2bd318371..0453f49dc708 100644
--- a/drivers/nvdimm/dax_devs.c
+++ b/drivers/nvdimm/dax_devs.c
@@ -24,7 +24,7 @@ static void nd_dax_release(struct device *dev)
 	struct nd_dax *nd_dax = to_nd_dax(dev);
 	struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
 
-	dev_dbg(dev, "%s\n", __func__);
+	dev_dbg(dev, "trace\n");
 	nd_detach_ndns(dev, &nd_pfn->ndns);
 	ida_simple_remove(&nd_region->dax_ida, nd_pfn->id);
 	kfree(nd_pfn->uuid);
@@ -129,8 +129,7 @@ int nd_dax_probe(struct device *dev, struct nd_namespace_common *ndns)
 	pfn_sb = devm_kzalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
 	nd_pfn->pfn_sb = pfn_sb;
 	rc = nd_pfn_validate(nd_pfn, DAX_SIG);
-	dev_dbg(dev, "%s: dax: %s\n", __func__,
-			rc == 0 ? dev_name(dax_dev) : "<none>");
+	dev_dbg(dev, "dax: %s\n", rc == 0 ? dev_name(dax_dev) : "<none>");
 	if (rc < 0) {
 		nd_detach_ndns(dax_dev, &nd_pfn->ndns);
 		put_device(dax_dev);
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 097794d9f786..e00d45522b80 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -131,7 +131,7 @@ int nvdimm_init_config_data(struct nvdimm_drvdata *ndd)
 		}
 		memcpy(ndd->data + offset, cmd->out_buf, cmd->in_length);
 	}
-	dev_dbg(ndd->dev, "%s: len: %zu rc: %d\n", __func__, offset, rc);
+	dev_dbg(ndd->dev, "len: %zu rc: %d\n", offset, rc);
 	kfree(cmd);
 
 	return rc;
@@ -266,8 +266,7 @@ void nvdimm_drvdata_release(struct kref *kref)
 	struct device *dev = ndd->dev;
 	struct resource *res, *_r;
 
-	dev_dbg(dev, "%s\n", __func__);
-
+	dev_dbg(dev, "trace\n");
 	nvdimm_bus_lock(dev);
 	for_each_dpa_resource_safe(ndd, res, _r)
 		nvdimm_free_dpa(ndd, res);
@@ -660,7 +659,7 @@ int nvdimm_bus_check_dimm_count(struct nvdimm_bus *nvdimm_bus, int dimm_count)
 	nd_synchronize();
 
 	device_for_each_child(&nvdimm_bus->dev, &count, count_dimms);
-	dev_dbg(&nvdimm_bus->dev, "%s: count: %d\n", __func__, count);
+	dev_dbg(&nvdimm_bus->dev, "count: %d\n", count);
 	if (count != dimm_count)
 		return -ENXIO;
 	return 0;
diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index de66c02f6140..c858cfadea40 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -121,8 +121,7 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 
 		memcpy(sig, nsindex[i]->sig, NSINDEX_SIG_LEN);
 		if (memcmp(sig, NSINDEX_SIGNATURE, NSINDEX_SIG_LEN) != 0) {
-			dev_dbg(dev, "%s: nsindex%d signature invalid\n",
-					__func__, i);
+			dev_dbg(dev, "nsindex%d signature invalid\n", i);
 			continue;
 		}
 
@@ -135,8 +134,8 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 			labelsize = 128;
 
 		if (labelsize != sizeof_namespace_label(ndd)) {
-			dev_dbg(dev, "%s: nsindex%d labelsize %d invalid\n",
-					__func__, i, nsindex[i]->labelsize);
+			dev_dbg(dev, "nsindex%d labelsize %d invalid\n",
+					i, nsindex[i]->labelsize);
 			continue;
 		}
 
@@ -145,30 +144,28 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 		sum = nd_fletcher64(nsindex[i], sizeof_namespace_index(ndd), 1);
 		nsindex[i]->checksum = __cpu_to_le64(sum_save);
 		if (sum != sum_save) {
-			dev_dbg(dev, "%s: nsindex%d checksum invalid\n",
-					__func__, i);
+			dev_dbg(dev, "nsindex%d checksum invalid\n", i);
 			continue;
 		}
 
 		seq = __le32_to_cpu(nsindex[i]->seq);
 		if ((seq & NSINDEX_SEQ_MASK) == 0) {
-			dev_dbg(dev, "%s: nsindex%d sequence: %#x invalid\n",
-					__func__, i, seq);
+			dev_dbg(dev, "nsindex%d sequence: %#x invalid\n", i, seq);
 			continue;
 		}
 
 		/* sanity check the index against expected values */
 		if (__le64_to_cpu(nsindex[i]->myoff)
 				!= i * sizeof_namespace_index(ndd)) {
-			dev_dbg(dev, "%s: nsindex%d myoff: %#llx invalid\n",
-					__func__, i, (unsigned long long)
+			dev_dbg(dev, "nsindex%d myoff: %#llx invalid\n",
+					i, (unsigned long long)
 					__le64_to_cpu(nsindex[i]->myoff));
 			continue;
 		}
 		if (__le64_to_cpu(nsindex[i]->otheroff)
 				!= (!i) * sizeof_namespace_index(ndd)) {
-			dev_dbg(dev, "%s: nsindex%d otheroff: %#llx invalid\n",
-					__func__, i, (unsigned long long)
+			dev_dbg(dev, "nsindex%d otheroff: %#llx invalid\n",
+					i, (unsigned long long)
 					__le64_to_cpu(nsindex[i]->otheroff));
 			continue;
 		}
@@ -176,8 +173,7 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 		size = __le64_to_cpu(nsindex[i]->mysize);
 		if (size > sizeof_namespace_index(ndd)
 				|| size < sizeof(struct nd_namespace_index)) {
-			dev_dbg(dev, "%s: nsindex%d mysize: %#llx invalid\n",
-					__func__, i, size);
+			dev_dbg(dev, "nsindex%d mysize: %#llx invalid\n", i, size);
 			continue;
 		}
 
@@ -185,9 +181,8 @@ static int __nd_label_validate(struct nvdimm_drvdata *ndd)
 		if (nslot * sizeof_namespace_label(ndd)
 				+ 2 * sizeof_namespace_index(ndd)
 				> ndd->nsarea.config_size) {
-			dev_dbg(dev, "%s: nsindex%d nslot: %u invalid, config_size: %#x\n",
-					__func__, i, nslot,
-					ndd->nsarea.config_size);
+			dev_dbg(dev, "nsindex%d nslot: %u invalid, config_size: %#x\n",
+					i, nslot, ndd->nsarea.config_size);
 			continue;
 		}
 		valid[i] = true;
@@ -356,8 +351,8 @@ static bool slot_valid(struct nvdimm_drvdata *ndd,
 		sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
 		nd_label->checksum = __cpu_to_le64(sum_save);
 		if (sum != sum_save) {
-			dev_dbg(ndd->dev, "%s fail checksum. slot: %d expect: %#llx\n",
-				__func__, slot, sum);
+			dev_dbg(ndd->dev, "fail checksum. slot: %d expect: %#llx\n",
+				slot, sum);
 			return false;
 		}
 	}
@@ -422,8 +417,8 @@ int nd_label_active_count(struct nvdimm_drvdata *ndd)
 			u64 dpa = __le64_to_cpu(nd_label->dpa);
 
 			dev_dbg(ndd->dev,
-				"%s: slot%d invalid slot: %d dpa: %llx size: %llx\n",
-					__func__, slot, label_slot, dpa, size);
+				"slot%d invalid slot: %d dpa: %llx size: %llx\n",
+					slot, label_slot, dpa, size);
 			continue;
 		}
 		count++;
@@ -650,7 +645,7 @@ static int __pmem_label_update(struct nd_region *nd_region,
 	slot = nd_label_alloc_slot(ndd);
 	if (slot == UINT_MAX)
 		return -ENXIO;
-	dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot);
+	dev_dbg(ndd->dev, "allocated: %d\n", slot);
 
 	nd_label = to_label(ndd, slot);
 	memset(nd_label, 0, sizeof_namespace_label(ndd));
@@ -678,7 +673,7 @@ static int __pmem_label_update(struct nd_region *nd_region,
 		sum = nd_fletcher64(nd_label, sizeof_namespace_label(ndd), 1);
 		nd_label->checksum = __cpu_to_le64(sum);
 	}
-	nd_dbg_dpa(nd_region, ndd, res, "%s\n", __func__);
+	nd_dbg_dpa(nd_region, ndd, res, "\n");
 
 	/* update label */
 	offset = nd_label_offset(ndd, nd_label);
@@ -700,7 +695,7 @@ static int __pmem_label_update(struct nd_region *nd_region,
 		break;
 	}
 	if (victim) {
-		dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+		dev_dbg(ndd->dev, "free: %d\n", slot);
 		slot = to_slot(ndd, victim->label);
 		nd_label_free_slot(ndd, slot);
 		victim->label = NULL;
@@ -868,7 +863,7 @@ static int __blk_label_update(struct nd_region *nd_region,
 		slot = nd_label_alloc_slot(ndd);
 		if (slot == UINT_MAX)
 			goto abort;
-		dev_dbg(ndd->dev, "%s: allocated: %d\n", __func__, slot);
+		dev_dbg(ndd->dev, "allocated: %d\n", slot);
 
 		nd_label = to_label(ndd, slot);
 		memset(nd_label, 0, sizeof_namespace_label(ndd));
@@ -928,7 +923,7 @@ static int __blk_label_update(struct nd_region *nd_region,
 
 	/* free up now unused slots in the new index */
 	for_each_set_bit(slot, victim_map, victim_map ? nslot : 0) {
-		dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+		dev_dbg(ndd->dev, "free: %d\n", slot);
 		nd_label_free_slot(ndd, slot);
 	}
 
@@ -1092,7 +1087,7 @@ static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
 		active--;
 		slot = to_slot(ndd, nd_label);
 		nd_label_free_slot(ndd, slot);
-		dev_dbg(ndd->dev, "%s: free: %d\n", __func__, slot);
+		dev_dbg(ndd->dev, "free: %d\n", slot);
 		list_move_tail(&label_ent->list, &list);
 		label_ent->label = NULL;
 	}
@@ -1100,7 +1095,7 @@ static int del_labels(struct nd_mapping *nd_mapping, u8 *uuid)
 
 	if (active == 0) {
 		nd_mapping_free_labels(nd_mapping);
-		dev_dbg(ndd->dev, "%s: no more active labels\n", __func__);
+		dev_dbg(ndd->dev, "no more active labels\n");
 	}
 	mutex_unlock(&nd_mapping->lock);
 
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 658ada497be0..89b40ff83025 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -421,7 +421,7 @@ static ssize_t alt_name_store(struct device *dev,
 	rc = __alt_name_store(dev, buf, len);
 	if (rc >= 0)
 		rc = nd_namespace_label_update(nd_region, dev);
-	dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc);
+	dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -1007,7 +1007,7 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
 	if (uuid_not_set(uuid, dev, __func__))
 		return -ENXIO;
 	if (nd_region->ndr_mappings == 0) {
-		dev_dbg(dev, "%s: not associated with dimm(s)\n", __func__);
+		dev_dbg(dev, "not associated with dimm(s)\n");
 		return -ENXIO;
 	}
 
@@ -1105,8 +1105,7 @@ static ssize_t size_store(struct device *dev,
 		*uuid = NULL;
 	}
 
-	dev_dbg(dev, "%s: %llx %s (%d)\n", __func__, val, rc < 0
-			? "fail" : "success", rc);
+	dev_dbg(dev, "%llx %s (%d)\n", val, rc < 0 ? "fail" : "success", rc);
 
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
@@ -1270,8 +1269,8 @@ static ssize_t uuid_store(struct device *dev,
 		rc = nd_namespace_label_update(nd_region, dev);
 	else
 		kfree(uuid);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -1355,9 +1354,8 @@ static ssize_t sector_size_store(struct device *dev,
 		rc = nd_size_select_store(dev, buf, lbasize, supported);
 	if (rc >= 0)
 		rc = nd_namespace_label_update(nd_region, dev);
-	dev_dbg(dev, "%s: result: %zd %s: %s%s", __func__,
-			rc, rc < 0 ? "tried" : "wrote", buf,
-			buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd %s: %s%s", rc, rc < 0 ? "tried" : "wrote",
+			buf, buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -1519,7 +1517,7 @@ static ssize_t holder_class_store(struct device *dev,
 	rc = __holder_class_store(dev, buf);
 	if (rc >= 0)
 		rc = nd_namespace_label_update(nd_region, dev);
-	dev_dbg(dev, "%s: %s(%zd)\n", __func__, rc < 0 ? "fail " : "", rc);
+	dev_dbg(dev, "%s(%zd)\n", rc < 0 ? "fail " : "", rc);
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -1717,8 +1715,7 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
 		if (uuid_not_set(nsblk->uuid, &ndns->dev, __func__))
 			return ERR_PTR(-ENODEV);
 		if (!nsblk->lbasize) {
-			dev_dbg(&ndns->dev, "%s: sector size not set\n",
-				__func__);
+			dev_dbg(&ndns->dev, "sector size not set\n");
 			return ERR_PTR(-ENODEV);
 		}
 		if (!nd_namespace_blk_validate(nsblk))
@@ -1798,9 +1795,7 @@ static bool has_uuid_at_pos(struct nd_region *nd_region, u8 *uuid,
 			}
 
 			if (found_uuid) {
-				dev_dbg(ndd->dev,
-						"%s duplicate entry for uuid\n",
-						__func__);
+				dev_dbg(ndd->dev, "duplicate entry for uuid\n");
 				return false;
 			}
 			found_uuid = true;
@@ -1994,14 +1989,13 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 	namespace_pmem_release(dev);
 	switch (rc) {
 	case -EINVAL:
-		dev_dbg(&nd_region->dev, "%s: invalid label(s)\n", __func__);
+		dev_dbg(&nd_region->dev, "invalid label(s)\n");
 		break;
 	case -ENODEV:
-		dev_dbg(&nd_region->dev, "%s: label not found\n", __func__);
+		dev_dbg(&nd_region->dev, "label not found\n");
 		break;
 	default:
-		dev_dbg(&nd_region->dev, "%s: unexpected err: %d\n",
-				__func__, rc);
+		dev_dbg(&nd_region->dev, "unexpected err: %d\n", rc);
 		break;
 	}
 	return ERR_PTR(rc);
@@ -2334,8 +2328,8 @@ static struct device **scan_labels(struct nd_region *nd_region)
 
 	}
 
-	dev_dbg(&nd_region->dev, "%s: discovered %d %s namespace%s\n",
-			__func__, count, is_nd_blk(&nd_region->dev)
+	dev_dbg(&nd_region->dev, "discovered %d %s namespace%s\n",
+			count, is_nd_blk(&nd_region->dev)
 			? "blk" : "pmem", count == 1 ? "" : "s");
 
 	if (count == 0) {
@@ -2467,7 +2461,7 @@ static int init_active_labels(struct nd_region *nd_region)
 		get_ndd(ndd);
 
 		count = nd_label_active_count(ndd);
-		dev_dbg(ndd->dev, "%s: %d\n", __func__, count);
+		dev_dbg(ndd->dev, "count: %d\n", count);
 		if (!count)
 			continue;
 		for (j = 0; j < count; j++) {
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index f5c4e8c6e29d..e7e42130a375 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -27,7 +27,7 @@ static void nd_pfn_release(struct device *dev)
 	struct nd_region *nd_region = to_nd_region(dev->parent);
 	struct nd_pfn *nd_pfn = to_nd_pfn(dev);
 
-	dev_dbg(dev, "%s\n", __func__);
+	dev_dbg(dev, "trace\n");
 	nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns);
 	ida_simple_remove(&nd_region->pfn_ida, nd_pfn->id);
 	kfree(nd_pfn->uuid);
@@ -94,8 +94,8 @@ static ssize_t mode_store(struct device *dev,
 		else
 			rc = -EINVAL;
 	}
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -144,8 +144,8 @@ static ssize_t align_store(struct device *dev,
 	nvdimm_bus_lock(dev);
 	rc = nd_size_select_store(dev, buf, &nd_pfn->align,
 			nd_pfn_supported_alignments());
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -171,8 +171,8 @@ static ssize_t uuid_store(struct device *dev,
 
 	device_lock(dev);
 	rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	device_unlock(dev);
 
 	return rc ? rc : len;
@@ -201,8 +201,8 @@ static ssize_t namespace_store(struct device *dev,
 	device_lock(dev);
 	nvdimm_bus_lock(dev);
 	rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len);
-	dev_dbg(dev, "%s: result: %zd wrote: %s%s", __func__,
-			rc, buf, buf[len - 1] == '\n' ? "" : "\n");
+	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
+			buf[len - 1] == '\n' ? "" : "\n");
 	nvdimm_bus_unlock(dev);
 	device_unlock(dev);
 
@@ -314,8 +314,8 @@ struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
 	dev = &nd_pfn->dev;
 	device_initialize(&nd_pfn->dev);
 	if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
-		dev_dbg(&ndns->dev, "%s failed, already claimed by %s\n",
-				__func__, dev_name(ndns->claim));
+		dev_dbg(&ndns->dev, "failed, already claimed by %s\n",
+				dev_name(ndns->claim));
 		put_device(dev);
 		return NULL;
 	}
@@ -510,8 +510,7 @@ int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns)
 	nd_pfn = to_nd_pfn(pfn_dev);
 	nd_pfn->pfn_sb = pfn_sb;
 	rc = nd_pfn_validate(nd_pfn, PFN_SIG);
-	dev_dbg(dev, "%s: pfn: %s\n", __func__,
-			rc == 0 ? dev_name(pfn_dev) : "<none>");
+	dev_dbg(dev, "pfn: %s\n", rc == 0 ? dev_name(pfn_dev) : "<none>");
 	if (rc < 0) {
 		nd_detach_ndns(pfn_dev, &nd_pfn->ndns);
 		put_device(pfn_dev);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 06f8dcc52ca6..906d31e56261 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -66,7 +66,7 @@ static blk_status_t pmem_clear_poison(struct pmem_device *pmem,
 		rc = BLK_STS_IOERR;
 	if (cleared > 0 && cleared / 512) {
 		cleared /= 512;
-		dev_dbg(dev, "%s: %#llx clear %ld sector%s\n", __func__,
+		dev_dbg(dev, "%#llx clear %ld sector%s\n",
 				(unsigned long long) sector, cleared,
 				cleared > 1 ? "s" : "");
 		badblocks_clear(&pmem->bb, sector, cleared);
-- 
cgit v1.2.3


From 6daaca522ab464d32419aea61dd1b41dc1c71dba Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 5 Mar 2018 16:40:05 -0800
Subject: device-dax: remove redundant __func__ in dev_dbg

Dynamic debug can be instructed to add the function name to the debug
output using the +f switch, so there is no need for the dax modules to
do it again. If a user decides to add the +f switch for the dax modules'
dynamic debug this results in double prints of the function name.

Reported-by: Johannes Thumshirn <jthumshirn@suse.de>
Reported-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/device.c | 38 +++++++++++++++++---------------------
 drivers/dax/pmem.c   |  6 +++---
 2 files changed, 20 insertions(+), 24 deletions(-)

diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 2137dbc29877..37be5a306c8f 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -257,8 +257,8 @@ static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
 	dax_region = dev_dax->region;
 	if (dax_region->align > PAGE_SIZE) {
-		dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n",
-			__func__, dax_region->align, fault_size);
+		dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
+			dax_region->align, fault_size);
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -267,8 +267,7 @@ static int __dev_dax_pte_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
 	phys = dax_pgoff_to_phys(dev_dax, vmf->pgoff, PAGE_SIZE);
 	if (phys == -1) {
-		dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
-				vmf->pgoff);
+		dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", vmf->pgoff);
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -299,14 +298,14 @@ static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
 	dax_region = dev_dax->region;
 	if (dax_region->align > PMD_SIZE) {
-		dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n",
-			__func__, dax_region->align, fault_size);
+		dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
+			dax_region->align, fault_size);
 		return VM_FAULT_SIGBUS;
 	}
 
 	/* dax pmd mappings require pfn_t_devmap() */
 	if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
-		dev_dbg(dev, "%s: region lacks devmap flags\n", __func__);
+		dev_dbg(dev, "region lacks devmap flags\n");
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -323,8 +322,7 @@ static int __dev_dax_pmd_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 	pgoff = linear_page_index(vmf->vma, pmd_addr);
 	phys = dax_pgoff_to_phys(dev_dax, pgoff, PMD_SIZE);
 	if (phys == -1) {
-		dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
-				pgoff);
+		dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff);
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -351,14 +349,14 @@ static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 
 	dax_region = dev_dax->region;
 	if (dax_region->align > PUD_SIZE) {
-		dev_dbg(dev, "%s: alignment (%#x) > fault size (%#x)\n",
-			__func__, dax_region->align, fault_size);
+		dev_dbg(dev, "alignment (%#x) > fault size (%#x)\n",
+			dax_region->align, fault_size);
 		return VM_FAULT_SIGBUS;
 	}
 
 	/* dax pud mappings require pfn_t_devmap() */
 	if ((dax_region->pfn_flags & (PFN_DEV|PFN_MAP)) != (PFN_DEV|PFN_MAP)) {
-		dev_dbg(dev, "%s: region lacks devmap flags\n", __func__);
+		dev_dbg(dev, "region lacks devmap flags\n");
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -375,8 +373,7 @@ static int __dev_dax_pud_fault(struct dev_dax *dev_dax, struct vm_fault *vmf)
 	pgoff = linear_page_index(vmf->vma, pud_addr);
 	phys = dax_pgoff_to_phys(dev_dax, pgoff, PUD_SIZE);
 	if (phys == -1) {
-		dev_dbg(dev, "%s: pgoff_to_phys(%#lx) failed\n", __func__,
-				pgoff);
+		dev_dbg(dev, "pgoff_to_phys(%#lx) failed\n", pgoff);
 		return VM_FAULT_SIGBUS;
 	}
 
@@ -399,9 +396,8 @@ static int dev_dax_huge_fault(struct vm_fault *vmf,
 	struct file *filp = vmf->vma->vm_file;
 	struct dev_dax *dev_dax = filp->private_data;
 
-	dev_dbg(&dev_dax->dev, "%s: %s: %s (%#lx - %#lx) size = %d\n", __func__,
-			current->comm, (vmf->flags & FAULT_FLAG_WRITE)
-			? "write" : "read",
+	dev_dbg(&dev_dax->dev, "%s: %s (%#lx - %#lx) size = %d\n", current->comm,
+			(vmf->flags & FAULT_FLAG_WRITE) ? "write" : "read",
 			vmf->vma->vm_start, vmf->vma->vm_end, pe_size);
 
 	id = dax_read_lock();
@@ -450,7 +446,7 @@ static int dax_mmap(struct file *filp, struct vm_area_struct *vma)
 	struct dev_dax *dev_dax = filp->private_data;
 	int rc, id;
 
-	dev_dbg(&dev_dax->dev, "%s\n", __func__);
+	dev_dbg(&dev_dax->dev, "trace\n");
 
 	/*
 	 * We lock to check dax_dev liveness and will re-check at
@@ -508,7 +504,7 @@ static int dax_open(struct inode *inode, struct file *filp)
 	struct inode *__dax_inode = dax_inode(dax_dev);
 	struct dev_dax *dev_dax = dax_get_private(dax_dev);
 
-	dev_dbg(&dev_dax->dev, "%s\n", __func__);
+	dev_dbg(&dev_dax->dev, "trace\n");
 	inode->i_mapping = __dax_inode->i_mapping;
 	inode->i_mapping->host = __dax_inode;
 	filp->f_mapping = inode->i_mapping;
@@ -523,7 +519,7 @@ static int dax_release(struct inode *inode, struct file *filp)
 {
 	struct dev_dax *dev_dax = filp->private_data;
 
-	dev_dbg(&dev_dax->dev, "%s\n", __func__);
+	dev_dbg(&dev_dax->dev, "trace\n");
 	return 0;
 }
 
@@ -565,7 +561,7 @@ static void unregister_dev_dax(void *dev)
 	struct inode *inode = dax_inode(dax_dev);
 	struct cdev *cdev = inode->i_cdev;
 
-	dev_dbg(dev, "%s\n", __func__);
+	dev_dbg(dev, "trace\n");
 
 	kill_dev_dax(dev_dax);
 	cdev_device_del(cdev, dev);
diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
index 31b6ecce4c64..f7c0137e9325 100644
--- a/drivers/dax/pmem.c
+++ b/drivers/dax/pmem.c
@@ -34,7 +34,7 @@ static void dax_pmem_percpu_release(struct percpu_ref *ref)
 {
 	struct dax_pmem *dax_pmem = to_dax_pmem(ref);
 
-	dev_dbg(dax_pmem->dev, "%s\n", __func__);
+	dev_dbg(dax_pmem->dev, "trace\n");
 	complete(&dax_pmem->cmp);
 }
 
@@ -43,7 +43,7 @@ static void dax_pmem_percpu_exit(void *data)
 	struct percpu_ref *ref = data;
 	struct dax_pmem *dax_pmem = to_dax_pmem(ref);
 
-	dev_dbg(dax_pmem->dev, "%s\n", __func__);
+	dev_dbg(dax_pmem->dev, "trace\n");
 	wait_for_completion(&dax_pmem->cmp);
 	percpu_ref_exit(ref);
 }
@@ -53,7 +53,7 @@ static void dax_pmem_percpu_kill(void *data)
 	struct percpu_ref *ref = data;
 	struct dax_pmem *dax_pmem = to_dax_pmem(ref);
 
-	dev_dbg(dax_pmem->dev, "%s\n", __func__);
+	dev_dbg(dax_pmem->dev, "trace\n");
 	percpu_ref_kill(ref);
 }
 
-- 
cgit v1.2.3


From d7d8464dcc27dfc9d0d23c7924b2e6f4bb16c6c2 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Tue, 27 Feb 2018 10:29:50 -0700
Subject: nfit_test: improve structure offset handling

In nfit_test0_setup() and nfit_test1_setup() we keep an 'offset' value
which we use to calculate where in our 'nfit_buf' we will place our next
structure.  The handling of 'offset' and the calculation of the placement
of the next structure is a bit inconsistent, though.  We don't update
'offset' after we insert each structure, sometimes causing us to update it
for multiple structures' sizes at once.  When calculating the position of
the next structure we aren't always able to just use 'offset', but
sometimes have to add in other structure sizes as well.

Fix this by updating 'offset' after each structure insertion in a
consistent way, allowing us to always calculate the position of the next
structure to be inserted by just using 'nfit_buf + offset'.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/nfit.c | 183 +++++++++++++++++++++++----------------
 1 file changed, 109 insertions(+), 74 deletions(-)

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 620fa78b3b1b..1376fc95c33a 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -1366,7 +1366,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	struct acpi_nfit_data_region *bdw;
 	struct acpi_nfit_flush_address *flush;
 	struct acpi_nfit_capabilities *pcap;
-	unsigned int offset, i;
+	unsigned int offset = 0, i;
 
 	/*
 	 * spa0 (interleave first half of dimm0 and dimm1, note storage
@@ -1380,93 +1380,102 @@ static void nfit_test0_setup(struct nfit_test *t)
 	spa->range_index = 0+1;
 	spa->address = t->spa_set_dma[0];
 	spa->length = SPA0_SIZE;
+	offset += spa->header.length;
 
 	/*
 	 * spa1 (interleave last half of the 4 DIMMS, note storage
 	 * does not actually alias the related block-data-window
 	 * regions)
 	 */
-	spa = nfit_buf + sizeof(*spa);
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
 	spa->range_index = 1+1;
 	spa->address = t->spa_set_dma[1];
 	spa->length = SPA1_SIZE;
+	offset += spa->header.length;
 
 	/* spa2 (dcr0) dimm0 */
-	spa = nfit_buf + sizeof(*spa) * 2;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 2+1;
 	spa->address = t->dcr_dma[0];
 	spa->length = DCR_SIZE;
+	offset += spa->header.length;
 
 	/* spa3 (dcr1) dimm1 */
-	spa = nfit_buf + sizeof(*spa) * 3;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 3+1;
 	spa->address = t->dcr_dma[1];
 	spa->length = DCR_SIZE;
+	offset += spa->header.length;
 
 	/* spa4 (dcr2) dimm2 */
-	spa = nfit_buf + sizeof(*spa) * 4;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 4+1;
 	spa->address = t->dcr_dma[2];
 	spa->length = DCR_SIZE;
+	offset += spa->header.length;
 
 	/* spa5 (dcr3) dimm3 */
-	spa = nfit_buf + sizeof(*spa) * 5;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16);
 	spa->range_index = 5+1;
 	spa->address = t->dcr_dma[3];
 	spa->length = DCR_SIZE;
+	offset += spa->header.length;
 
 	/* spa6 (bdw for dcr0) dimm0 */
-	spa = nfit_buf + sizeof(*spa) * 6;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 6+1;
 	spa->address = t->dimm_dma[0];
 	spa->length = DIMM_SIZE;
+	offset += spa->header.length;
 
 	/* spa7 (bdw for dcr1) dimm1 */
-	spa = nfit_buf + sizeof(*spa) * 7;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 7+1;
 	spa->address = t->dimm_dma[1];
 	spa->length = DIMM_SIZE;
+	offset += spa->header.length;
 
 	/* spa8 (bdw for dcr2) dimm2 */
-	spa = nfit_buf + sizeof(*spa) * 8;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 8+1;
 	spa->address = t->dimm_dma[2];
 	spa->length = DIMM_SIZE;
+	offset += spa->header.length;
 
 	/* spa9 (bdw for dcr3) dimm3 */
-	spa = nfit_buf + sizeof(*spa) * 9;
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 	spa->range_index = 9+1;
 	spa->address = t->dimm_dma[3];
 	spa->length = DIMM_SIZE;
+	offset += spa->header.length;
 
-	offset = sizeof(*spa) * 10;
 	/* mem-region0 (spa0, dimm0) */
 	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -1481,9 +1490,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 2;
+	offset += memdev->header.length;
 
 	/* mem-region1 (spa0, dimm1) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map);
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[1];
@@ -1497,9 +1507,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 2;
 	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+	offset += memdev->header.length;
 
 	/* mem-region2 (spa1, dimm0) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[0];
@@ -1513,9 +1524,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 4;
 	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+	offset += memdev->header.length;
 
 	/* mem-region3 (spa1, dimm1) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[1];
@@ -1528,9 +1540,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = SPA0_SIZE/2;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 4;
+	offset += memdev->header.length;
 
 	/* mem-region4 (spa1, dimm2) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 4;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[2];
@@ -1544,9 +1557,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 4;
 	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+	offset += memdev->header.length;
 
 	/* mem-region5 (spa1, dimm3) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[3];
@@ -1559,9 +1573,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = SPA0_SIZE/2;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 4;
+	offset += memdev->header.length;
 
 	/* mem-region6 (spa/dcr0, dimm0) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 6;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[0];
@@ -1574,9 +1589,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region7 (spa/dcr1, dimm1) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 7;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[1];
@@ -1589,9 +1605,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region8 (spa/dcr2, dimm2) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 8;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[2];
@@ -1604,9 +1621,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region9 (spa/dcr3, dimm3) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 9;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[3];
@@ -1619,9 +1637,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region10 (spa/bdw0, dimm0) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 10;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[0];
@@ -1634,9 +1653,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region11 (spa/bdw1, dimm1) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 11;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[1];
@@ -1649,9 +1669,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region12 (spa/bdw2, dimm2) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 12;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[2];
@@ -1664,9 +1685,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->address = 0;
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
+	offset += memdev->header.length;
 
 	/* mem-region13 (spa/dcr3, dimm3) */
-	memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 13;
+	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
 	memdev->device_handle = handle[3];
@@ -1680,12 +1702,12 @@ static void nfit_test0_setup(struct nfit_test *t)
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
 	memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+	offset += memdev->header.length;
 
-	offset = offset + sizeof(struct acpi_nfit_memory_map) * 14;
 	/* dcr-descriptor0: blk */
 	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-	dcr->header.length = sizeof(struct acpi_nfit_control_region);
+	dcr->header.length = sizeof(*dcr);
 	dcr->region_index = 0+1;
 	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[0];
@@ -1696,11 +1718,12 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->command_size = 8;
 	dcr->status_offset = 8;
 	dcr->status_size = 4;
+	offset += dcr->header.length;
 
 	/* dcr-descriptor1: blk */
-	dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region);
+	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-	dcr->header.length = sizeof(struct acpi_nfit_control_region);
+	dcr->header.length = sizeof(*dcr);
 	dcr->region_index = 1+1;
 	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[1];
@@ -1711,11 +1734,12 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->command_size = 8;
 	dcr->status_offset = 8;
 	dcr->status_size = 4;
+	offset += dcr->header.length;
 
 	/* dcr-descriptor2: blk */
-	dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 2;
+	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-	dcr->header.length = sizeof(struct acpi_nfit_control_region);
+	dcr->header.length = sizeof(*dcr);
 	dcr->region_index = 2+1;
 	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[2];
@@ -1726,11 +1750,12 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->command_size = 8;
 	dcr->status_offset = 8;
 	dcr->status_size = 4;
+	offset += dcr->header.length;
 
 	/* dcr-descriptor3: blk */
-	dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 3;
+	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-	dcr->header.length = sizeof(struct acpi_nfit_control_region);
+	dcr->header.length = sizeof(*dcr);
 	dcr->region_index = 3+1;
 	dcr_common_init(dcr);
 	dcr->serial_number = ~handle[3];
@@ -1741,8 +1766,8 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->command_size = 8;
 	dcr->status_offset = 8;
 	dcr->status_size = 4;
+	offset += dcr->header.length;
 
-	offset = offset + sizeof(struct acpi_nfit_control_region) * 4;
 	/* dcr-descriptor0: pmem */
 	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -1753,10 +1778,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->serial_number = ~handle[0];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
+	offset += dcr->header.length;
 
 	/* dcr-descriptor1: pmem */
-	dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region,
-			window_size);
+	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
 			window_size);
@@ -1765,10 +1790,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->serial_number = ~handle[1];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
+	offset += dcr->header.length;
 
 	/* dcr-descriptor2: pmem */
-	dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region,
-			window_size) * 2;
+	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
 			window_size);
@@ -1777,10 +1802,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->serial_number = ~handle[2];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
+	offset += dcr->header.length;
 
 	/* dcr-descriptor3: pmem */
-	dcr = nfit_buf + offset + offsetof(struct acpi_nfit_control_region,
-			window_size) * 3;
+	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
 			window_size);
@@ -1789,54 +1814,56 @@ static void nfit_test0_setup(struct nfit_test *t)
 	dcr->serial_number = ~handle[3];
 	dcr->code = NFIT_FIC_BYTEN;
 	dcr->windows = 0;
+	offset += dcr->header.length;
 
-	offset = offset + offsetof(struct acpi_nfit_control_region,
-			window_size) * 4;
 	/* bdw0 (spa/dcr0, dimm0) */
 	bdw = nfit_buf + offset;
 	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-	bdw->header.length = sizeof(struct acpi_nfit_data_region);
+	bdw->header.length = sizeof(*bdw);
 	bdw->region_index = 0+1;
 	bdw->windows = 1;
 	bdw->offset = 0;
 	bdw->size = BDW_SIZE;
 	bdw->capacity = DIMM_SIZE;
 	bdw->start_address = 0;
+	offset += bdw->header.length;
 
 	/* bdw1 (spa/dcr1, dimm1) */
-	bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region);
+	bdw = nfit_buf + offset;
 	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-	bdw->header.length = sizeof(struct acpi_nfit_data_region);
+	bdw->header.length = sizeof(*bdw);
 	bdw->region_index = 1+1;
 	bdw->windows = 1;
 	bdw->offset = 0;
 	bdw->size = BDW_SIZE;
 	bdw->capacity = DIMM_SIZE;
 	bdw->start_address = 0;
+	offset += bdw->header.length;
 
 	/* bdw2 (spa/dcr2, dimm2) */
-	bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 2;
+	bdw = nfit_buf + offset;
 	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-	bdw->header.length = sizeof(struct acpi_nfit_data_region);
+	bdw->header.length = sizeof(*bdw);
 	bdw->region_index = 2+1;
 	bdw->windows = 1;
 	bdw->offset = 0;
 	bdw->size = BDW_SIZE;
 	bdw->capacity = DIMM_SIZE;
 	bdw->start_address = 0;
+	offset += bdw->header.length;
 
 	/* bdw3 (spa/dcr3, dimm3) */
-	bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 3;
+	bdw = nfit_buf + offset;
 	bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-	bdw->header.length = sizeof(struct acpi_nfit_data_region);
+	bdw->header.length = sizeof(*bdw);
 	bdw->region_index = 3+1;
 	bdw->windows = 1;
 	bdw->offset = 0;
 	bdw->size = BDW_SIZE;
 	bdw->capacity = DIMM_SIZE;
 	bdw->start_address = 0;
+	offset += bdw->header.length;
 
-	offset = offset + sizeof(struct acpi_nfit_data_region) * 4;
 	/* flush0 (dimm0) */
 	flush = nfit_buf + offset;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
@@ -1845,48 +1872,52 @@ static void nfit_test0_setup(struct nfit_test *t)
 	flush->hint_count = NUM_HINTS;
 	for (i = 0; i < NUM_HINTS; i++)
 		flush->hint_address[i] = t->flush_dma[0] + i * sizeof(u64);
+	offset += flush->header.length;
 
 	/* flush1 (dimm1) */
-	flush = nfit_buf + offset + flush_hint_size * 1;
+	flush = nfit_buf + offset;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
 	flush->header.length = flush_hint_size;
 	flush->device_handle = handle[1];
 	flush->hint_count = NUM_HINTS;
 	for (i = 0; i < NUM_HINTS; i++)
 		flush->hint_address[i] = t->flush_dma[1] + i * sizeof(u64);
+	offset += flush->header.length;
 
 	/* flush2 (dimm2) */
-	flush = nfit_buf + offset + flush_hint_size  * 2;
+	flush = nfit_buf + offset;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
 	flush->header.length = flush_hint_size;
 	flush->device_handle = handle[2];
 	flush->hint_count = NUM_HINTS;
 	for (i = 0; i < NUM_HINTS; i++)
 		flush->hint_address[i] = t->flush_dma[2] + i * sizeof(u64);
+	offset += flush->header.length;
 
 	/* flush3 (dimm3) */
-	flush = nfit_buf + offset + flush_hint_size * 3;
+	flush = nfit_buf + offset;
 	flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
 	flush->header.length = flush_hint_size;
 	flush->device_handle = handle[3];
 	flush->hint_count = NUM_HINTS;
 	for (i = 0; i < NUM_HINTS; i++)
 		flush->hint_address[i] = t->flush_dma[3] + i * sizeof(u64);
+	offset += flush->header.length;
 
 	/* platform capabilities */
-	pcap = nfit_buf + offset + flush_hint_size * 4;
+	pcap = nfit_buf + offset;
 	pcap->header.type = ACPI_NFIT_TYPE_CAPABILITIES;
 	pcap->header.length = sizeof(*pcap);
 	pcap->highest_capability = 1;
 	pcap->capabilities = ACPI_NFIT_CAPABILITY_CACHE_FLUSH |
 		ACPI_NFIT_CAPABILITY_MEM_FLUSH;
+	offset += pcap->header.length;
 
 	if (t->setup_hotplug) {
-		offset = offset + flush_hint_size * 4 + sizeof(*pcap);
 		/* dcr-descriptor4: blk */
 		dcr = nfit_buf + offset;
 		dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
-		dcr->header.length = sizeof(struct acpi_nfit_control_region);
+		dcr->header.length = sizeof(*dcr);
 		dcr->region_index = 8+1;
 		dcr_common_init(dcr);
 		dcr->serial_number = ~handle[4];
@@ -1897,8 +1928,8 @@ static void nfit_test0_setup(struct nfit_test *t)
 		dcr->command_size = 8;
 		dcr->status_offset = 8;
 		dcr->status_size = 4;
+		offset += dcr->header.length;
 
-		offset = offset + sizeof(struct acpi_nfit_control_region);
 		/* dcr-descriptor4: pmem */
 		dcr = nfit_buf + offset;
 		dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -1909,21 +1940,20 @@ static void nfit_test0_setup(struct nfit_test *t)
 		dcr->serial_number = ~handle[4];
 		dcr->code = NFIT_FIC_BYTEN;
 		dcr->windows = 0;
+		offset += dcr->header.length;
 
-		offset = offset + offsetof(struct acpi_nfit_control_region,
-				window_size);
 		/* bdw4 (spa/dcr4, dimm4) */
 		bdw = nfit_buf + offset;
 		bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION;
-		bdw->header.length = sizeof(struct acpi_nfit_data_region);
+		bdw->header.length = sizeof(*bdw);
 		bdw->region_index = 8+1;
 		bdw->windows = 1;
 		bdw->offset = 0;
 		bdw->size = BDW_SIZE;
 		bdw->capacity = DIMM_SIZE;
 		bdw->start_address = 0;
+		offset += bdw->header.length;
 
-		offset = offset + sizeof(struct acpi_nfit_data_region);
 		/* spa10 (dcr4) dimm4 */
 		spa = nfit_buf + offset;
 		spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
@@ -1932,30 +1962,32 @@ static void nfit_test0_setup(struct nfit_test *t)
 		spa->range_index = 10+1;
 		spa->address = t->dcr_dma[4];
 		spa->length = DCR_SIZE;
+		offset += spa->header.length;
 
 		/*
 		 * spa11 (single-dimm interleave for hotplug, note storage
 		 * does not actually alias the related block-data-window
 		 * regions)
 		 */
-		spa = nfit_buf + offset + sizeof(*spa);
+		spa = nfit_buf + offset;
 		spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 		spa->header.length = sizeof(*spa);
 		memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16);
 		spa->range_index = 11+1;
 		spa->address = t->spa_set_dma[2];
 		spa->length = SPA0_SIZE;
+		offset += spa->header.length;
 
 		/* spa12 (bdw for dcr4) dimm4 */
-		spa = nfit_buf + offset + sizeof(*spa) * 2;
+		spa = nfit_buf + offset;
 		spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 		spa->header.length = sizeof(*spa);
 		memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16);
 		spa->range_index = 12+1;
 		spa->address = t->dimm_dma[4];
 		spa->length = DIMM_SIZE;
+		offset += spa->header.length;
 
-		offset = offset + sizeof(*spa) * 3;
 		/* mem-region14 (spa/dcr4, dimm4) */
 		memdev = nfit_buf + offset;
 		memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -1970,10 +2002,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 		memdev->address = 0;
 		memdev->interleave_index = 0;
 		memdev->interleave_ways = 1;
+		offset += memdev->header.length;
 
-		/* mem-region15 (spa0, dimm4) */
-		memdev = nfit_buf + offset +
-				sizeof(struct acpi_nfit_memory_map);
+		/* mem-region15 (spa11, dimm4) */
+		memdev = nfit_buf + offset;
 		memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 		memdev->header.length = sizeof(*memdev);
 		memdev->device_handle = handle[4];
@@ -1987,10 +2019,10 @@ static void nfit_test0_setup(struct nfit_test *t)
 		memdev->interleave_index = 0;
 		memdev->interleave_ways = 1;
 		memdev->flags = ACPI_NFIT_MEM_HEALTH_ENABLED;
+		offset += memdev->header.length;
 
 		/* mem-region16 (spa/bdw4, dimm4) */
-		memdev = nfit_buf + offset +
-				sizeof(struct acpi_nfit_memory_map) * 2;
+		memdev = nfit_buf + offset;
 		memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 		memdev->header.length = sizeof(*memdev);
 		memdev->device_handle = handle[4];
@@ -2003,8 +2035,8 @@ static void nfit_test0_setup(struct nfit_test *t)
 		memdev->address = 0;
 		memdev->interleave_index = 0;
 		memdev->interleave_ways = 1;
+		offset += memdev->header.length;
 
-		offset = offset + sizeof(struct acpi_nfit_memory_map) * 3;
 		/* flush3 (dimm4) */
 		flush = nfit_buf + offset;
 		flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS;
@@ -2014,6 +2046,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 		for (i = 0; i < NUM_HINTS; i++)
 			flush->hint_address[i] = t->flush_dma[4]
 				+ i * sizeof(u64);
+		offset += flush->header.length;
 	}
 
 	post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
@@ -2061,17 +2094,18 @@ static void nfit_test1_setup(struct nfit_test *t)
 	spa->range_index = 0+1;
 	spa->address = t->spa_set_dma[0];
 	spa->length = SPA2_SIZE;
+	offset += spa->header.length;
 
 	/* virtual cd region */
-	spa = nfit_buf + sizeof(*spa);
+	spa = nfit_buf + offset;
 	spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS;
 	spa->header.length = sizeof(*spa);
 	memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_VCD), 16);
 	spa->range_index = 0;
 	spa->address = t->spa_set_dma[1];
 	spa->length = SPA_VCD_SIZE;
+	offset += spa->header.length;
 
-	offset += sizeof(*spa) * 2;
 	/* mem-region0 (spa0, dimm0) */
 	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
@@ -2089,8 +2123,8 @@ static void nfit_test1_setup(struct nfit_test *t)
 	memdev->flags = ACPI_NFIT_MEM_SAVE_FAILED | ACPI_NFIT_MEM_RESTORE_FAILED
 		| ACPI_NFIT_MEM_FLUSH_FAILED | ACPI_NFIT_MEM_HEALTH_OBSERVED
 		| ACPI_NFIT_MEM_NOT_ARMED;
+	offset += memdev->header.length;
 
-	offset += sizeof(*memdev);
 	/* dcr-descriptor0 */
 	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
@@ -2101,8 +2135,8 @@ static void nfit_test1_setup(struct nfit_test *t)
 	dcr->serial_number = ~handle[5];
 	dcr->code = NFIT_FIC_BYTE;
 	dcr->windows = 0;
-
 	offset += dcr->header.length;
+
 	memdev = nfit_buf + offset;
 	memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP;
 	memdev->header.length = sizeof(*memdev);
@@ -2117,9 +2151,9 @@ static void nfit_test1_setup(struct nfit_test *t)
 	memdev->interleave_index = 0;
 	memdev->interleave_ways = 1;
 	memdev->flags = ACPI_NFIT_MEM_MAP_FAILED;
+	offset += memdev->header.length;
 
 	/* dcr-descriptor1 */
-	offset += sizeof(*memdev);
 	dcr = nfit_buf + offset;
 	dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION;
 	dcr->header.length = offsetof(struct acpi_nfit_control_region,
@@ -2129,6 +2163,7 @@ static void nfit_test1_setup(struct nfit_test *t)
 	dcr->serial_number = ~handle[6];
 	dcr->code = NFIT_FIC_BYTE;
 	dcr->windows = 0;
+	offset += dcr->header.length;
 
 	post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
 			SPA2_SIZE);
-- 
cgit v1.2.3


From 9741a559971856fca61a83840b558b4f94500d89 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Tue, 27 Feb 2018 10:29:51 -0700
Subject: nfit_test: fix buffer overrun, add sanity check

It turns out that we were overrunning the 'nfit_buf' buffer in
nfit_test0_setup() in the (t->setup_hotplug == 1) case because we failed to
correctly account for all of the acpi_nfit_memory_map structures.

Fix the structure count which will increase the allocation size of
'nfit_buf' in nfit_test0_alloc().  Also add some WARN_ON()s to
nfit_test0_setup() and nfit_test1_setup() to catch future issues where the
size of the buffer doesn't match the amount of data we're writing.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/nfit.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index 1376fc95c33a..fcd233342273 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -104,7 +104,8 @@ enum {
 	NUM_HINTS = 8,
 	NUM_BDW = NUM_DCR,
 	NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW,
-	NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */,
+	NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */
+		+ 4 /* spa1 iset */ + 1 /* spa11 iset */,
 	DIMM_SIZE = SZ_32M,
 	LABEL_SIZE = SZ_128K,
 	SPA_VCD_SIZE = SZ_4M,
@@ -2047,6 +2048,9 @@ static void nfit_test0_setup(struct nfit_test *t)
 			flush->hint_address[i] = t->flush_dma[4]
 				+ i * sizeof(u64);
 		offset += flush->header.length;
+
+		/* sanity check to make sure we've filled the buffer */
+		WARN_ON(offset != t->nfit_size);
 	}
 
 	post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
@@ -2165,6 +2169,9 @@ static void nfit_test1_setup(struct nfit_test *t)
 	dcr->windows = 0;
 	offset += dcr->header.length;
 
+	/* sanity check to make sure we've filled the buffer */
+	WARN_ON(offset != t->nfit_size);
+
 	post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
 			SPA2_SIZE);
 
-- 
cgit v1.2.3


From 1526f9e2ac1df62106c15a18fd6895258c579c90 Mon Sep 17 00:00:00 2001
From: Ross Zwisler <ross.zwisler@linux.intel.com>
Date: Tue, 27 Feb 2018 10:29:52 -0700
Subject: nfit_test: prevent parsing error of nfit_test.0

When you load nfit_test you currently see the following error in dmesg:

 nfit_test nfit_test.0: found a zero length table '0' parsing nfit

This happens because when we parse the nfit_test.0 table via
acpi_nfit_init(), we specify a size of nfit_test->nfit_size.  For the first
pass through nfit_test.0 where (t->setup_hotplug == 0) this is the size of
the entire buffer we allocated, including space for the hot plug
structures, not the size that we've actually filled in.

Fix this by only trying to parse the size of the structures that we've
filled in.

Signed-off-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/nfit.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index fcd233342273..d785235ba04e 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -154,6 +154,7 @@ struct nfit_test {
 	void *nfit_buf;
 	dma_addr_t nfit_dma;
 	size_t nfit_size;
+	size_t nfit_filled;
 	int dcr_idx;
 	int num_dcr;
 	int num_pm;
@@ -2053,6 +2054,8 @@ static void nfit_test0_setup(struct nfit_test *t)
 		WARN_ON(offset != t->nfit_size);
 	}
 
+	t->nfit_filled = offset;
+
 	post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
 			SPA0_SIZE);
 
@@ -2172,6 +2175,8 @@ static void nfit_test1_setup(struct nfit_test *t)
 	/* sanity check to make sure we've filled the buffer */
 	WARN_ON(offset != t->nfit_size);
 
+	t->nfit_filled = offset;
+
 	post_ars_status(&t->ars_state, &t->badrange, t->spa_set_dma[0],
 			SPA2_SIZE);
 
@@ -2529,7 +2534,7 @@ static int nfit_test_probe(struct platform_device *pdev)
 	nd_desc->ndctl = nfit_test_ctl;
 
 	rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_buf,
-			nfit_test->nfit_size);
+			nfit_test->nfit_filled);
 	if (rc)
 		return rc;
 
-- 
cgit v1.2.3


From 71cfdd0bad3ad91680e6b82cac634154cf56376e Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Wed, 14 Mar 2018 19:25:06 +0100
Subject: libnvdimm: provide module_nd_driver wrapper

Provide a module_nd_driver() wrapper over simple nd_driver_register()
nd_driver_unregister() combinations in module_init() and module_exit()
respectively.

Note an explicit nd_driver_unregister() had to be implemented as nd
bus drivers did call device_unregister() direcly in the module_exit()
function.

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 include/linux/nd.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/nd.h b/include/linux/nd.h
index 5dc6b695437d..43c181a6add5 100644
--- a/include/linux/nd.h
+++ b/include/linux/nd.h
@@ -180,6 +180,12 @@ struct nd_region;
 void nvdimm_region_notify(struct nd_region *nd_region, enum nvdimm_event event);
 int __must_check __nd_driver_register(struct nd_device_driver *nd_drv,
 		struct module *module, const char *mod_name);
+static inline void nd_driver_unregister(struct nd_device_driver *drv)
+{
+	driver_unregister(&drv->drv);
+}
 #define nd_driver_register(driver) \
 	__nd_driver_register(driver, THIS_MODULE, KBUILD_MODNAME)
+#define module_nd_driver(driver) \
+	module_driver(driver, nd_driver_register, nd_driver_unregister)
 #endif /* __LINUX_ND_H__ */
-- 
cgit v1.2.3


From 03e908437d007d8a8583c4ea19c737fff2ef6edc Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Wed, 14 Mar 2018 19:25:07 +0100
Subject: libnvdimm, pmem: use module_nd_driver

Use module_nd_driver() instead of having module_init() and
module_exit() callbacks which just call nd_driver_register() and
nd_driver_unregister().

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/pmem.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 906d31e56261..85dfb3dc3981 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -547,17 +547,7 @@ static struct nd_device_driver nd_pmem_driver = {
 	.type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM,
 };
 
-static int __init pmem_init(void)
-{
-	return nd_driver_register(&nd_pmem_driver);
-}
-module_init(pmem_init);
-
-static void pmem_exit(void)
-{
-	driver_unregister(&nd_pmem_driver.drv);
-}
-module_exit(pmem_exit);
+module_nd_driver(nd_pmem_driver);
 
 MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
 MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3


From d812b0d500040b8c7b9c7a93f29be0a010eb09d1 Mon Sep 17 00:00:00 2001
From: Johannes Thumshirn <jthumshirn@suse.de>
Date: Wed, 14 Mar 2018 19:25:08 +0100
Subject: device-dax: use module_nd_driver

Use module_nd_driver() instead of having module_init() and
module_exit() callbacks which just call nd_driver_register() and
nd_driver_unregister().

Signed-off-by: Johannes Thumshirn <jthumshirn@suse.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/pmem.c | 12 +-----------
 1 file changed, 1 insertion(+), 11 deletions(-)

diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
index f7c0137e9325..fd49b24fd6af 100644
--- a/drivers/dax/pmem.c
+++ b/drivers/dax/pmem.c
@@ -150,17 +150,7 @@ static struct nd_device_driver dax_pmem_driver = {
 	.type = ND_DRIVER_DAX_PMEM,
 };
 
-static int __init dax_pmem_init(void)
-{
-	return nd_driver_register(&dax_pmem_driver);
-}
-module_init(dax_pmem_init);
-
-static void __exit dax_pmem_exit(void)
-{
-	driver_unregister(&dax_pmem_driver.drv);
-}
-module_exit(dax_pmem_exit);
+module_nd_driver(dax_pmem_driver);
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Intel Corporation");
-- 
cgit v1.2.3


From 36de6f518f2e323cb82683d1ee5f62c48b05e8ee Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hpe.com>
Date: Fri, 23 Feb 2018 14:59:21 -0700
Subject: libnvdimm, label: change min label storage size per UEFI 2.7

UEFI 2.7 defines in page 758 that:

  Initial Label Storage Area Configuration
     :
  The minimum size of the Label Storage Area is large enough to
  hold 2 index blocks and 2 labels.

The mininum index block size is 256 bytes, and the minimum label size
is also 256 bytes.

Change ND_LABEL_MIN_SIZE to (256 * 4) so that NVDIMM devices with
the minimum label storage area do not fail with the size check in
nvdimm_init_config_data().

Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/label.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
index 1ebf4d3d01ba..18bbe183b3a9 100644
--- a/drivers/nvdimm/label.h
+++ b/drivers/nvdimm/label.h
@@ -33,7 +33,7 @@ enum {
 	BTTINFO_UUID_LEN = 16,
 	BTTINFO_FLAG_ERROR = 0x1,    /* error state (read-only) */
 	BTTINFO_MAJOR_VERSION = 1,
-	ND_LABEL_MIN_SIZE = 512 * 129, /* see sizeof_namespace_index() */
+	ND_LABEL_MIN_SIZE = 256 * 4, /* see sizeof_namespace_index() */
 	ND_LABEL_ID_SIZE = 50,
 	ND_NSINDEX_INIT = 0x1,
 };
-- 
cgit v1.2.3


From 9e694d9c18dde0fa57ec067043b581bd57c9fb65 Mon Sep 17 00:00:00 2001
From: Toshi Kani <toshi.kani@hpe.com>
Date: Fri, 23 Feb 2018 14:59:22 -0700
Subject: libnvdimm, label: change nvdimm_num_label_slots per UEFI 2.7

sizeof_namespace_index() fails when NVDIMM devices have the minimum
1024 bytes label storage area.  nvdimm_num_label_slots() returns 3
slots while the area is only big enough for 2 slots.

Change nvdimm_num_label_slots() to calculate a number of label slots
according to UEFI 2.7 spec.

Signed-off-by: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/label.c | 34 ++++++++++++++++++++++++----------
 1 file changed, 24 insertions(+), 10 deletions(-)

diff --git a/drivers/nvdimm/label.c b/drivers/nvdimm/label.c
index c858cfadea40..1d28cd656536 100644
--- a/drivers/nvdimm/label.c
+++ b/drivers/nvdimm/label.c
@@ -45,9 +45,27 @@ unsigned sizeof_namespace_label(struct nvdimm_drvdata *ndd)
 	return ndd->nslabel_size;
 }
 
+static size_t __sizeof_namespace_index(u32 nslot)
+{
+	return ALIGN(sizeof(struct nd_namespace_index) + DIV_ROUND_UP(nslot, 8),
+			NSINDEX_ALIGN);
+}
+
+static int __nvdimm_num_label_slots(struct nvdimm_drvdata *ndd,
+		size_t index_size)
+{
+	return (ndd->nsarea.config_size - index_size * 2) /
+			sizeof_namespace_label(ndd);
+}
+
 int nvdimm_num_label_slots(struct nvdimm_drvdata *ndd)
 {
-	return ndd->nsarea.config_size / (sizeof_namespace_label(ndd) + 1);
+	u32 tmp_nslot, n;
+
+	tmp_nslot = ndd->nsarea.config_size / sizeof_namespace_label(ndd);
+	n = __sizeof_namespace_index(tmp_nslot) / NSINDEX_ALIGN;
+
+	return __nvdimm_num_label_slots(ndd, NSINDEX_ALIGN * n);
 }
 
 size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
@@ -55,18 +73,14 @@ size_t sizeof_namespace_index(struct nvdimm_drvdata *ndd)
 	u32 nslot, space, size;
 
 	/*
-	 * The minimum index space is 512 bytes, with that amount of
-	 * index we can describe ~1400 labels which is less than a byte
-	 * of overhead per label.  Round up to a byte of overhead per
-	 * label and determine the size of the index region.  Yes, this
-	 * starts to waste space at larger config_sizes, but it's
-	 * unlikely we'll ever see anything but 128K.
+	 * Per UEFI 2.7, the minimum size of the Label Storage Area is large
+	 * enough to hold 2 index blocks and 2 labels.  The minimum index
+	 * block size is 256 bytes, and the minimum label size is 256 bytes.
 	 */
 	nslot = nvdimm_num_label_slots(ndd);
 	space = ndd->nsarea.config_size - nslot * sizeof_namespace_label(ndd);
-	size = ALIGN(sizeof(struct nd_namespace_index) + DIV_ROUND_UP(nslot, 8),
-			NSINDEX_ALIGN) * 2;
-	if (size <= space)
+	size = __sizeof_namespace_index(nslot) * 2;
+	if (size <= space && nslot >= 2)
 		return size / 2;
 
 	dev_err(ndd->dev, "label area (%d) too small to host (%d byte) labels\n",
-- 
cgit v1.2.3


From 0731de476a37c33485af82d64041c9d193208df8 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 21 Mar 2018 21:22:34 -0700
Subject: nfit: skip region registration for incomplete control regions

Per the ACPI specification the only functional purpose for a DIMM
Control Region to be mapped into the system physical address space, from
an OSPM perspective, is to support block-apertures. However, there are
some BIOSen that publish DIMM Control Region SPA entries for pre-boot
environment consumption.  Undo the kernel policy of generating disabled
'ndblk' regions when this configuration is detected.

Cc: <stable@vger.kernel.org>
Fixes: 1f7df6f88b92 ("libnvdimm, nfit: regions (block-data-window...)")
Reviewed-by: Toshi Kani <toshi.kani@hpe.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 39ad06143e78..4530d89044db 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -2578,7 +2578,7 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
 	struct acpi_nfit_system_address *spa = nfit_spa->spa;
 	struct nd_blk_region_desc *ndbr_desc;
 	struct nfit_mem *nfit_mem;
-	int blk_valid = 0, rc;
+	int rc;
 
 	if (!nvdimm) {
 		dev_err(acpi_desc->dev, "spa%d dimm: %#x not found\n",
@@ -2598,15 +2598,14 @@ static int acpi_nfit_init_mapping(struct acpi_nfit_desc *acpi_desc,
 		if (!nfit_mem || !nfit_mem->bdw) {
 			dev_dbg(acpi_desc->dev, "spa%d %s missing bdw\n",
 					spa->range_index, nvdimm_name(nvdimm));
-		} else {
-			mapping->size = nfit_mem->bdw->capacity;
-			mapping->start = nfit_mem->bdw->start_address;
-			ndr_desc->num_lanes = nfit_mem->bdw->windows;
-			blk_valid = 1;
+			break;
 		}
 
+		mapping->size = nfit_mem->bdw->capacity;
+		mapping->start = nfit_mem->bdw->start_address;
+		ndr_desc->num_lanes = nfit_mem->bdw->windows;
 		ndr_desc->mapping = mapping;
-		ndr_desc->num_mappings = blk_valid;
+		ndr_desc->num_mappings = 1;
 		ndbr_desc = to_blk_region_desc(ndr_desc);
 		ndbr_desc->enable = acpi_nfit_blk_region_enable;
 		ndbr_desc->do_io = acpi_desc->blk_do_io;
-- 
cgit v1.2.3


From 466d1493ea830789a2f063f478aaed2e324f0d3d Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 28 Mar 2018 10:44:50 -0700
Subject: acpi, nfit: rework NVDIMM leaf method detection

Some BIOSen do not handle 0-byte transfer lengths for the _LSR and _LSW
(label storage read/write) methods. This causes Linux to fallback to the
deprecated _DSM path, or otherwise disable label support.

Introduce acpi_nvdimm_has_method() to detect whether a method is
available rather than calling the method, require _LSI and _LSR to be
paired, and require read support before enabling write support.

Cc: <stable@vger.kernel.org>
Fixes: 4b27db7e26cd ("acpi, nfit: add support for the _LS...")
Suggested-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c | 41 +++++++++++++++++++++--------------------
 drivers/acpi/nfit/nfit.h |  5 ++---
 2 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 4530d89044db..12fb414fa678 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -196,7 +196,7 @@ static int xlat_nvdimm_status(struct nvdimm *nvdimm, void *buf, unsigned int cmd
 		 * In the _LSI, _LSR, _LSW case the locked status is
 		 * communicated via the read/write commands
 		 */
-		if (nfit_mem->has_lsi)
+		if (nfit_mem->has_lsr)
 			break;
 
 		if (status >> 16 & ND_CONFIG_LOCKED)
@@ -483,7 +483,7 @@ int acpi_nfit_ctl(struct nvdimm_bus_descriptor *nd_desc, struct nvdimm *nvdimm,
 			min_t(u32, 256, in_buf.buffer.length), true);
 
 	/* call the BIOS, prefer the named methods over _DSM if available */
-	if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE && nfit_mem->has_lsi)
+	if (nvdimm && cmd == ND_CMD_GET_CONFIG_SIZE && nfit_mem->has_lsr)
 		out_obj = acpi_label_info(handle);
 	else if (nvdimm && cmd == ND_CMD_GET_CONFIG_DATA && nfit_mem->has_lsr) {
 		struct nd_cmd_get_config_data_hdr *p = buf;
@@ -1653,12 +1653,23 @@ static void acpi_nvdimm_notify(acpi_handle handle, u32 event, void *data)
 	device_unlock(dev->parent);
 }
 
+static bool acpi_nvdimm_has_method(struct acpi_device *adev, char *method)
+{
+	acpi_handle handle;
+	acpi_status status;
+
+	status = acpi_get_handle(adev->handle, method, &handle);
+
+	if (ACPI_SUCCESS(status))
+		return true;
+	return false;
+}
+
 static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 		struct nfit_mem *nfit_mem, u32 device_handle)
 {
 	struct acpi_device *adev, *adev_dimm;
 	struct device *dev = acpi_desc->dev;
-	union acpi_object *obj;
 	unsigned long dsm_mask;
 	const guid_t *guid;
 	int i;
@@ -1731,25 +1742,15 @@ static int acpi_nfit_add_dimm(struct acpi_nfit_desc *acpi_desc,
 					1ULL << i))
 			set_bit(i, &nfit_mem->dsm_mask);
 
-	obj = acpi_label_info(adev_dimm->handle);
-	if (obj) {
-		ACPI_FREE(obj);
-		nfit_mem->has_lsi = 1;
-		dev_dbg(dev, "%s: has _LSI\n", dev_name(&adev_dimm->dev));
-	}
-
-	obj = acpi_label_read(adev_dimm->handle, 0, 0);
-	if (obj) {
-		ACPI_FREE(obj);
-		nfit_mem->has_lsr = 1;
+	if (acpi_nvdimm_has_method(adev_dimm, "_LSI")
+			&& acpi_nvdimm_has_method(adev_dimm, "_LSR")) {
 		dev_dbg(dev, "%s: has _LSR\n", dev_name(&adev_dimm->dev));
+		nfit_mem->has_lsr = true;
 	}
 
-	obj = acpi_label_write(adev_dimm->handle, 0, 0, NULL);
-	if (obj) {
-		ACPI_FREE(obj);
-		nfit_mem->has_lsw = 1;
+	if (nfit_mem->has_lsr && acpi_nvdimm_has_method(adev_dimm, "_LSW")) {
 		dev_dbg(dev, "%s: has _LSW\n", dev_name(&adev_dimm->dev));
+		nfit_mem->has_lsw = true;
 	}
 
 	return 0;
@@ -1838,10 +1839,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
 			cmd_mask |= nfit_mem->dsm_mask & NVDIMM_STANDARD_CMDMASK;
 		}
 
-		if (nfit_mem->has_lsi)
+		if (nfit_mem->has_lsr) {
 			set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
-		if (nfit_mem->has_lsr)
 			set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
+		}
 		if (nfit_mem->has_lsw)
 			set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
 
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 50d36e166d70..ac9c49463731 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -171,9 +171,8 @@ struct nfit_mem {
 	struct resource *flush_wpq;
 	unsigned long dsm_mask;
 	int family;
-	u32 has_lsi:1;
-	u32 has_lsr:1;
-	u32 has_lsw:1;
+	bool has_lsr;
+	bool has_lsw;
 };
 
 struct acpi_nfit_desc {
-- 
cgit v1.2.3


From 3fe0791c295cfd3cd735de7a32cc0780949c009f Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 14 Oct 2017 17:13:45 -0700
Subject: dax: store pfns in the radix

In preparation for examining the busy state of dax pages in the truncate
path, switch from sectors to pfns in the radix.

Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/super.c | 15 ++++++++--
 fs/dax.c            | 83 ++++++++++++++++++++---------------------------------
 2 files changed, 43 insertions(+), 55 deletions(-)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index ecdc292aa4e4..2b2332b605e4 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -124,10 +124,19 @@ int __bdev_dax_supported(struct super_block *sb, int blocksize)
 		return len < 0 ? len : -EIO;
 	}
 
-	if ((IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn))
-			|| pfn_t_devmap(pfn))
+	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED) && pfn_t_special(pfn)) {
+		/*
+		 * An arch that has enabled the pmem api should also
+		 * have its drivers support pfn_t_devmap()
+		 *
+		 * This is a developer warning and should not trigger in
+		 * production. dax_flush() will crash since it depends
+		 * on being able to do (page_address(pfn_to_page())).
+		 */
+		WARN_ON(IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API));
+	} else if (pfn_t_devmap(pfn)) {
 		/* pass */;
-	else {
+	} else {
 		pr_debug("VFS (%s): error: dax support not enabled\n",
 				sb->s_id);
 		return -EOPNOTSUPP;
diff --git a/fs/dax.c b/fs/dax.c
index 0276df90e86c..b646a46e4d12 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -73,16 +73,15 @@ fs_initcall(init_dax_wait_table);
 #define RADIX_DAX_ZERO_PAGE	(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 2))
 #define RADIX_DAX_EMPTY		(1 << (RADIX_TREE_EXCEPTIONAL_SHIFT + 3))
 
-static unsigned long dax_radix_sector(void *entry)
+static unsigned long dax_radix_pfn(void *entry)
 {
 	return (unsigned long)entry >> RADIX_DAX_SHIFT;
 }
 
-static void *dax_radix_locked_entry(sector_t sector, unsigned long flags)
+static void *dax_radix_locked_entry(unsigned long pfn, unsigned long flags)
 {
 	return (void *)(RADIX_TREE_EXCEPTIONAL_ENTRY | flags |
-			((unsigned long)sector << RADIX_DAX_SHIFT) |
-			RADIX_DAX_ENTRY_LOCK);
+			(pfn << RADIX_DAX_SHIFT) | RADIX_DAX_ENTRY_LOCK);
 }
 
 static unsigned int dax_radix_order(void *entry)
@@ -526,12 +525,13 @@ static int copy_user_dax(struct block_device *bdev, struct dax_device *dax_dev,
  */
 static void *dax_insert_mapping_entry(struct address_space *mapping,
 				      struct vm_fault *vmf,
-				      void *entry, sector_t sector,
+				      void *entry, pfn_t pfn_t,
 				      unsigned long flags, bool dirty)
 {
 	struct radix_tree_root *page_tree = &mapping->page_tree;
-	void *new_entry;
+	unsigned long pfn = pfn_t_to_pfn(pfn_t);
 	pgoff_t index = vmf->pgoff;
+	void *new_entry;
 
 	if (dirty)
 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
@@ -546,7 +546,7 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
 	}
 
 	spin_lock_irq(&mapping->tree_lock);
-	new_entry = dax_radix_locked_entry(sector, flags);
+	new_entry = dax_radix_locked_entry(pfn, flags);
 
 	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
 		/*
@@ -657,17 +657,14 @@ unlock_pte:
 	i_mmap_unlock_read(mapping);
 }
 
-static int dax_writeback_one(struct block_device *bdev,
-		struct dax_device *dax_dev, struct address_space *mapping,
-		pgoff_t index, void *entry)
+static int dax_writeback_one(struct dax_device *dax_dev,
+		struct address_space *mapping, pgoff_t index, void *entry)
 {
 	struct radix_tree_root *page_tree = &mapping->page_tree;
-	void *entry2, **slot, *kaddr;
-	long ret = 0, id;
-	sector_t sector;
-	pgoff_t pgoff;
+	void *entry2, **slot;
+	unsigned long pfn;
+	long ret = 0;
 	size_t size;
-	pfn_t pfn;
 
 	/*
 	 * A page got tagged dirty in DAX mapping? Something is seriously
@@ -683,10 +680,10 @@ static int dax_writeback_one(struct block_device *bdev,
 		goto put_unlocked;
 	/*
 	 * Entry got reallocated elsewhere? No need to writeback. We have to
-	 * compare sectors as we must not bail out due to difference in lockbit
+	 * compare pfns as we must not bail out due to difference in lockbit
 	 * or entry type.
 	 */
-	if (dax_radix_sector(entry2) != dax_radix_sector(entry))
+	if (dax_radix_pfn(entry2) != dax_radix_pfn(entry))
 		goto put_unlocked;
 	if (WARN_ON_ONCE(dax_is_empty_entry(entry) ||
 				dax_is_zero_entry(entry))) {
@@ -712,33 +709,15 @@ static int dax_writeback_one(struct block_device *bdev,
 	/*
 	 * Even if dax_writeback_mapping_range() was given a wbc->range_start
 	 * in the middle of a PMD, the 'index' we are given will be aligned to
-	 * the start index of the PMD, as will the sector we pull from
-	 * 'entry'.  This allows us to flush for PMD_SIZE and not have to
-	 * worry about partial PMD writebacks.
+	 * the start index of the PMD, as will the pfn we pull from 'entry'.
+	 * This allows us to flush for PMD_SIZE and not have to worry about
+	 * partial PMD writebacks.
 	 */
-	sector = dax_radix_sector(entry);
+	pfn = dax_radix_pfn(entry);
 	size = PAGE_SIZE << dax_radix_order(entry);
 
-	id = dax_read_lock();
-	ret = bdev_dax_pgoff(bdev, sector, size, &pgoff);
-	if (ret)
-		goto dax_unlock;
-
-	/*
-	 * dax_direct_access() may sleep, so cannot hold tree_lock over
-	 * its invocation.
-	 */
-	ret = dax_direct_access(dax_dev, pgoff, size / PAGE_SIZE, &kaddr, &pfn);
-	if (ret < 0)
-		goto dax_unlock;
-
-	if (WARN_ON_ONCE(ret < size / PAGE_SIZE)) {
-		ret = -EIO;
-		goto dax_unlock;
-	}
-
-	dax_mapping_entry_mkclean(mapping, index, pfn_t_to_pfn(pfn));
-	dax_flush(dax_dev, kaddr, size);
+	dax_mapping_entry_mkclean(mapping, index, pfn);
+	dax_flush(dax_dev, page_address(pfn_to_page(pfn)), size);
 	/*
 	 * After we have flushed the cache, we can clear the dirty tag. There
 	 * cannot be new dirty data in the pfn after the flush has completed as
@@ -749,8 +728,6 @@ static int dax_writeback_one(struct block_device *bdev,
 	radix_tree_tag_clear(page_tree, index, PAGECACHE_TAG_DIRTY);
 	spin_unlock_irq(&mapping->tree_lock);
 	trace_dax_writeback_one(mapping->host, index, size >> PAGE_SHIFT);
- dax_unlock:
-	dax_read_unlock(id);
 	put_locked_mapping_entry(mapping, index);
 	return ret;
 
@@ -808,8 +785,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 				break;
 			}
 
-			ret = dax_writeback_one(bdev, dax_dev, mapping,
-					indices[i], pvec.pages[i]);
+			ret = dax_writeback_one(dax_dev, mapping, indices[i],
+					pvec.pages[i]);
 			if (ret < 0) {
 				mapping_set_error(mapping, ret);
 				goto out;
@@ -877,6 +854,7 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 	int ret = VM_FAULT_NOPAGE;
 	struct page *zero_page;
 	void *entry2;
+	pfn_t pfn;
 
 	zero_page = ZERO_PAGE(0);
 	if (unlikely(!zero_page)) {
@@ -884,14 +862,15 @@ static int dax_load_hole(struct address_space *mapping, void *entry,
 		goto out;
 	}
 
-	entry2 = dax_insert_mapping_entry(mapping, vmf, entry, 0,
+	pfn = page_to_pfn_t(zero_page);
+	entry2 = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 			RADIX_DAX_ZERO_PAGE, false);
 	if (IS_ERR(entry2)) {
 		ret = VM_FAULT_SIGBUS;
 		goto out;
 	}
 
-	vm_insert_mixed(vmf->vma, vaddr, page_to_pfn_t(zero_page));
+	vm_insert_mixed(vmf->vma, vaddr, pfn);
 out:
 	trace_dax_load_hole(inode, vmf, ret);
 	return ret;
@@ -1200,8 +1179,7 @@ static int dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
 		if (error < 0)
 			goto error_finish_iomap;
 
-		entry = dax_insert_mapping_entry(mapping, vmf, entry,
-						 dax_iomap_sector(&iomap, pos),
+		entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 						 0, write && !sync);
 		if (IS_ERR(entry)) {
 			error = PTR_ERR(entry);
@@ -1280,13 +1258,15 @@ static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
 	void *ret = NULL;
 	spinlock_t *ptl;
 	pmd_t pmd_entry;
+	pfn_t pfn;
 
 	zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);
 
 	if (unlikely(!zero_page))
 		goto fallback;
 
-	ret = dax_insert_mapping_entry(mapping, vmf, entry, 0,
+	pfn = page_to_pfn_t(zero_page);
+	ret = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 			RADIX_DAX_PMD | RADIX_DAX_ZERO_PAGE, false);
 	if (IS_ERR(ret))
 		goto fallback;
@@ -1409,8 +1389,7 @@ static int dax_iomap_pmd_fault(struct vm_fault *vmf, pfn_t *pfnp,
 		if (error < 0)
 			goto finish_iomap;
 
-		entry = dax_insert_mapping_entry(mapping, vmf, entry,
-						dax_iomap_sector(&iomap, pos),
+		entry = dax_insert_mapping_entry(mapping, vmf, entry, pfn,
 						RADIX_DAX_PMD, write && !sync);
 		if (IS_ERR(entry))
 			goto finish_iomap;
-- 
cgit v1.2.3


From f44c77630d26ca2c2a60b20c47dd9ce07c4361b3 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 7 Mar 2018 15:26:44 -0800
Subject: fs, dax: prepare for dax-specific address_space_operations

In preparation for the dax implementation to start associating dax pages
to inodes via page->mapping, we need to provide a 'struct
address_space_operations' instance for dax. Define some generic VFS aops
helpers for dax. These noop implementations are there in the dax case to
prevent the VFS from falling back to operations with page-cache
assumptions, dax_writeback_mapping_range() may not be referenced in the
FS_DAX=n case.

Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Suggested-by: Matthew Wilcox <mawilcox@microsoft.com>
Suggested-by: Jan Kara <jack@suse.cz>
Suggested-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Suggested-by: Dave Chinner <david@fromorbit.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/libfs.c          | 39 +++++++++++++++++++++++++++++++++++++++
 include/linux/dax.h | 12 +++++++++---
 include/linux/fs.h  |  4 ++++
 3 files changed, 52 insertions(+), 3 deletions(-)

diff --git a/fs/libfs.c b/fs/libfs.c
index 7ff3cb904acd..0fb590d79f30 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1060,6 +1060,45 @@ int noop_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 }
 EXPORT_SYMBOL(noop_fsync);
 
+int noop_set_page_dirty(struct page *page)
+{
+	/*
+	 * Unlike __set_page_dirty_no_writeback that handles dirty page
+	 * tracking in the page object, dax does all dirty tracking in
+	 * the inode address_space in response to mkwrite faults. In the
+	 * dax case we only need to worry about potentially dirty CPU
+	 * caches, not dirty page cache pages to write back.
+	 *
+	 * This callback is defined to prevent fallback to
+	 * __set_page_dirty_buffers() in set_page_dirty().
+	 */
+	return 0;
+}
+EXPORT_SYMBOL_GPL(noop_set_page_dirty);
+
+void noop_invalidatepage(struct page *page, unsigned int offset,
+		unsigned int length)
+{
+	/*
+	 * There is no page cache to invalidate in the dax case, however
+	 * we need this callback defined to prevent falling back to
+	 * block_invalidatepage() in do_invalidatepage().
+	 */
+}
+EXPORT_SYMBOL_GPL(noop_invalidatepage);
+
+ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+{
+	/*
+	 * iomap based filesystems support direct I/O without need for
+	 * this callback. However, it still needs to be set in
+	 * inode->a_ops so that open/fcntl know that direct I/O is
+	 * generally supported.
+	 */
+	return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(noop_direct_IO);
+
 /* Because kfree isn't assignment-compatible with void(void*) ;-/ */
 void kfree_link(void *p)
 {
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 0185ecdae135..ae27a7efe7ab 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -38,6 +38,7 @@ static inline void put_dax(struct dax_device *dax_dev)
 }
 #endif
 
+struct writeback_control;
 int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff);
 #if IS_ENABLED(CONFIG_FS_DAX)
 int __bdev_dax_supported(struct super_block *sb, int blocksize);
@@ -57,6 +58,8 @@ static inline void fs_put_dax(struct dax_device *dax_dev)
 }
 
 struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev);
+int dax_writeback_mapping_range(struct address_space *mapping,
+		struct block_device *bdev, struct writeback_control *wbc);
 #else
 static inline int bdev_dax_supported(struct super_block *sb, int blocksize)
 {
@@ -76,6 +79,12 @@ static inline struct dax_device *fs_dax_get_by_bdev(struct block_device *bdev)
 {
 	return NULL;
 }
+
+static inline int dax_writeback_mapping_range(struct address_space *mapping,
+		struct block_device *bdev, struct writeback_control *wbc)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 int dax_read_lock(void);
@@ -121,7 +130,4 @@ static inline bool dax_mapping(struct address_space *mapping)
 	return mapping->host && IS_DAX(mapping->host);
 }
 
-struct writeback_control;
-int dax_writeback_mapping_range(struct address_space *mapping,
-		struct block_device *bdev, struct writeback_control *wbc);
 #endif
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 79c413985305..44f7f7080faa 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3129,6 +3129,10 @@ extern int simple_rmdir(struct inode *, struct dentry *);
 extern int simple_rename(struct inode *, struct dentry *,
 			 struct inode *, struct dentry *, unsigned int);
 extern int noop_fsync(struct file *, loff_t, loff_t, int);
+extern int noop_set_page_dirty(struct page *page);
+extern void noop_invalidatepage(struct page *page, unsigned int offset,
+		unsigned int length);
+extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
 extern int simple_empty(struct dentry *);
 extern int simple_readpage(struct file *file, struct page *page);
 extern int simple_write_begin(struct file *file, struct address_space *mapping,
-- 
cgit v1.2.3


From 15aa8a01189b7399d085cceae6775cc6899b1909 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sun, 11 Mar 2018 12:03:08 -0700
Subject: block, dax: remove dead code in blkdev_writepages()

Block device inodes never have S_DAX set, so kill the check for DAX and
diversion to dax_writeback_mapping_range().

Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/block_dev.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index fe09ef9c21f3..846ee2d31781 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1946,11 +1946,6 @@ static int blkdev_releasepage(struct page *page, gfp_t wait)
 static int blkdev_writepages(struct address_space *mapping,
 			     struct writeback_control *wbc)
 {
-	if (dax_mapping(mapping)) {
-		struct block_device *bdev = I_BDEV(mapping->host);
-
-		return dax_writeback_mapping_range(mapping, bdev, wbc);
-	}
 	return generic_writepages(mapping, wbc);
 }
 
-- 
cgit v1.2.3


From 6e2608dfd93464bb26ba868b301ad5336c8c1df8 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Wed, 7 Mar 2018 15:26:44 -0800
Subject: xfs, dax: introduce xfs_dax_aops

In preparation for the dax implementation to start associating dax pages
to inodes via page->mapping, we need to provide a 'struct
address_space_operations' instance for dax. Otherwise, direct-I/O
triggers incorrect page cache assumptions and warnings like the
following:

 WARNING: CPU: 27 PID: 1783 at fs/xfs/xfs_aops.c:1468
 xfs_vm_set_page_dirty+0xf3/0x1b0 [xfs]
 [..]
 CPU: 27 PID: 1783 Comm: dma-collision Tainted: G           O 4.15.0-rc2+ #984
 [..]
 Call Trace:
  set_page_dirty_lock+0x40/0x60
  bio_set_pages_dirty+0x37/0x50
  iomap_dio_actor+0x2b7/0x3b0
  ? iomap_dio_zero+0x110/0x110
  iomap_apply+0xa4/0x110
  iomap_dio_rw+0x29e/0x3b0
  ? iomap_dio_zero+0x110/0x110
  ? xfs_file_dio_aio_read+0x7c/0x1a0 [xfs]
  xfs_file_dio_aio_read+0x7c/0x1a0 [xfs]
  xfs_file_read_iter+0xa0/0xc0 [xfs]
  __vfs_read+0xf9/0x170
  vfs_read+0xa6/0x150
  SyS_pread64+0x93/0xb0
  entry_SYSCALL_64_fastpath+0x1f/0x96

...where the default set_page_dirty() handler assumes that dirty state
is being tracked in 'struct page' flags.

Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Suggested-by: Jan Kara <jack@suse.cz>
Suggested-by: Dave Chinner <david@fromorbit.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/xfs/xfs_aops.c | 34 ++++++++++++++++++----------------
 fs/xfs/xfs_aops.h |  1 +
 fs/xfs/xfs_iops.c |  5 ++++-
 3 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 9c6a830da0ee..e7a56c4786ff 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -1194,16 +1194,22 @@ xfs_vm_writepages(
 	int			ret;
 
 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
-	if (dax_mapping(mapping))
-		return dax_writeback_mapping_range(mapping,
-				xfs_find_bdev_for_inode(mapping->host), wbc);
-
 	ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
 	if (wpc.ioend)
 		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
 	return ret;
 }
 
+STATIC int
+xfs_dax_writepages(
+	struct address_space	*mapping,
+	struct writeback_control *wbc)
+{
+	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
+	return dax_writeback_mapping_range(mapping,
+			xfs_find_bdev_for_inode(mapping->host), wbc);
+}
+
 /*
  * Called to move a page into cleanable state - and from there
  * to be released. The page should already be clean. We always
@@ -1367,17 +1373,6 @@ out_unlock:
 	return error;
 }
 
-STATIC ssize_t
-xfs_vm_direct_IO(
-	struct kiocb		*iocb,
-	struct iov_iter		*iter)
-{
-	/*
-	 * We just need the method present so that open/fcntl allow direct I/O.
-	 */
-	return -EINVAL;
-}
-
 STATIC sector_t
 xfs_vm_bmap(
 	struct address_space	*mapping,
@@ -1500,8 +1495,15 @@ const struct address_space_operations xfs_address_space_operations = {
 	.releasepage		= xfs_vm_releasepage,
 	.invalidatepage		= xfs_vm_invalidatepage,
 	.bmap			= xfs_vm_bmap,
-	.direct_IO		= xfs_vm_direct_IO,
+	.direct_IO		= noop_direct_IO,
 	.migratepage		= buffer_migrate_page,
 	.is_partially_uptodate  = block_is_partially_uptodate,
 	.error_remove_page	= generic_error_remove_page,
 };
+
+const struct address_space_operations xfs_dax_aops = {
+	.writepages		= xfs_dax_writepages,
+	.direct_IO		= noop_direct_IO,
+	.set_page_dirty		= noop_set_page_dirty,
+	.invalidatepage		= noop_invalidatepage,
+};
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index 88c85ea63da0..69346d460dfa 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -54,6 +54,7 @@ struct xfs_ioend {
 };
 
 extern const struct address_space_operations xfs_address_space_operations;
+extern const struct address_space_operations xfs_dax_aops;
 
 int	xfs_setfilesize(struct xfs_inode *ip, xfs_off_t offset, size_t size);
 
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 56475fcd76f2..951e84df5576 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -1272,7 +1272,10 @@ xfs_setup_iops(
 	case S_IFREG:
 		inode->i_op = &xfs_inode_operations;
 		inode->i_fop = &xfs_file_operations;
-		inode->i_mapping->a_ops = &xfs_address_space_operations;
+		if (IS_DAX(inode))
+			inode->i_mapping->a_ops = &xfs_dax_aops;
+		else
+			inode->i_mapping->a_ops = &xfs_address_space_operations;
 		break;
 	case S_IFDIR:
 		if (xfs_sb_version_hasasciici(&XFS_M(inode->i_sb)->m_sb))
-- 
cgit v1.2.3


From 5f0663bb4a64f588f0a2dd6d1be68d40f9af0086 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 21 Dec 2017 12:25:11 -0800
Subject: ext4, dax: introduce ext4_dax_aops

In preparation for the dax implementation to start associating dax pages
to inodes via page->mapping, we need to provide a 'struct
address_space_operations' instance for dax. Otherwise, direct-I/O
triggers incorrect page cache assumptions and warnings.

Cc: "Theodore Ts'o" <tytso@mit.edu>
Cc: Andreas Dilger <adilger.kernel@dilger.ca>
Cc: linux-ext4@vger.kernel.org
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/ext4/inode.c | 42 +++++++++++++++++++++++++++++++-----------
 1 file changed, 31 insertions(+), 11 deletions(-)

diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index c94780075b04..249a97b19181 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2725,12 +2725,6 @@ static int ext4_writepages(struct address_space *mapping,
 	percpu_down_read(&sbi->s_journal_flag_rwsem);
 	trace_ext4_writepages(inode, wbc);
 
-	if (dax_mapping(mapping)) {
-		ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
-						  wbc);
-		goto out_writepages;
-	}
-
 	/*
 	 * No pages to write? This is mainly a kludge to avoid starting
 	 * a transaction for special inodes like journal inode on last iput()
@@ -2955,6 +2949,27 @@ out_writepages:
 	return ret;
 }
 
+static int ext4_dax_writepages(struct address_space *mapping,
+			       struct writeback_control *wbc)
+{
+	int ret;
+	long nr_to_write = wbc->nr_to_write;
+	struct inode *inode = mapping->host;
+	struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
+
+	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
+		return -EIO;
+
+	percpu_down_read(&sbi->s_journal_flag_rwsem);
+	trace_ext4_writepages(inode, wbc);
+
+	ret = dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev, wbc);
+	trace_ext4_writepages_result(inode, wbc, ret,
+				     nr_to_write - wbc->nr_to_write);
+	percpu_up_read(&sbi->s_journal_flag_rwsem);
+	return ret;
+}
+
 static int ext4_nonda_switch(struct super_block *sb)
 {
 	s64 free_clusters, dirty_clusters;
@@ -3857,10 +3872,6 @@ static ssize_t ext4_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	if (ext4_has_inline_data(inode))
 		return 0;
 
-	/* DAX uses iomap path now */
-	if (WARN_ON_ONCE(IS_DAX(inode)))
-		return 0;
-
 	trace_ext4_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
 	if (iov_iter_rw(iter) == READ)
 		ret = ext4_direct_IO_read(iocb, iter);
@@ -3946,6 +3957,13 @@ static const struct address_space_operations ext4_da_aops = {
 	.error_remove_page	= generic_error_remove_page,
 };
 
+static const struct address_space_operations ext4_dax_aops = {
+	.writepages		= ext4_dax_writepages,
+	.direct_IO		= noop_direct_IO,
+	.set_page_dirty		= noop_set_page_dirty,
+	.invalidatepage		= noop_invalidatepage,
+};
+
 void ext4_set_aops(struct inode *inode)
 {
 	switch (ext4_inode_journal_mode(inode)) {
@@ -3958,7 +3976,9 @@ void ext4_set_aops(struct inode *inode)
 	default:
 		BUG();
 	}
-	if (test_opt(inode->i_sb, DELALLOC))
+	if (IS_DAX(inode))
+		inode->i_mapping->a_ops = &ext4_dax_aops;
+	else if (test_opt(inode->i_sb, DELALLOC))
 		inode->i_mapping->a_ops = &ext4_da_aops;
 	else
 		inode->i_mapping->a_ops = &ext4_aops;
-- 
cgit v1.2.3


From 8d0d8ed3356aa9ed43b819aaedd39b08ca453007 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 2 Apr 2018 16:49:30 -0700
Subject: nfit: fix region registration vs block-data-window ranges

Commit 1cf03c00e7c1 "nfit: scrub and register regions in a workqueue"
mistakenly attempts to register a region per BLK aperture. There is
nothing to register for individual apertures as they belong as a set to
a BLK aperture group that are registered with a corresponding
DIMM-control-region. Filter them for registration to prevent some
needless devm_kzalloc() allocations.

Cc: <stable@vger.kernel.org>
Fixes: 1cf03c00e7c1 ("nfit: scrub and register regions in a workqueue")
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 12fb414fa678..ea9f3e727fef 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -3018,15 +3018,21 @@ static void acpi_nfit_scrub(struct work_struct *work)
 static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nfit_spa *nfit_spa;
-	int rc;
 
-	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
-		if (nfit_spa_type(nfit_spa->spa) == NFIT_SPA_DCR) {
-			/* BLK regions don't need to wait for ars results */
-			rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
-			if (rc)
-				return rc;
-		}
+	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
+		int rc, type = nfit_spa_type(nfit_spa->spa);
+
+		/* PMEM and VMEM will be registered by the ARS workqueue */
+		if (type == NFIT_SPA_PM || type == NFIT_SPA_VOLATILE)
+			continue;
+		/* BLK apertures belong to BLK region registration below */
+		if (type == NFIT_SPA_BDW)
+			continue;
+		/* BLK regions don't need to wait for ARS results */
+		rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
+		if (rc)
+			return rc;
+	}
 
 	acpi_desc->ars_start_flags = 0;
 	if (!acpi_desc->cancel)
-- 
cgit v1.2.3


From fb094c90748fbeba1063927eeb751add147b35b9 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 21 Dec 2017 12:25:11 -0800
Subject: ext2, dax: introduce ext2_dax_aops

In preparation for the dax implementation to start associating dax pages
to inodes via page->mapping, we need to provide a 'struct
address_space_operations' instance for dax. Otherwise, direct-I/O
triggers incorrect page cache assumptions and warnings.

Reviewed-by: Jan Kara <jack@suse.com>
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/ext2/ext2.h  |  1 +
 fs/ext2/inode.c | 46 +++++++++++++++++++++++++++-------------------
 fs/ext2/namei.c | 18 ++----------------
 3 files changed, 30 insertions(+), 35 deletions(-)

diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 032295e1d386..cc40802ddfa8 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -814,6 +814,7 @@ extern const struct inode_operations ext2_file_inode_operations;
 extern const struct file_operations ext2_file_operations;
 
 /* inode.c */
+extern void ext2_set_file_ops(struct inode *inode);
 extern const struct address_space_operations ext2_aops;
 extern const struct address_space_operations ext2_nobh_aops;
 extern const struct iomap_ops ext2_iomap_ops;
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 9b2ac55ac34f..1e01fabef130 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -940,9 +940,6 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 	loff_t offset = iocb->ki_pos;
 	ssize_t ret;
 
-	if (WARN_ON_ONCE(IS_DAX(inode)))
-		return -EIO;
-
 	ret = blockdev_direct_IO(iocb, inode, iter, ext2_get_block);
 	if (ret < 0 && iov_iter_rw(iter) == WRITE)
 		ext2_write_failed(mapping, offset + count);
@@ -952,17 +949,16 @@ ext2_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 static int
 ext2_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
-#ifdef CONFIG_FS_DAX
-	if (dax_mapping(mapping)) {
-		return dax_writeback_mapping_range(mapping,
-						   mapping->host->i_sb->s_bdev,
-						   wbc);
-	}
-#endif
-
 	return mpage_writepages(mapping, wbc, ext2_get_block);
 }
 
+static int
+ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc)
+{
+	return dax_writeback_mapping_range(mapping,
+			mapping->host->i_sb->s_bdev, wbc);
+}
+
 const struct address_space_operations ext2_aops = {
 	.readpage		= ext2_readpage,
 	.readpages		= ext2_readpages,
@@ -990,6 +986,13 @@ const struct address_space_operations ext2_nobh_aops = {
 	.error_remove_page	= generic_error_remove_page,
 };
 
+static const struct address_space_operations ext2_dax_aops = {
+	.writepages		= ext2_dax_writepages,
+	.direct_IO		= noop_direct_IO,
+	.set_page_dirty		= noop_set_page_dirty,
+	.invalidatepage		= noop_invalidatepage,
+};
+
 /*
  * Probably it should be a library function... search for first non-zero word
  * or memcmp with zero_page, whatever is better for particular architecture.
@@ -1388,6 +1391,18 @@ void ext2_set_inode_flags(struct inode *inode)
 		inode->i_flags |= S_DAX;
 }
 
+void ext2_set_file_ops(struct inode *inode)
+{
+	inode->i_op = &ext2_file_inode_operations;
+	inode->i_fop = &ext2_file_operations;
+	if (IS_DAX(inode))
+		inode->i_mapping->a_ops = &ext2_dax_aops;
+	else if (test_opt(inode->i_sb, NOBH))
+		inode->i_mapping->a_ops = &ext2_nobh_aops;
+	else
+		inode->i_mapping->a_ops = &ext2_aops;
+}
+
 struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
 {
 	struct ext2_inode_info *ei;
@@ -1480,14 +1495,7 @@ struct inode *ext2_iget (struct super_block *sb, unsigned long ino)
 		ei->i_data[n] = raw_inode->i_block[n];
 
 	if (S_ISREG(inode->i_mode)) {
-		inode->i_op = &ext2_file_inode_operations;
-		if (test_opt(inode->i_sb, NOBH)) {
-			inode->i_mapping->a_ops = &ext2_nobh_aops;
-			inode->i_fop = &ext2_file_operations;
-		} else {
-			inode->i_mapping->a_ops = &ext2_aops;
-			inode->i_fop = &ext2_file_operations;
-		}
+		ext2_set_file_ops(inode);
 	} else if (S_ISDIR(inode->i_mode)) {
 		inode->i_op = &ext2_dir_inode_operations;
 		inode->i_fop = &ext2_dir_operations;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index e078075dc66f..55f7caadb093 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -107,14 +107,7 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
-	inode->i_op = &ext2_file_inode_operations;
-	if (test_opt(inode->i_sb, NOBH)) {
-		inode->i_mapping->a_ops = &ext2_nobh_aops;
-		inode->i_fop = &ext2_file_operations;
-	} else {
-		inode->i_mapping->a_ops = &ext2_aops;
-		inode->i_fop = &ext2_file_operations;
-	}
+	ext2_set_file_ops(inode);
 	mark_inode_dirty(inode);
 	return ext2_add_nondir(dentry, inode);
 }
@@ -125,14 +118,7 @@ static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
 	if (IS_ERR(inode))
 		return PTR_ERR(inode);
 
-	inode->i_op = &ext2_file_inode_operations;
-	if (test_opt(inode->i_sb, NOBH)) {
-		inode->i_mapping->a_ops = &ext2_nobh_aops;
-		inode->i_fop = &ext2_file_operations;
-	} else {
-		inode->i_mapping->a_ops = &ext2_aops;
-		inode->i_fop = &ext2_file_operations;
-	}
+	ext2_set_file_ops(inode);
 	mark_inode_dirty(inode);
 	d_tmpfile(dentry, inode);
 	unlock_new_inode(inode);
-- 
cgit v1.2.3


From d2c997c0f14535eff68d8ed9c2f1c5e100625751 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 22 Dec 2017 22:02:48 -0800
Subject: fs, dax: use page->mapping to warn if truncate collides with a busy
 page

Catch cases where extent unmap operations encounter pages that are
pinned / busy. Typically this is pinned pages that are under active dma.
This warning is a canary for potential data corruption as truncated
blocks could be allocated to a new file while the device is still
performing i/o.

Here is an example of a collision that this implementation catches:

 WARNING: CPU: 2 PID: 1286 at fs/dax.c:343 dax_disassociate_entry+0x55/0x80
 [..]
 Call Trace:
  __dax_invalidate_mapping_entry+0x6c/0xf0
  dax_delete_mapping_entry+0xf/0x20
  truncate_exceptional_pvec_entries.part.12+0x1af/0x200
  truncate_inode_pages_range+0x268/0x970
  ? tlb_gather_mmu+0x10/0x20
  ? up_write+0x1c/0x40
  ? unmap_mapping_range+0x73/0x140
  xfs_free_file_space+0x1b6/0x5b0 [xfs]
  ? xfs_file_fallocate+0x7f/0x320 [xfs]
  ? down_write_nested+0x40/0x70
  ? xfs_ilock+0x21d/0x2f0 [xfs]
  xfs_file_fallocate+0x162/0x320 [xfs]
  ? rcu_read_lock_sched_held+0x3f/0x70
  ? rcu_sync_lockdep_assert+0x2a/0x50
  ? __sb_start_write+0xd0/0x1b0
  ? vfs_fallocate+0x20c/0x270
  vfs_fallocate+0x154/0x270
  SyS_fallocate+0x43/0x80
  entry_SYSCALL_64_fastpath+0x1f/0x96

Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Matthew Wilcox <mawilcox@microsoft.com>
Cc: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 fs/dax.c | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)

diff --git a/fs/dax.c b/fs/dax.c
index b646a46e4d12..a77394fe586e 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -298,6 +298,63 @@ static void put_unlocked_mapping_entry(struct address_space *mapping,
 	dax_wake_mapping_entry_waiter(mapping, index, entry, false);
 }
 
+static unsigned long dax_entry_size(void *entry)
+{
+	if (dax_is_zero_entry(entry))
+		return 0;
+	else if (dax_is_empty_entry(entry))
+		return 0;
+	else if (dax_is_pmd_entry(entry))
+		return PMD_SIZE;
+	else
+		return PAGE_SIZE;
+}
+
+static unsigned long dax_radix_end_pfn(void *entry)
+{
+	return dax_radix_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
+}
+
+/*
+ * Iterate through all mapped pfns represented by an entry, i.e. skip
+ * 'empty' and 'zero' entries.
+ */
+#define for_each_mapped_pfn(entry, pfn) \
+	for (pfn = dax_radix_pfn(entry); \
+			pfn < dax_radix_end_pfn(entry); pfn++)
+
+static void dax_associate_entry(void *entry, struct address_space *mapping)
+{
+	unsigned long pfn;
+
+	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+		return;
+
+	for_each_mapped_pfn(entry, pfn) {
+		struct page *page = pfn_to_page(pfn);
+
+		WARN_ON_ONCE(page->mapping);
+		page->mapping = mapping;
+	}
+}
+
+static void dax_disassociate_entry(void *entry, struct address_space *mapping,
+		bool trunc)
+{
+	unsigned long pfn;
+
+	if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
+		return;
+
+	for_each_mapped_pfn(entry, pfn) {
+		struct page *page = pfn_to_page(pfn);
+
+		WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
+		WARN_ON_ONCE(page->mapping && page->mapping != mapping);
+		page->mapping = NULL;
+	}
+}
+
 /*
  * Find radix tree entry at given index. If it points to an exceptional entry,
  * return it with the radix tree entry locked. If the radix tree doesn't
@@ -404,6 +461,7 @@ restart:
 		}
 
 		if (pmd_downgrade) {
+			dax_disassociate_entry(entry, mapping, false);
 			radix_tree_delete(&mapping->page_tree, index);
 			mapping->nrexceptional--;
 			dax_wake_mapping_entry_waiter(mapping, index, entry,
@@ -453,6 +511,7 @@ static int __dax_invalidate_mapping_entry(struct address_space *mapping,
 	    (radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_DIRTY) ||
 	     radix_tree_tag_get(page_tree, index, PAGECACHE_TAG_TOWRITE)))
 		goto out;
+	dax_disassociate_entry(entry, mapping, trunc);
 	radix_tree_delete(page_tree, index);
 	mapping->nrexceptional--;
 	ret = 1;
@@ -547,6 +606,10 @@ static void *dax_insert_mapping_entry(struct address_space *mapping,
 
 	spin_lock_irq(&mapping->tree_lock);
 	new_entry = dax_radix_locked_entry(pfn, flags);
+	if (dax_entry_size(entry) != dax_entry_size(new_entry)) {
+		dax_disassociate_entry(entry, mapping, false);
+		dax_associate_entry(new_entry, mapping);
+	}
 
 	if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry)) {
 		/*
-- 
cgit v1.2.3


From 2080e88aecd78773eacbe7d6b9d59968000ba55a Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 29 Mar 2018 17:20:39 -0700
Subject: dax: introduce CONFIG_DAX_DRIVER

In support of allowing device-mapper to compile out idle/dead code when
there are no dax providers in the system, introduce the DAX_DRIVER
symbol. This is selected by all leaf drivers that device-mapper might be
layered on top. This allows device-mapper to conditionally 'select DAX'
only when a provider is present.

Cc: Martin Schwidefsky <schwidefsky@de.ibm.com>
Cc: Heiko Carstens <heiko.carstens@de.ibm.com>
Reported-by: Bart Van Assche <Bart.VanAssche@wdc.com>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/dax/Kconfig        | 5 ++++-
 drivers/nvdimm/Kconfig     | 2 +-
 drivers/s390/block/Kconfig | 2 +-
 3 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig
index b79aa8f7a497..e0700bf4893a 100644
--- a/drivers/dax/Kconfig
+++ b/drivers/dax/Kconfig
@@ -1,3 +1,7 @@
+config DAX_DRIVER
+	select DAX
+	bool
+
 menuconfig DAX
 	tristate "DAX: direct access to differentiated memory"
 	select SRCU
@@ -16,7 +20,6 @@ config DEV_DAX
 	  baseline memory pool.  Mappings of a /dev/daxX.Y device impose
 	  restrictions that make the mapping behavior deterministic.
 
-
 config DEV_DAX_PMEM
 	tristate "PMEM DAX: direct access to persistent memory"
 	depends on LIBNVDIMM && NVDIMM_DAX && DEV_DAX
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index a65f2e1d9f53..40cbdb16e23e 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -20,7 +20,7 @@ if LIBNVDIMM
 config BLK_DEV_PMEM
 	tristate "PMEM: Persistent memory block device support"
 	default LIBNVDIMM
-	select DAX
+	select DAX_DRIVER
 	select ND_BTT if BTT
 	select ND_PFN if NVDIMM_PFN
 	help
diff --git a/drivers/s390/block/Kconfig b/drivers/s390/block/Kconfig
index 1444333210c7..9ac7574e3cfb 100644
--- a/drivers/s390/block/Kconfig
+++ b/drivers/s390/block/Kconfig
@@ -15,8 +15,8 @@ config BLK_DEV_XPRAM
 
 config DCSSBLK
 	def_tristate m
-	select DAX
 	select FS_DAX_LIMITED
+	select DAX_DRIVER
 	prompt "DCSSBLK support"
 	depends on S390 && BLOCK
 	help
-- 
cgit v1.2.3


From 976431b02c2ef92ae3f8b6a7d699fc554025e118 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 29 Mar 2018 17:22:13 -0700
Subject: dax, dm: allow device-mapper to operate without dax support

Change device-mapper's DAX dependency to require the presence of at
least one DAX_DRIVER. This allows device-mapper to be built without
bringing the DAX core along which is especially wasteful when there are
no DAX drivers, like BLK_DEV_PMEM, configured.

Cc: Alasdair Kergon <agk@redhat.com>
Reported-by: Bart Van Assche <Bart.VanAssche@wdc.com>
Reported-by: kbuild test robot <lkp@intel.com>
Reported-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Mike Snitzer <snitzer@redhat.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/md/Kconfig         |  2 +-
 drivers/md/dm-linear.c     |  6 +++
 drivers/md/dm-log-writes.c | 95 ++++++++++++++++++++++++----------------------
 drivers/md/dm-stripe.c     |  6 +++
 drivers/md/dm.c            | 10 +++--
 include/linux/dax.h        | 30 ++++++++++++---
 6 files changed, 93 insertions(+), 56 deletions(-)

diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 2c8ac3688815..edff083f7c4e 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -201,7 +201,7 @@ config BLK_DEV_DM_BUILTIN
 config BLK_DEV_DM
 	tristate "Device mapper support"
 	select BLK_DEV_DM_BUILTIN
-	select DAX
+	depends on DAX || DAX=n
 	---help---
 	  Device-mapper is a low level volume manager.  It works by allowing
 	  people to specify mappings for ranges of logical sectors.  Various
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index d5f8eff7c11d..89443e0ededa 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -154,6 +154,7 @@ static int linear_iterate_devices(struct dm_target *ti,
 	return fn(ti, lc->dev, lc->start, ti->len, data);
 }
 
+#if IS_ENABLED(CONFIG_DAX_DRIVER)
 static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
 		long nr_pages, void **kaddr, pfn_t *pfn)
 {
@@ -184,6 +185,11 @@ static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
 	return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
+#else
+#define linear_dax_direct_access NULL
+#define linear_dax_copy_from_iter NULL
+#endif
+
 static struct target_type linear_target = {
 	.name   = "linear",
 	.version = {1, 4, 0},
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 3362d866793b..7fcb4216973f 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -610,51 +610,6 @@ static int log_mark(struct log_writes_c *lc, char *data)
 	return 0;
 }
 
-static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
-		   struct iov_iter *i)
-{
-	struct pending_block *block;
-
-	if (!bytes)
-		return 0;
-
-	block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
-	if (!block) {
-		DMERR("Error allocating dax pending block");
-		return -ENOMEM;
-	}
-
-	block->data = kzalloc(bytes, GFP_KERNEL);
-	if (!block->data) {
-		DMERR("Error allocating dax data space");
-		kfree(block);
-		return -ENOMEM;
-	}
-
-	/* write data provided via the iterator */
-	if (!copy_from_iter(block->data, bytes, i)) {
-		DMERR("Error copying dax data");
-		kfree(block->data);
-		kfree(block);
-		return -EIO;
-	}
-
-	/* rewind the iterator so that the block driver can use it */
-	iov_iter_revert(i, bytes);
-
-	block->datalen = bytes;
-	block->sector = bio_to_dev_sectors(lc, sector);
-	block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;
-
-	atomic_inc(&lc->pending_blocks);
-	spin_lock_irq(&lc->blocks_lock);
-	list_add_tail(&block->list, &lc->unflushed_blocks);
-	spin_unlock_irq(&lc->blocks_lock);
-	wake_up_process(lc->log_kthread);
-
-	return 0;
-}
-
 static void log_writes_dtr(struct dm_target *ti)
 {
 	struct log_writes_c *lc = ti->private;
@@ -920,6 +875,52 @@ static void log_writes_io_hints(struct dm_target *ti, struct queue_limits *limit
 	limits->io_min = limits->physical_block_size;
 }
 
+#if IS_ENABLED(CONFIG_DAX_DRIVER)
+static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes,
+		   struct iov_iter *i)
+{
+	struct pending_block *block;
+
+	if (!bytes)
+		return 0;
+
+	block = kzalloc(sizeof(struct pending_block), GFP_KERNEL);
+	if (!block) {
+		DMERR("Error allocating dax pending block");
+		return -ENOMEM;
+	}
+
+	block->data = kzalloc(bytes, GFP_KERNEL);
+	if (!block->data) {
+		DMERR("Error allocating dax data space");
+		kfree(block);
+		return -ENOMEM;
+	}
+
+	/* write data provided via the iterator */
+	if (!copy_from_iter(block->data, bytes, i)) {
+		DMERR("Error copying dax data");
+		kfree(block->data);
+		kfree(block);
+		return -EIO;
+	}
+
+	/* rewind the iterator so that the block driver can use it */
+	iov_iter_revert(i, bytes);
+
+	block->datalen = bytes;
+	block->sector = bio_to_dev_sectors(lc, sector);
+	block->nr_sectors = ALIGN(bytes, lc->sectorsize) >> lc->sectorshift;
+
+	atomic_inc(&lc->pending_blocks);
+	spin_lock_irq(&lc->blocks_lock);
+	list_add_tail(&block->list, &lc->unflushed_blocks);
+	spin_unlock_irq(&lc->blocks_lock);
+	wake_up_process(lc->log_kthread);
+
+	return 0;
+}
+
 static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
 					 long nr_pages, void **kaddr, pfn_t *pfn)
 {
@@ -956,6 +957,10 @@ static size_t log_writes_dax_copy_from_iter(struct dm_target *ti,
 dax_copy:
 	return dax_copy_from_iter(lc->dev->dax_dev, pgoff, addr, bytes, i);
 }
+#else
+#define log_writes_dax_direct_access NULL
+#define log_writes_dax_copy_from_iter NULL
+#endif
 
 static struct target_type log_writes_target = {
 	.name   = "log-writes",
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index b5e892149c54..ac2e8ee9d586 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -311,6 +311,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio)
 	return DM_MAPIO_REMAPPED;
 }
 
+#if IS_ENABLED(CONFIG_DAX_DRIVER)
 static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
 		long nr_pages, void **kaddr, pfn_t *pfn)
 {
@@ -351,6 +352,11 @@ static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff,
 	return dax_copy_from_iter(dax_dev, pgoff, addr, bytes, i);
 }
 
+#else
+#define stripe_dax_direct_access NULL
+#define stripe_dax_copy_from_iter NULL
+#endif
+
 /*
  * Stripe status:
  *
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 68136806d365..ffc93aecc02a 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1800,7 +1800,7 @@ static void cleanup_mapped_device(struct mapped_device *md)
 static struct mapped_device *alloc_dev(int minor)
 {
 	int r, numa_node_id = dm_get_numa_node();
-	struct dax_device *dax_dev;
+	struct dax_device *dax_dev = NULL;
 	struct mapped_device *md;
 	void *old_md;
 
@@ -1866,9 +1866,11 @@ static struct mapped_device *alloc_dev(int minor)
 	md->disk->private_data = md;
 	sprintf(md->disk->disk_name, "dm-%d", minor);
 
-	dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
-	if (!dax_dev)
-		goto bad;
+	if (IS_ENABLED(CONFIG_DAX_DRIVER)) {
+		dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops);
+		if (!dax_dev)
+			goto bad;
+	}
 	md->dax_dev = dax_dev;
 
 	add_disk_no_queue_reg(md->disk);
diff --git a/include/linux/dax.h b/include/linux/dax.h
index ae27a7efe7ab..f9eb22ad341e 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -26,16 +26,39 @@ extern struct attribute_group dax_attribute_group;
 
 #if IS_ENABLED(CONFIG_DAX)
 struct dax_device *dax_get_by_host(const char *host);
+struct dax_device *alloc_dax(void *private, const char *host,
+		const struct dax_operations *ops);
 void put_dax(struct dax_device *dax_dev);
+void kill_dax(struct dax_device *dax_dev);
+void dax_write_cache(struct dax_device *dax_dev, bool wc);
+bool dax_write_cache_enabled(struct dax_device *dax_dev);
 #else
 static inline struct dax_device *dax_get_by_host(const char *host)
 {
 	return NULL;
 }
-
+static inline struct dax_device *alloc_dax(void *private, const char *host,
+		const struct dax_operations *ops)
+{
+	/*
+	 * Callers should check IS_ENABLED(CONFIG_DAX) to know if this
+	 * NULL is an error or expected.
+	 */
+	return NULL;
+}
 static inline void put_dax(struct dax_device *dax_dev)
 {
 }
+static inline void kill_dax(struct dax_device *dax_dev)
+{
+}
+static inline void dax_write_cache(struct dax_device *dax_dev, bool wc)
+{
+}
+static inline bool dax_write_cache_enabled(struct dax_device *dax_dev)
+{
+	return false;
+}
 #endif
 
 struct writeback_control;
@@ -89,18 +112,13 @@ static inline int dax_writeback_mapping_range(struct address_space *mapping,
 
 int dax_read_lock(void);
 void dax_read_unlock(int id);
-struct dax_device *alloc_dax(void *private, const char *host,
-		const struct dax_operations *ops);
 bool dax_alive(struct dax_device *dax_dev);
-void kill_dax(struct dax_device *dax_dev);
 void *dax_get_private(struct dax_device *dax_dev);
 long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages,
 		void **kaddr, pfn_t *pfn);
 size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
 		size_t bytes, struct iov_iter *i);
 void dax_flush(struct dax_device *dax_dev, void *addr, size_t size);
-void dax_write_cache(struct dax_device *dax_dev, bool wc);
-bool dax_write_cache_enabled(struct dax_device *dax_dev);
 
 ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter,
 		const struct iomap_ops *ops);
-- 
cgit v1.2.3


From 78727137fdf49edf9f731bde79d7189067b4047a Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 2 Apr 2018 16:40:04 -0700
Subject: nfit, address-range-scrub: fix scrub in-progress reporting

There is a small window whereby ARS scan requests can schedule work that
userspace will miss when polling scrub_show. Hold the init_mutex lock
over calls to report the status to close this potential escape. Also,
make sure that requests to cancel the ARS workqueue are treated as an
idle event.

Cc: <stable@vger.kernel.org>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Fixes: 37b137ff8c83 ("nfit, libnvdimm: allow an ARS scrub...")
Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index ea9f3e727fef..2a1fc3817a81 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -1249,8 +1249,11 @@ static ssize_t scrub_show(struct device *dev,
 	if (nd_desc) {
 		struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc);
 
+		mutex_lock(&acpi_desc->init_mutex);
 		rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
-				(work_busy(&acpi_desc->work)) ? "+\n" : "\n");
+				work_busy(&acpi_desc->work)
+				&& !acpi_desc->cancel ? "+\n" : "\n");
+		mutex_unlock(&acpi_desc->init_mutex);
 	}
 	device_unlock(dev);
 	return rc;
-- 
cgit v1.2.3


From 243f29fe449bbead69076ad861dbe8f51b42c4d7 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 2 Apr 2018 13:14:25 -0700
Subject: libnvdimm: add an api to cast a 'struct nd_region' to its 'struct
 device'

For debug, it is useful for bus providers to be able to retrieve the
'struct device' associated with an nd_region instance that it
registered. We already have to_nd_region() to perform the reverse cast
operation, in fact its duplicate declaration can be removed from the
private drivers/nvdimm/nd.h header.

Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/nd.h          | 1 -
 drivers/nvdimm/region_devs.c | 8 ++++++++
 include/linux/libnvdimm.h    | 1 +
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 8d6375ee0fda..9dad5d737309 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -341,7 +341,6 @@ static inline struct device *nd_dax_create(struct nd_region *nd_region)
 }
 #endif
 
-struct nd_region *to_nd_region(struct device *dev);
 int nd_region_to_nstype(struct nd_region *nd_region);
 int nd_region_register_namespaces(struct nd_region *nd_region, int *err);
 u64 nd_region_interleave_set_cookie(struct nd_region *nd_region,
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index e6d01911e092..0af275c62bdf 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -182,6 +182,14 @@ struct nd_region *to_nd_region(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(to_nd_region);
 
+struct device *nd_region_dev(struct nd_region *nd_region)
+{
+	if (!nd_region)
+		return NULL;
+	return &nd_region->dev;
+}
+EXPORT_SYMBOL_GPL(nd_region_dev);
+
 struct nd_blk_region *to_nd_blk_region(struct device *dev)
 {
 	struct nd_region *nd_region = to_nd_region(dev);
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index ff855ed965fb..e0684a678a1b 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -164,6 +164,7 @@ void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus);
 struct nvdimm_bus *to_nvdimm_bus(struct device *dev);
 struct nvdimm *to_nvdimm(struct device *dev);
 struct nd_region *to_nd_region(struct device *dev);
+struct device *nd_region_dev(struct nd_region *nd_region);
 struct nd_blk_region *to_nd_blk_region(struct device *dev);
 struct nvdimm_bus_descriptor *to_nd_desc(struct nvdimm_bus *nvdimm_bus);
 struct device *to_nvdimm_bus_dev(struct nvdimm_bus *nvdimm_bus);
-- 
cgit v1.2.3


From 14c73f997a5e060c6887a80c143021a58975c92a Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 2 Apr 2018 15:40:30 -0700
Subject: nfit, address-range-scrub: introduce nfit_spa->ars_state

In preparation for re-working the ARS implementation to better handle
short vs long ARS runs, introduce nfit_spa->ars_state. For now this just
replaces the nfit_spa->ars_required bit-field/flag, but going forward it
will be used to track ARS completion and make short vs long ARS
requests.

Reviewed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c | 16 +++++++++-------
 drivers/acpi/nfit/nfit.h | 11 +++++++++--
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 2a1fc3817a81..9c56022c216f 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -2808,7 +2808,7 @@ static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
 	unsigned int tmo = scrub_timeout;
 	int rc;
 
-	if (!nfit_spa->ars_required || !nfit_spa->nd_region)
+	if (!test_bit(ARS_REQ, &nfit_spa->ars_state) || !nfit_spa->nd_region)
 		return;
 
 	rc = ars_start(acpi_desc, nfit_spa);
@@ -3003,7 +3003,7 @@ static void acpi_nfit_scrub(struct work_struct *work)
 		 * register them now to make data available.
 		 */
 		if (!nfit_spa->nd_region) {
-			nfit_spa->ars_required = 1;
+			set_bit(ARS_REQ, &nfit_spa->ars_state);
 			acpi_nfit_register_region(acpi_desc, nfit_spa);
 		}
 	}
@@ -3241,7 +3241,7 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
 	return 0;
 }
 
-int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags)
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
 {
 	struct device *dev = acpi_desc->dev;
 	struct nfit_spa *nfit_spa;
@@ -3261,9 +3261,11 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags)
 		if (nfit_spa_type(spa) != NFIT_SPA_PM)
 			continue;
 
-		nfit_spa->ars_required = 1;
+		set_bit(ARS_REQ, &nfit_spa->ars_state);
 	}
-	acpi_desc->ars_start_flags = flags;
+	acpi_desc->ars_start_flags = 0;
+	if (test_bit(ARS_SHORT, &flags))
+		acpi_desc->ars_start_flags |= ND_ARS_RETURN_PREV_DATA;
 	queue_work(nfit_wq, &acpi_desc->work);
 	dev_dbg(dev, "ars_scan triggered\n");
 	mutex_unlock(&acpi_desc->init_mutex);
@@ -3440,8 +3442,8 @@ static void acpi_nfit_update_notify(struct device *dev, acpi_handle handle)
 static void acpi_nfit_uc_error_notify(struct device *dev, acpi_handle handle)
 {
 	struct acpi_nfit_desc *acpi_desc = dev_get_drvdata(dev);
-	u8 flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ?
-			0 : ND_ARS_RETURN_PREV_DATA;
+	unsigned long flags = (acpi_desc->scrub_mode == HW_ERROR_SCRUB_ON) ?
+			0 : 1 << ARS_SHORT;
 
 	acpi_nfit_ars_rescan(acpi_desc, flags);
 }
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index ac9c49463731..2b97e5f76bdf 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -117,10 +117,17 @@ enum nfit_dimm_notifiers {
 	NFIT_NOTIFY_DIMM_HEALTH = 0x81,
 };
 
+enum nfit_ars_state {
+	ARS_REQ,
+	ARS_DONE,
+	ARS_SHORT,
+	ARS_FAILED,
+};
+
 struct nfit_spa {
 	struct list_head list;
 	struct nd_region *nd_region;
-	unsigned int ars_required:1;
+	unsigned long ars_state;
 	u32 clear_err_unit;
 	u32 max_ars;
 	struct acpi_nfit_system_address spa[0];
@@ -243,7 +250,7 @@ struct nfit_blk {
 
 extern struct list_head acpi_descs;
 extern struct mutex acpi_desc_lock;
-int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, u8 flags);
+int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags);
 
 #ifdef CONFIG_X86_MCE
 void nfit_mce_register(void);
-- 
cgit v1.2.3


From 4cf260fc409c73f6e40b3e8061a0cb925703d7ee Mon Sep 17 00:00:00 2001
From: Vishal Verma <vishal.l.verma@intel.com>
Date: Thu, 8 Feb 2018 14:24:04 -0700
Subject: libnvdimm, testing: Add emulation for smart injection commands

Add support for the smart injection command in the nvdimm unit test
framework. This allows for directly injecting to smart fields and flags
that are supported in the injection command. If the injected values are
past the threshold, then an acpi notification is also triggered.

Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/nfit.c      | 38 ++++++++++++++++++++++++++++++++++-
 tools/testing/nvdimm/test/nfit_test.h | 16 +++++++++++++++
 2 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index d785235ba04e..e83c24625fe4 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -711,7 +711,9 @@ static void smart_notify(struct device *bus_dev,
 				>= thresh->media_temperature)
 			|| ((thresh->alarm_control & ND_INTEL_SMART_CTEMP_TRIP)
 				&& smart->ctrl_temperature
-				>= thresh->ctrl_temperature)) {
+				>= thresh->ctrl_temperature)
+			|| (smart->health != ND_INTEL_SMART_NON_CRITICAL_HEALTH)
+			|| (smart->shutdown_state != 0)) {
 		device_lock(bus_dev);
 		__acpi_nvdimm_notify(dimm_dev, 0x81);
 		device_unlock(bus_dev);
@@ -737,6 +739,32 @@ static int nfit_test_cmd_smart_set_threshold(
 	return 0;
 }
 
+static int nfit_test_cmd_smart_inject(
+		struct nd_intel_smart_inject *inj,
+		unsigned int buf_len,
+		struct nd_intel_smart_threshold *thresh,
+		struct nd_intel_smart *smart,
+		struct device *bus_dev, struct device *dimm_dev)
+{
+	if (buf_len != sizeof(*inj))
+		return -EINVAL;
+
+	if (inj->mtemp_enable)
+		smart->media_temperature = inj->media_temperature;
+	if (inj->spare_enable)
+		smart->spares = inj->spares;
+	if (inj->fatal_enable)
+		smart->health = ND_INTEL_SMART_FATAL_HEALTH;
+	if (inj->unsafe_shutdown_enable) {
+		smart->shutdown_state = 1;
+		smart->shutdown_count++;
+	}
+	inj->status = 0;
+	smart_notify(bus_dev, dimm_dev, smart, thresh);
+
+	return 0;
+}
+
 static void uc_error_notify(struct work_struct *work)
 {
 	struct nfit_test *t = container_of(work, typeof(*t), work);
@@ -937,6 +965,13 @@ static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc,
 							t->dcr_idx],
 						&t->smart[i - t->dcr_idx],
 						&t->pdev.dev, t->dimm_dev[i]);
+			case ND_INTEL_SMART_INJECT:
+				return nfit_test_cmd_smart_inject(buf,
+						buf_len,
+						&t->smart_threshold[i -
+							t->dcr_idx],
+						&t->smart[i - t->dcr_idx],
+						&t->pdev.dev, t->dimm_dev[i]);
 			default:
 				return -ENOTTY;
 			}
@@ -2066,6 +2101,7 @@ static void nfit_test0_setup(struct nfit_test *t)
 	set_bit(ND_INTEL_SMART, &acpi_desc->dimm_cmd_force_en);
 	set_bit(ND_INTEL_SMART_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
 	set_bit(ND_INTEL_SMART_SET_THRESHOLD, &acpi_desc->dimm_cmd_force_en);
+	set_bit(ND_INTEL_SMART_INJECT, &acpi_desc->dimm_cmd_force_en);
 	set_bit(ND_CMD_ARS_CAP, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_ARS_START, &acpi_desc->bus_cmd_force_en);
 	set_bit(ND_CMD_ARS_STATUS, &acpi_desc->bus_cmd_force_en);
diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h
index 428344519cdf..33752e06ff8d 100644
--- a/tools/testing/nvdimm/test/nfit_test.h
+++ b/tools/testing/nvdimm/test/nfit_test.h
@@ -93,6 +93,7 @@ struct nd_cmd_ars_err_inj_stat {
 #define ND_INTEL_FW_FINISH_UPDATE	15
 #define ND_INTEL_FW_FINISH_QUERY	16
 #define ND_INTEL_SMART_SET_THRESHOLD	17
+#define ND_INTEL_SMART_INJECT		18
 
 #define ND_INTEL_SMART_HEALTH_VALID             (1 << 0)
 #define ND_INTEL_SMART_SPARES_VALID             (1 << 1)
@@ -111,6 +112,10 @@ struct nd_cmd_ars_err_inj_stat {
 #define ND_INTEL_SMART_NON_CRITICAL_HEALTH      (1 << 0)
 #define ND_INTEL_SMART_CRITICAL_HEALTH          (1 << 1)
 #define ND_INTEL_SMART_FATAL_HEALTH             (1 << 2)
+#define ND_INTEL_SMART_INJECT_MTEMP		(1 << 0)
+#define ND_INTEL_SMART_INJECT_SPARE		(1 << 1)
+#define ND_INTEL_SMART_INJECT_FATAL		(1 << 2)
+#define ND_INTEL_SMART_INJECT_SHUTDOWN		(1 << 3)
 
 struct nd_intel_smart {
 	__u32 status;
@@ -158,6 +163,17 @@ struct nd_intel_smart_set_threshold {
 	__u32 status;
 } __packed;
 
+struct nd_intel_smart_inject {
+	__u64 flags;
+	__u8 mtemp_enable;
+	__u16 media_temperature;
+	__u8 spare_enable;
+	__u8 spares;
+	__u8 fatal_enable;
+	__u8 unsafe_shutdown_enable;
+	__u32 status;
+} __packed;
+
 #define INTEL_FW_STORAGE_SIZE		0x100000
 #define INTEL_FW_MAX_SEND_LEN		0xFFEC
 #define INTEL_FW_QUERY_INTERVAL		250000
-- 
cgit v1.2.3


From f6adcca02ca26577692a1f2ee892c134b076b339 Mon Sep 17 00:00:00 2001
From: Vishal Verma <vishal.l.verma@intel.com>
Date: Thu, 8 Feb 2018 14:24:05 -0700
Subject: libnvdimm, testing: update the default smart ctrl_temperature

The default value for smart ctrl_temperature was the same as the
threshold for ctrl_temperature. As a result, any arbitrary smart
injection to the nfit_test dimm could cause this alarm to trigger
and cause an acpi notification. Drop the default value to below the
threshold, so that unrelated injections don't trigger notifications.

Cc: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Vishal Verma <vishal.l.verma@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 tools/testing/nvdimm/test/nfit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index e83c24625fe4..cb166be4918d 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -1259,7 +1259,7 @@ static void smart_init(struct nfit_test *t)
 			| ND_INTEL_SMART_MTEMP_VALID,
 		.health = ND_INTEL_SMART_NON_CRITICAL_HEALTH,
 		.media_temperature = 23 * 16,
-		.ctrl_temperature = 30 * 16,
+		.ctrl_temperature = 25 * 16,
 		.pmic_temperature = 40 * 16,
 		.spares = 75,
 		.alarm_flags = ND_INTEL_SMART_SPARE_TRIP
-- 
cgit v1.2.3


From c31898c8c711f2bbbcaebe802a55827e288d875a Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 6 Apr 2018 11:25:38 -0700
Subject: libnvdimm, dimm: fix dpa reservation vs uninitialized label area

At initialization time the 'dimm' driver caches a copy of the memory
device's label area and reserves address space for each of the
namespaces defined.

However, as can be seen below, the reservation occurs even when the
index blocks are invalid:

 nvdimm nmem0: nvdimm_init_config_data: len: 131072 rc: 0
 nvdimm nmem0: config data size: 131072
 nvdimm nmem0: __nd_label_validate: nsindex0 labelsize 1 invalid
 nvdimm nmem0: __nd_label_validate: nsindex1 labelsize 1 invalid
 nvdimm nmem0: : pmem-6025e505: 0x1000000000 @ 0xf50000000 reserve <-- bad

Gate dpa reservation on the presence of valid index blocks.

Cc: <stable@vger.kernel.org>
Fixes: 4a826c83db4e ("libnvdimm: namespace indices: read and validate")
Reported-by: Krzysztof Rusocki <krzysztof.rusocki@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/dimm.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
index f8913b8124b6..233907889f96 100644
--- a/drivers/nvdimm/dimm.c
+++ b/drivers/nvdimm/dimm.c
@@ -67,9 +67,11 @@ static int nvdimm_probe(struct device *dev)
 	ndd->ns_next = nd_label_next_nsindex(ndd->ns_current);
 	nd_label_copy(ndd, to_next_namespace_index(ndd),
 			to_current_namespace_index(ndd));
-	rc = nd_label_reserve_dpa(ndd);
-	if (ndd->ns_current >= 0)
-		nvdimm_set_aliasing(dev);
+	if (ndd->ns_current >= 0) {
+		rc = nd_label_reserve_dpa(ndd);
+		if (rc == 0)
+			nvdimm_set_aliasing(dev);
+	}
 	nvdimm_clear_locked(dev);
 	nvdimm_bus_unlock(dev);
 
-- 
cgit v1.2.3


From 4f8672201b7e7ed4f5f6c3cf6dcd080648580582 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Fri, 6 Apr 2018 16:37:21 -0700
Subject: libnvdimm, namespace: use a safe lookup for dimm device name

The following NULL dereference results from incorrectly assuming that
ndd is valid in this print:

  struct nvdimm_drvdata *ndd = to_ndd(&nd_region->mapping[i]);

  /*
   * Give up if we don't find an instance of a uuid at each
   * position (from 0 to nd_region->ndr_mappings - 1), or if we
   * find a dimm with two instances of the same uuid.
   */
  dev_err(&nd_region->dev, "%s missing label for %pUb\n",
                  dev_name(ndd->dev), nd_label->uuid);

 BUG: unable to handle kernel NULL pointer dereference at 0000000000000000
 IP: nd_region_register_namespaces+0xd67/0x13c0 [libnvdimm]
 PGD 0 P4D 0
 Oops: 0000 [#1] SMP PTI
 CPU: 43 PID: 673 Comm: kworker/u609:10 Not tainted 4.16.0-rc4+ #1
 [..]
 RIP: 0010:nd_region_register_namespaces+0xd67/0x13c0 [libnvdimm]
 [..]
 Call Trace:
  ? devres_add+0x2f/0x40
  ? devm_kmalloc+0x52/0x60
  ? nd_region_activate+0x9c/0x320 [libnvdimm]
  nd_region_probe+0x94/0x260 [libnvdimm]
  ? kernfs_add_one+0xe4/0x130
  nvdimm_bus_probe+0x63/0x100 [libnvdimm]

Switch to using the nvdimm device directly.

Fixes: 0e3b0d123c8f ("libnvdimm, namespace: allow multiple pmem...")
Cc: <stable@vger.kernel.org>
Reported-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/namespace_devs.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 89b40ff83025..28afdd668905 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -1921,7 +1921,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 	}
 
 	if (i < nd_region->ndr_mappings) {
-		struct nvdimm_drvdata *ndd = to_ndd(&nd_region->mapping[i]);
+		struct nvdimm *nvdimm = nd_region->mapping[i].nvdimm;
 
 		/*
 		 * Give up if we don't find an instance of a uuid at each
@@ -1929,7 +1929,7 @@ static struct device *create_namespace_pmem(struct nd_region *nd_region,
 		 * find a dimm with two instances of the same uuid.
 		 */
 		dev_err(&nd_region->dev, "%s missing label for %pUb\n",
-				dev_name(ndd->dev), nd_label->uuid);
+				nvdimm_name(nvdimm), nd_label->uuid);
 		rc = -EINVAL;
 		goto err;
 	}
-- 
cgit v1.2.3


From 60ce0f936bdad595a5cfc50b6e347a7de183ab59 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Sat, 7 Apr 2018 07:47:10 -0700
Subject: libnvdimm, region: quiet region probe

The message about constraining number of online cpus to be less than or
equal to ND_MAX_LANES (256) is only useful for block-aperture
configurations and BTT. Make it debug since it is only relevant when
debugging performance.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/region.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index 034f0a07d627..b9ca0033cc99 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -27,10 +27,10 @@ static int nd_region_probe(struct device *dev)
 	if (nd_region->num_lanes > num_online_cpus()
 			&& nd_region->num_lanes < num_possible_cpus()
 			&& !test_and_set_bit(0, &once)) {
-		dev_info(dev, "online cpus (%d) < concurrent i/o lanes (%d) < possible cpus (%d)\n",
+		dev_dbg(dev, "online cpus (%d) < concurrent i/o lanes (%d) < possible cpus (%d)\n",
 				num_online_cpus(), nd_region->num_lanes,
 				num_possible_cpus());
-		dev_info(dev, "setting nr_cpus=%d may yield better libnvdimm device performance\n",
+		dev_dbg(dev, "setting nr_cpus=%d may yield better libnvdimm device performance\n",
 				nd_region->num_lanes);
 	}
 
-- 
cgit v1.2.3


From 1ff19f487a7e55bf3cebc96ea2a9a38d66fb7db7 Mon Sep 17 00:00:00 2001
From: Oliver O'Halloran <oohall@gmail.com>
Date: Fri, 6 Apr 2018 15:21:13 +1000
Subject: libnvdimm: Add of_node to region and bus descriptors

We want to be able to cross reference the region and bus devices
with the device tree node that they were spawned from. libNVDIMM
handles creating the actual devices for these internally, so we
need to pass in a pointer to the relevant node in the descriptor.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Acked-by: Dan Williams <dan.j.williams@intel.com>
Acked-by: Balbir Singh <bsingharora@gmail.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/bus.c         | 1 +
 drivers/nvdimm/region_devs.c | 1 +
 include/linux/libnvdimm.h    | 3 +++
 3 files changed, 5 insertions(+)

diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
index 0ebc8c4dd0fa..a64023690cad 100644
--- a/drivers/nvdimm/bus.c
+++ b/drivers/nvdimm/bus.c
@@ -358,6 +358,7 @@ struct nvdimm_bus *nvdimm_bus_register(struct device *parent,
 	nvdimm_bus->dev.release = nvdimm_bus_release;
 	nvdimm_bus->dev.groups = nd_desc->attr_groups;
 	nvdimm_bus->dev.bus = &nvdimm_bus_type;
+	nvdimm_bus->dev.of_node = nd_desc->of_node;
 	dev_set_name(&nvdimm_bus->dev, "ndbus%d", nvdimm_bus->id);
 	rc = device_register(&nvdimm_bus->dev);
 	if (rc) {
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 0af275c62bdf..9f9a33e8cb09 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -1013,6 +1013,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
 	dev->parent = &nvdimm_bus->dev;
 	dev->type = dev_type;
 	dev->groups = ndr_desc->attr_groups;
+	dev->of_node = ndr_desc->of_node;
 	nd_region->ndr_size = resource_size(ndr_desc->res);
 	nd_region->ndr_start = ndr_desc->res->start;
 	nd_device_register(dev);
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index e0684a678a1b..097072c5a852 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -76,12 +76,14 @@ typedef int (*ndctl_fn)(struct nvdimm_bus_descriptor *nd_desc,
 		struct nvdimm *nvdimm, unsigned int cmd, void *buf,
 		unsigned int buf_len, int *cmd_rc);
 
+struct device_node;
 struct nvdimm_bus_descriptor {
 	const struct attribute_group **attr_groups;
 	unsigned long bus_dsm_mask;
 	unsigned long cmd_mask;
 	struct module *module;
 	char *provider_name;
+	struct device_node *of_node;
 	ndctl_fn ndctl;
 	int (*flush_probe)(struct nvdimm_bus_descriptor *nd_desc);
 	int (*clear_to_send)(struct nvdimm_bus_descriptor *nd_desc,
@@ -123,6 +125,7 @@ struct nd_region_desc {
 	int num_lanes;
 	int numa_node;
 	unsigned long flags;
+	struct device_node *of_node;
 };
 
 struct device;
-- 
cgit v1.2.3


From 7171976089528cb3d057a6fb288e7f8f89ab7f68 Mon Sep 17 00:00:00 2001
From: Oliver O'Halloran <oohall@gmail.com>
Date: Fri, 6 Apr 2018 15:21:14 +1000
Subject: libnvdimm: Add device-tree based driver

This patch adds peliminary device-tree bindings for persistent memory
regions. The driver registers a libnvdimm bus for each pmem-region
node and each address range under the node is converted to a region
within that bus.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 MAINTAINERS              |   7 +++
 drivers/nvdimm/Kconfig   |  10 ++++
 drivers/nvdimm/Makefile  |   1 +
 drivers/nvdimm/of_pmem.c | 119 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 137 insertions(+)
 create mode 100644 drivers/nvdimm/of_pmem.c

diff --git a/MAINTAINERS b/MAINTAINERS
index 4623caf8d72d..06230c06a071 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8035,6 +8035,13 @@ Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 S:	Supported
 F:	drivers/nvdimm/pmem*
 
+LIBNVDIMM: DEVICETREE BINDINGS
+M:	Oliver O'Halloran <oohall@gmail.com>
+L:	linux-nvdimm@lists.01.org
+Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
+S:	Supported
+F:	drivers/nvdimm/of_pmem.c
+
 LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM
 M:	Dan Williams <dan.j.williams@intel.com>
 L:	linux-nvdimm@lists.01.org
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index a65f2e1d9f53..2d6862bf7436 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -102,4 +102,14 @@ config NVDIMM_DAX
 
 	  Select Y if unsure
 
+config OF_PMEM
+	tristate "Device-tree support for persistent memory regions"
+	depends on OF
+	default LIBNVDIMM
+	help
+	  Allows regions of persistent memory to be described in the
+	  device-tree.
+
+	  Select Y if unsure.
+
 endif
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index 70d5f3ad9909..e8847045dac0 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -4,6 +4,7 @@ obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o
 obj-$(CONFIG_ND_BTT) += nd_btt.o
 obj-$(CONFIG_ND_BLK) += nd_blk.o
 obj-$(CONFIG_X86_PMEM_LEGACY) += nd_e820.o
+obj-$(CONFIG_OF_PMEM) += of_pmem.o
 
 nd_pmem-y := pmem.o
 
diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
new file mode 100644
index 000000000000..85013bad35de
--- /dev/null
+++ b/drivers/nvdimm/of_pmem.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#define pr_fmt(fmt) "of_pmem: " fmt
+
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/libnvdimm.h>
+#include <linux/module.h>
+#include <linux/ioport.h>
+#include <linux/slab.h>
+
+static const struct attribute_group *region_attr_groups[] = {
+	&nd_region_attribute_group,
+	&nd_device_attribute_group,
+	NULL,
+};
+
+static const struct attribute_group *bus_attr_groups[] = {
+	&nvdimm_bus_attribute_group,
+	NULL,
+};
+
+struct of_pmem_private {
+	struct nvdimm_bus_descriptor bus_desc;
+	struct nvdimm_bus *bus;
+};
+
+static int of_pmem_region_probe(struct platform_device *pdev)
+{
+	struct of_pmem_private *priv;
+	struct device_node *np;
+	struct nvdimm_bus *bus;
+	bool is_volatile;
+	int i;
+
+	np = dev_of_node(&pdev->dev);
+	if (!np)
+		return -ENXIO;
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->bus_desc.attr_groups = bus_attr_groups;
+	priv->bus_desc.provider_name = "of_pmem";
+	priv->bus_desc.module = THIS_MODULE;
+	priv->bus_desc.of_node = np;
+
+	priv->bus = bus = nvdimm_bus_register(&pdev->dev, &priv->bus_desc);
+	if (!bus) {
+		kfree(priv);
+		return -ENODEV;
+	}
+	platform_set_drvdata(pdev, priv);
+
+	is_volatile = !!of_find_property(np, "volatile", NULL);
+	dev_dbg(&pdev->dev, "Registering %s regions from %pOF\n",
+			is_volatile ? "volatile" : "non-volatile",  np);
+
+	for (i = 0; i < pdev->num_resources; i++) {
+		struct nd_region_desc ndr_desc;
+		struct nd_region *region;
+
+		/*
+		 * NB: libnvdimm copies the data from ndr_desc into it's own
+		 * structures so passing a stack pointer is fine.
+		 */
+		memset(&ndr_desc, 0, sizeof(ndr_desc));
+		ndr_desc.attr_groups = region_attr_groups;
+		ndr_desc.numa_node = of_node_to_nid(np);
+		ndr_desc.res = &pdev->resource[i];
+		ndr_desc.of_node = np;
+		set_bit(ND_REGION_PAGEMAP, &ndr_desc.flags);
+
+		if (is_volatile)
+			region = nvdimm_volatile_region_create(bus, &ndr_desc);
+		else
+			region = nvdimm_pmem_region_create(bus, &ndr_desc);
+
+		if (!region)
+			dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n",
+					ndr_desc.res, np);
+		else
+			dev_dbg(&pdev->dev, "Registered region %pR from %pOF\n",
+					ndr_desc.res, np);
+	}
+
+	return 0;
+}
+
+static int of_pmem_region_remove(struct platform_device *pdev)
+{
+	struct of_pmem_private *priv = platform_get_drvdata(pdev);
+
+	nvdimm_bus_unregister(priv->bus);
+	kfree(priv);
+
+	return 0;
+}
+
+static const struct of_device_id of_pmem_region_match[] = {
+	{ .compatible = "pmem-region" },
+	{ },
+};
+
+static struct platform_driver of_pmem_region_driver = {
+	.probe = of_pmem_region_probe,
+	.remove = of_pmem_region_remove,
+	.driver = {
+		.name = "of_pmem",
+		.owner = THIS_MODULE,
+		.of_match_table = of_pmem_region_match,
+	},
+};
+
+module_platform_driver(of_pmem_region_driver);
+MODULE_DEVICE_TABLE(of, of_pmem_region_match);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("IBM Corporation");
-- 
cgit v1.2.3


From ddc141e5c6be3c5f3f900ba306fe7c7fdb4258cc Mon Sep 17 00:00:00 2001
From: Oliver O'Halloran <oohall@gmail.com>
Date: Fri, 6 Apr 2018 15:21:15 +1000
Subject: doc/devicetree: Persistent memory region bindings

Add device-tree binding documentation for the nvdimm region driver.

Cc: devicetree@vger.kernel.org
Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 .../devicetree/bindings/pmem/pmem-region.txt       | 65 ++++++++++++++++++++++
 MAINTAINERS                                        |  1 +
 2 files changed, 66 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/pmem/pmem-region.txt

diff --git a/Documentation/devicetree/bindings/pmem/pmem-region.txt b/Documentation/devicetree/bindings/pmem/pmem-region.txt
new file mode 100644
index 000000000000..5cfa4f016a00
--- /dev/null
+++ b/Documentation/devicetree/bindings/pmem/pmem-region.txt
@@ -0,0 +1,65 @@
+Device-tree bindings for persistent memory regions
+-----------------------------------------------------
+
+Persistent memory refers to a class of memory devices that are:
+
+	a) Usable as main system memory (i.e. cacheable), and
+	b) Retain their contents across power failure.
+
+Given b) it is best to think of persistent memory as a kind of memory mapped
+storage device. To ensure data integrity the operating system needs to manage
+persistent regions separately to the normal memory pool. To aid with that this
+binding provides a standardised interface for discovering where persistent
+memory regions exist inside the physical address space.
+
+Bindings for the region nodes:
+-----------------------------
+
+Required properties:
+	- compatible = "pmem-region"
+
+	- reg = <base, size>;
+		The reg property should specificy an address range that is
+		translatable to a system physical address range. This address
+		range should be mappable as normal system memory would be
+		(i.e cacheable).
+
+		If the reg property contains multiple address ranges
+		each address range will be treated as though it was specified
+		in a separate device node. Having multiple address ranges in a
+		node implies no special relationship between the two ranges.
+
+Optional properties:
+	- Any relevant NUMA assocativity properties for the target platform.
+
+	- volatile; This property indicates that this region is actually
+	  backed by non-persistent memory. This lets the OS know that it
+	  may skip the cache flushes required to ensure data is made
+	  persistent after a write.
+
+	  If this property is absent then the OS must assume that the region
+	  is backed by non-volatile memory.
+
+Examples:
+--------------------
+
+	/*
+	 * This node specifies one 4KB region spanning from
+	 * 0x5000 to 0x5fff that is backed by non-volatile memory.
+	 */
+	pmem@5000 {
+		compatible = "pmem-region";
+		reg = <0x00005000 0x00001000>;
+	};
+
+	/*
+	 * This node specifies two 4KB regions that are backed by
+	 * volatile (normal) memory.
+	 */
+	pmem@6000 {
+		compatible = "pmem-region";
+		reg = < 0x00006000 0x00001000
+			0x00008000 0x00001000 >;
+		volatile;
+	};
+
diff --git a/MAINTAINERS b/MAINTAINERS
index 06230c06a071..662a71da42c7 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8041,6 +8041,7 @@ L:	linux-nvdimm@lists.01.org
 Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 S:	Supported
 F:	drivers/nvdimm/of_pmem.c
+F:	Documentation/devicetree/bindings/pmem/pmem-region.txt
 
 LIBNVDIMM: NON-VOLATILE MEMORY DEVICE SUBSYSTEM
 M:	Dan Williams <dan.j.williams@intel.com>
-- 
cgit v1.2.3


From 3013e17381b8ba9adc5b0fb424128ea692205300 Mon Sep 17 00:00:00 2001
From: Oliver O'Halloran <oohall@gmail.com>
Date: Fri, 6 Apr 2018 15:21:16 +1000
Subject: powerpc/powernv: Create platform devs for nvdimm buses

Scan the devicetree for an nvdimm-bus compatible and create
a platform device for them.

Signed-off-by: Oliver O'Halloran <oohall@gmail.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 arch/powerpc/platforms/powernv/opal.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index c15182765ff5..c37485a3c5c9 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -821,6 +821,9 @@ static int __init opal_init(void)
 	/* Create i2c platform devices */
 	opal_pdev_init("ibm,opal-i2c");
 
+	/* Handle non-volatile memory devices */
+	opal_pdev_init("pmem-region");
+
 	/* Setup a heatbeat thread if requested by OPAL */
 	opal_init_heartbeat();
 
-- 
cgit v1.2.3


From 459d0ddb079c869c986e1bb871c91564a4b8ccfe Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 5 Apr 2018 01:25:02 -0700
Subject: nfit, address-range-scrub: determine one platform max_ars value

acpi_nfit_query_poison() is awkward in that it requires an nfit_spa
argument in order to determine what max_ars value to use. Instead probe
for the minimum max_ars across all scrub-capable ranges in the system
and drop the nfit_spa argument.

This enables a larger rework / simplification of the ARS state machine
whereby the status can be retrieved once and then iterated over all
address ranges to reap completions.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c | 78 +++++++++++++++++++++++++-----------------------
 drivers/acpi/nfit/nfit.h |  2 +-
 2 files changed, 41 insertions(+), 39 deletions(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 9c56022c216f..866853abebea 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -2494,16 +2494,16 @@ static int ars_get_status(struct acpi_nfit_desc *acpi_desc)
 	int rc, cmd_rc;
 
 	rc = nd_desc->ndctl(nd_desc, NULL, ND_CMD_ARS_STATUS, ars_status,
-			acpi_desc->ars_status_size, &cmd_rc);
+			acpi_desc->max_ars, &cmd_rc);
 	if (rc < 0)
 		return rc;
 	return cmd_rc;
 }
 
-static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc,
-		struct nd_cmd_ars_status *ars_status)
+static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus;
+	struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
 	int rc;
 	u32 i;
 
@@ -2739,60 +2739,35 @@ static int acpi_nfit_register_region(struct acpi_nfit_desc *acpi_desc,
 	return rc;
 }
 
-static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc,
-		u32 max_ars)
+static int ars_status_alloc(struct acpi_nfit_desc *acpi_desc)
 {
 	struct device *dev = acpi_desc->dev;
 	struct nd_cmd_ars_status *ars_status;
 
-	if (acpi_desc->ars_status && acpi_desc->ars_status_size >= max_ars) {
-		memset(acpi_desc->ars_status, 0, acpi_desc->ars_status_size);
+	if (acpi_desc->ars_status) {
+		memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
 		return 0;
 	}
 
-	if (acpi_desc->ars_status)
-		devm_kfree(dev, acpi_desc->ars_status);
-	acpi_desc->ars_status = NULL;
-	ars_status = devm_kzalloc(dev, max_ars, GFP_KERNEL);
+	ars_status = devm_kzalloc(dev, acpi_desc->max_ars, GFP_KERNEL);
 	if (!ars_status)
 		return -ENOMEM;
 	acpi_desc->ars_status = ars_status;
-	acpi_desc->ars_status_size = max_ars;
 	return 0;
 }
 
-static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc,
-		struct nfit_spa *nfit_spa)
+static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc)
 {
-	struct acpi_nfit_system_address *spa = nfit_spa->spa;
 	int rc;
 
-	if (!nfit_spa->max_ars) {
-		struct nd_cmd_ars_cap ars_cap;
-
-		memset(&ars_cap, 0, sizeof(ars_cap));
-		rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
-		if (rc < 0)
-			return rc;
-		nfit_spa->max_ars = ars_cap.max_ars_out;
-		nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
-		/* check that the supported scrub types match the spa type */
-		if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE &&
-				((ars_cap.status >> 16) & ND_ARS_VOLATILE) == 0)
-			return -ENOTTY;
-		else if (nfit_spa_type(spa) == NFIT_SPA_PM &&
-				((ars_cap.status >> 16) & ND_ARS_PERSISTENT) == 0)
-			return -ENOTTY;
-	}
-
-	if (ars_status_alloc(acpi_desc, nfit_spa->max_ars))
+	if (ars_status_alloc(acpi_desc))
 		return -ENOMEM;
 
 	rc = ars_get_status(acpi_desc);
 	if (rc < 0 && rc != -ENOSPC)
 		return rc;
 
-	if (ars_status_process_records(acpi_desc, acpi_desc->ars_status))
+	if (ars_status_process_records(acpi_desc))
 		return -ENOMEM;
 
 	return 0;
@@ -2824,7 +2799,7 @@ static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
 
 		if (acpi_desc->cancel)
 			break;
-		rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
+		rc = acpi_nfit_query_poison(acpi_desc);
 		if (rc == -ENOTTY)
 			break;
 		if (rc == -EBUSY && !tmo) {
@@ -2925,7 +2900,7 @@ static void acpi_nfit_scrub(struct work_struct *work)
 			 */
 			rc = 0;
 		} else
-			rc = acpi_nfit_query_poison(acpi_desc, nfit_spa);
+			rc = acpi_nfit_query_poison(acpi_desc);
 
 		if (rc == -ENOTTY) {
 			/* no ars capability, just register spa and move on */
@@ -3018,6 +2993,31 @@ static void acpi_nfit_scrub(struct work_struct *work)
 	mutex_unlock(&acpi_desc->init_mutex);
 }
 
+static void acpi_nfit_init_ars(struct acpi_nfit_desc *acpi_desc,
+		struct nfit_spa *nfit_spa)
+{
+	int type = nfit_spa_type(nfit_spa->spa);
+	struct nd_cmd_ars_cap ars_cap;
+	int rc;
+
+	memset(&ars_cap, 0, sizeof(ars_cap));
+	rc = ars_get_cap(acpi_desc, &ars_cap, nfit_spa);
+	if (rc < 0)
+		return;
+	/* check that the supported scrub types match the spa type */
+	if (type == NFIT_SPA_VOLATILE && ((ars_cap.status >> 16)
+				& ND_ARS_VOLATILE) == 0)
+		return;
+	if (type == NFIT_SPA_PM && ((ars_cap.status >> 16)
+				& ND_ARS_PERSISTENT) == 0)
+		return;
+
+	nfit_spa->max_ars = ars_cap.max_ars_out;
+	nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
+	acpi_desc->max_ars = max(nfit_spa->max_ars, acpi_desc->max_ars);
+}
+
+
 static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nfit_spa *nfit_spa;
@@ -3026,8 +3026,10 @@ static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
 		int rc, type = nfit_spa_type(nfit_spa->spa);
 
 		/* PMEM and VMEM will be registered by the ARS workqueue */
-		if (type == NFIT_SPA_PM || type == NFIT_SPA_VOLATILE)
+		if (type == NFIT_SPA_PM || type == NFIT_SPA_VOLATILE) {
+			acpi_nfit_init_ars(acpi_desc, nfit_spa);
 			continue;
+		}
 		/* BLK apertures belong to BLK region registration below */
 		if (type == NFIT_SPA_BDW)
 			continue;
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 2b97e5f76bdf..45e7949986a8 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -197,10 +197,10 @@ struct acpi_nfit_desc {
 	struct device *dev;
 	u8 ars_start_flags;
 	struct nd_cmd_ars_status *ars_status;
-	size_t ars_status_size;
 	struct work_struct work;
 	struct list_head list;
 	struct kernfs_node *scrub_count_state;
+	unsigned int max_ars;
 	unsigned int scrub_count;
 	unsigned int scrub_mode;
 	unsigned int cancel:1;
-- 
cgit v1.2.3


From bc6ba8085842164f2a8dc2e78e23a7167872abbe Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Thu, 5 Apr 2018 16:18:55 -0700
Subject: nfit, address-range-scrub: rework and simplify ARS state machine

ARS is an operation that can take 10s to 100s of seconds to find media
errors that should rarely be present. If the platform crashes due to
media errors in persistent memory, the expectation is that the BIOS will
report those known errors in a 'short' ARS request.

A 'short' ARS request asks platform firmware to return an ARS payload
with all known errors, but without issuing a 'long' scrub. At driver
init a short request is issued to all PMEM ranges before registering
regions. Then, in the background, a long ARS is scheduled for each
region.

The ARS implementation is simplified to centralize ARS completion work
in the ars_complete() helper. The timeout is removed since there is no
facility to cancel ARS, and this otherwise arranges for system init to
never be blocked waiting for a 'long' ARS. The ars_state flags are used
to coordinate ARS requests from driver init, ARS requests from
userspace, and ARS requests in response to media error notifications.

Given that there is no notification of ARS completion the implementation
still needs to poll. It backs off exponentially to a maximum poll period
of 30 minutes.

Suggested-by: Toshi Kani <toshi.kani@hpe.com>
Co-developed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c | 482 +++++++++++++++++++++--------------------------
 drivers/acpi/nfit/nfit.h |   4 +-
 2 files changed, 218 insertions(+), 268 deletions(-)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 866853abebea..2532294bbd68 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -35,16 +35,6 @@ static bool force_enable_dimms;
 module_param(force_enable_dimms, bool, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(force_enable_dimms, "Ignore _STA (ACPI DIMM device) status");
 
-static unsigned int scrub_timeout = NFIT_ARS_TIMEOUT;
-module_param(scrub_timeout, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_timeout, "Initial scrub timeout in seconds");
-
-/* after three payloads of overflow, it's dead jim */
-static unsigned int scrub_overflow_abort = 3;
-module_param(scrub_overflow_abort, uint, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(scrub_overflow_abort,
-		"Number of times we overflow ARS results before abort");
-
 static bool disable_vendor_specific;
 module_param(disable_vendor_specific, bool, S_IRUGO);
 MODULE_PARM_DESC(disable_vendor_specific,
@@ -1251,7 +1241,7 @@ static ssize_t scrub_show(struct device *dev,
 
 		mutex_lock(&acpi_desc->init_mutex);
 		rc = sprintf(buf, "%d%s", acpi_desc->scrub_count,
-				work_busy(&acpi_desc->work)
+				work_busy(&acpi_desc->dwork.work)
 				&& !acpi_desc->cancel ? "+\n" : "\n");
 		mutex_unlock(&acpi_desc->init_mutex);
 	}
@@ -2452,7 +2442,8 @@ static int ars_start(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa
 	memset(&ars_start, 0, sizeof(ars_start));
 	ars_start.address = spa->address;
 	ars_start.length = spa->length;
-	ars_start.flags = acpi_desc->ars_start_flags;
+	if (test_bit(ARS_SHORT, &nfit_spa->ars_state))
+		ars_start.flags = ND_ARS_RETURN_PREV_DATA;
 	if (nfit_spa_type(spa) == NFIT_SPA_PM)
 		ars_start.type = ND_ARS_PERSISTENT;
 	else if (nfit_spa_type(spa) == NFIT_SPA_VOLATILE)
@@ -2500,6 +2491,52 @@ static int ars_get_status(struct acpi_nfit_desc *acpi_desc)
 	return cmd_rc;
 }
 
+static void ars_complete(struct acpi_nfit_desc *acpi_desc,
+		struct nfit_spa *nfit_spa)
+{
+	struct nd_cmd_ars_status *ars_status = acpi_desc->ars_status;
+	struct acpi_nfit_system_address *spa = nfit_spa->spa;
+	struct nd_region *nd_region = nfit_spa->nd_region;
+	struct device *dev;
+
+	if ((ars_status->address >= spa->address && ars_status->address
+				< spa->address + spa->length)
+			|| (ars_status->address < spa->address)) {
+		/*
+		 * Assume that if a scrub starts at an offset from the
+		 * start of nfit_spa that we are in the continuation
+		 * case.
+		 *
+		 * Otherwise, if the scrub covers the spa range, mark
+		 * any pending request complete.
+		 */
+		if (ars_status->address + ars_status->length
+				>= spa->address + spa->length)
+				/* complete */;
+		else
+			return;
+	} else
+		return;
+
+	if (test_bit(ARS_DONE, &nfit_spa->ars_state))
+		return;
+
+	if (!test_and_clear_bit(ARS_REQ, &nfit_spa->ars_state))
+		return;
+
+	if (nd_region) {
+		dev = nd_region_dev(nd_region);
+		nvdimm_region_notify(nd_region, NVDIMM_REVALIDATE_POISON);
+	} else
+		dev = acpi_desc->dev;
+
+	dev_dbg(dev, "ARS: range %d %s complete\n", spa->range_index,
+			test_bit(ARS_SHORT, &nfit_spa->ars_state)
+			? "short" : "long");
+	clear_bit(ARS_SHORT, &nfit_spa->ars_state);
+	set_bit(ARS_DONE, &nfit_spa->ars_state);
+}
+
 static int ars_status_process_records(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nvdimm_bus *nvdimm_bus = acpi_desc->nvdimm_bus;
@@ -2764,6 +2801,7 @@ static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc)
 		return -ENOMEM;
 
 	rc = ars_get_status(acpi_desc);
+
 	if (rc < 0 && rc != -ENOSPC)
 		return rc;
 
@@ -2773,223 +2811,125 @@ static int acpi_nfit_query_poison(struct acpi_nfit_desc *acpi_desc)
 	return 0;
 }
 
-static void acpi_nfit_async_scrub(struct acpi_nfit_desc *acpi_desc,
-		struct nfit_spa *nfit_spa)
+static int ars_register(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_spa,
+		int *query_rc)
 {
-	struct acpi_nfit_system_address *spa = nfit_spa->spa;
-	unsigned int overflow_retry = scrub_overflow_abort;
-	u64 init_ars_start = 0, init_ars_len = 0;
-	struct device *dev = acpi_desc->dev;
-	unsigned int tmo = scrub_timeout;
-	int rc;
-
-	if (!test_bit(ARS_REQ, &nfit_spa->ars_state) || !nfit_spa->nd_region)
-		return;
-
-	rc = ars_start(acpi_desc, nfit_spa);
-	/*
-	 * If we timed out the initial scan we'll still be busy here,
-	 * and will wait another timeout before giving up permanently.
-	 */
-	if (rc < 0 && rc != -EBUSY)
-		return;
+	int rc = *query_rc;
 
-	do {
-		u64 ars_start, ars_len;
-
-		if (acpi_desc->cancel)
-			break;
-		rc = acpi_nfit_query_poison(acpi_desc);
-		if (rc == -ENOTTY)
-			break;
-		if (rc == -EBUSY && !tmo) {
-			dev_warn(dev, "range %d ars timeout, aborting\n",
-					spa->range_index);
-			break;
-		}
+	set_bit(ARS_REQ, &nfit_spa->ars_state);
+	set_bit(ARS_SHORT, &nfit_spa->ars_state);
 
+	switch (rc) {
+	case 0:
+	case -EAGAIN:
+		rc = ars_start(acpi_desc, nfit_spa);
 		if (rc == -EBUSY) {
-			/*
-			 * Note, entries may be appended to the list
-			 * while the lock is dropped, but the workqueue
-			 * being active prevents entries being deleted /
-			 * freed.
-			 */
-			mutex_unlock(&acpi_desc->init_mutex);
-			ssleep(1);
-			tmo--;
-			mutex_lock(&acpi_desc->init_mutex);
-			continue;
-		}
-
-		/* we got some results, but there are more pending... */
-		if (rc == -ENOSPC && overflow_retry--) {
-			if (!init_ars_len) {
-				init_ars_len = acpi_desc->ars_status->length;
-				init_ars_start = acpi_desc->ars_status->address;
-			}
-			rc = ars_continue(acpi_desc);
-		}
-
-		if (rc < 0) {
-			dev_warn(dev, "range %d ars continuation failed\n",
-					spa->range_index);
+			*query_rc = rc;
 			break;
-		}
-
-		if (init_ars_len) {
-			ars_start = init_ars_start;
-			ars_len = init_ars_len;
+		} else if (rc == 0) {
+			rc = acpi_nfit_query_poison(acpi_desc);
 		} else {
-			ars_start = acpi_desc->ars_status->address;
-			ars_len = acpi_desc->ars_status->length;
+			set_bit(ARS_FAILED, &nfit_spa->ars_state);
+			break;
 		}
-		dev_dbg(dev, "spa range: %d ars from %#llx + %#llx complete\n",
-				spa->range_index, ars_start, ars_len);
-		/* notify the region about new poison entries */
-		nvdimm_region_notify(nfit_spa->nd_region,
-				NVDIMM_REVALIDATE_POISON);
+		if (rc == -EAGAIN)
+			clear_bit(ARS_SHORT, &nfit_spa->ars_state);
+		else if (rc == 0)
+			ars_complete(acpi_desc, nfit_spa);
+		break;
+	case -EBUSY:
+	case -ENOSPC:
 		break;
-	} while (1);
+	default:
+		set_bit(ARS_FAILED, &nfit_spa->ars_state);
+		break;
+	}
+
+	if (test_and_clear_bit(ARS_DONE, &nfit_spa->ars_state))
+		set_bit(ARS_REQ, &nfit_spa->ars_state);
+
+	return acpi_nfit_register_region(acpi_desc, nfit_spa);
 }
 
-static void acpi_nfit_scrub(struct work_struct *work)
+static void ars_complete_all(struct acpi_nfit_desc *acpi_desc)
 {
-	struct device *dev;
-	u64 init_scrub_length = 0;
 	struct nfit_spa *nfit_spa;
-	u64 init_scrub_address = 0;
-	bool init_ars_done = false;
-	struct acpi_nfit_desc *acpi_desc;
-	unsigned int tmo = scrub_timeout;
-	unsigned int overflow_retry = scrub_overflow_abort;
-
-	acpi_desc = container_of(work, typeof(*acpi_desc), work);
-	dev = acpi_desc->dev;
-
-	/*
-	 * We scrub in 2 phases.  The first phase waits for any platform
-	 * firmware initiated scrubs to complete and then we go search for the
-	 * affected spa regions to mark them scanned.  In the second phase we
-	 * initiate a directed scrub for every range that was not scrubbed in
-	 * phase 1. If we're called for a 'rescan', we harmlessly pass through
-	 * the first phase, but really only care about running phase 2, where
-	 * regions can be notified of new poison.
-	 */
 
-	/* process platform firmware initiated scrubs */
- retry:
-	mutex_lock(&acpi_desc->init_mutex);
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		struct nd_cmd_ars_status *ars_status;
-		struct acpi_nfit_system_address *spa;
-		u64 ars_start, ars_len;
-		int rc;
-
-		if (acpi_desc->cancel)
-			break;
-
-		if (nfit_spa->nd_region)
-			continue;
-
-		if (init_ars_done) {
-			/*
-			 * No need to re-query, we're now just
-			 * reconciling all the ranges covered by the
-			 * initial scrub
-			 */
-			rc = 0;
-		} else
-			rc = acpi_nfit_query_poison(acpi_desc);
-
-		if (rc == -ENOTTY) {
-			/* no ars capability, just register spa and move on */
-			acpi_nfit_register_region(acpi_desc, nfit_spa);
+		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
 			continue;
-		}
-
-		if (rc == -EBUSY && !tmo) {
-			/* fallthrough to directed scrub in phase 2 */
-			dev_warn(dev, "timeout awaiting ars results, continuing...\n");
-			break;
-		} else if (rc == -EBUSY) {
-			mutex_unlock(&acpi_desc->init_mutex);
-			ssleep(1);
-			tmo--;
-			goto retry;
-		}
+		ars_complete(acpi_desc, nfit_spa);
+	}
+}
 
-		/* we got some results, but there are more pending... */
-		if (rc == -ENOSPC && overflow_retry--) {
-			ars_status = acpi_desc->ars_status;
-			/*
-			 * Record the original scrub range, so that we
-			 * can recall all the ranges impacted by the
-			 * initial scrub.
-			 */
-			if (!init_scrub_length) {
-				init_scrub_length = ars_status->length;
-				init_scrub_address = ars_status->address;
-			}
-			rc = ars_continue(acpi_desc);
-			if (rc == 0) {
-				mutex_unlock(&acpi_desc->init_mutex);
-				goto retry;
-			}
-		}
+static unsigned int __acpi_nfit_scrub(struct acpi_nfit_desc *acpi_desc,
+		int query_rc)
+{
+	unsigned int tmo = acpi_desc->scrub_tmo;
+	struct device *dev = acpi_desc->dev;
+	struct nfit_spa *nfit_spa;
 
-		if (rc < 0) {
-			/*
-			 * Initial scrub failed, we'll give it one more
-			 * try below...
-			 */
-			break;
-		}
+	if (acpi_desc->cancel)
+		return 0;
 
-		/* We got some final results, record completed ranges */
-		ars_status = acpi_desc->ars_status;
-		if (init_scrub_length) {
-			ars_start = init_scrub_address;
-			ars_len = ars_start + init_scrub_length;
-		} else {
-			ars_start = ars_status->address;
-			ars_len = ars_status->length;
-		}
-		spa = nfit_spa->spa;
+	if (query_rc == -EBUSY) {
+		dev_dbg(dev, "ARS: ARS busy\n");
+		return min(30U * 60U, tmo * 2);
+	}
+	if (query_rc == -ENOSPC) {
+		dev_dbg(dev, "ARS: ARS continue\n");
+		ars_continue(acpi_desc);
+		return 1;
+	}
+	if (query_rc && query_rc != -EAGAIN) {
+		unsigned long long addr, end;
 
-		if (!init_ars_done) {
-			init_ars_done = true;
-			dev_dbg(dev, "init scrub %#llx + %#llx complete\n",
-					ars_start, ars_len);
-		}
-		if (ars_start <= spa->address && ars_start + ars_len
-				>= spa->address + spa->length)
-			acpi_nfit_register_region(acpi_desc, nfit_spa);
+		addr = acpi_desc->ars_status->address;
+		end = addr + acpi_desc->ars_status->length;
+		dev_dbg(dev, "ARS: %llx-%llx failed (%d)\n", addr, end,
+				query_rc);
 	}
 
-	/*
-	 * For all the ranges not covered by an initial scrub we still
-	 * want to see if there are errors, but it's ok to discover them
-	 * asynchronously.
-	 */
+	ars_complete_all(acpi_desc);
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		/*
-		 * Flag all the ranges that still need scrubbing, but
-		 * register them now to make data available.
-		 */
-		if (!nfit_spa->nd_region) {
-			set_bit(ARS_REQ, &nfit_spa->ars_state);
-			acpi_nfit_register_region(acpi_desc, nfit_spa);
+		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
+			continue;
+		if (test_bit(ARS_REQ, &nfit_spa->ars_state)) {
+			int rc = ars_start(acpi_desc, nfit_spa);
+
+			clear_bit(ARS_DONE, &nfit_spa->ars_state);
+			dev = nd_region_dev(nfit_spa->nd_region);
+			dev_dbg(dev, "ARS: range %d ARS start (%d)\n",
+					nfit_spa->spa->range_index, rc);
+			if (rc == 0 || rc == -EBUSY)
+				return 1;
+			dev_err(dev, "ARS: range %d ARS failed (%d)\n",
+					nfit_spa->spa->range_index, rc);
+			set_bit(ARS_FAILED, &nfit_spa->ars_state);
 		}
 	}
-	acpi_desc->init_complete = 1;
+	return 0;
+}
 
-	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
-		acpi_nfit_async_scrub(acpi_desc, nfit_spa);
-	acpi_desc->scrub_count++;
-	acpi_desc->ars_start_flags = 0;
-	if (acpi_desc->scrub_count_state)
-		sysfs_notify_dirent(acpi_desc->scrub_count_state);
+static void acpi_nfit_scrub(struct work_struct *work)
+{
+	struct acpi_nfit_desc *acpi_desc;
+	unsigned int tmo;
+	int query_rc;
+
+	acpi_desc = container_of(work, typeof(*acpi_desc), dwork.work);
+	mutex_lock(&acpi_desc->init_mutex);
+	query_rc = acpi_nfit_query_poison(acpi_desc);
+	tmo = __acpi_nfit_scrub(acpi_desc, query_rc);
+	if (tmo) {
+		queue_delayed_work(nfit_wq, &acpi_desc->dwork, tmo * HZ);
+		acpi_desc->scrub_tmo = tmo;
+	} else {
+		acpi_desc->scrub_count++;
+		if (acpi_desc->scrub_count_state)
+			sysfs_notify_dirent(acpi_desc->scrub_count_state);
+	}
+	memset(acpi_desc->ars_status, 0, acpi_desc->max_ars);
 	mutex_unlock(&acpi_desc->init_mutex);
 }
 
@@ -3015,33 +2955,61 @@ static void acpi_nfit_init_ars(struct acpi_nfit_desc *acpi_desc,
 	nfit_spa->max_ars = ars_cap.max_ars_out;
 	nfit_spa->clear_err_unit = ars_cap.clear_err_unit;
 	acpi_desc->max_ars = max(nfit_spa->max_ars, acpi_desc->max_ars);
+	clear_bit(ARS_FAILED, &nfit_spa->ars_state);
+	set_bit(ARS_REQ, &nfit_spa->ars_state);
 }
 
-
 static int acpi_nfit_register_regions(struct acpi_nfit_desc *acpi_desc)
 {
 	struct nfit_spa *nfit_spa;
+	int rc, query_rc;
 
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		int rc, type = nfit_spa_type(nfit_spa->spa);
-
-		/* PMEM and VMEM will be registered by the ARS workqueue */
-		if (type == NFIT_SPA_PM || type == NFIT_SPA_VOLATILE) {
+		set_bit(ARS_FAILED, &nfit_spa->ars_state);
+		switch (nfit_spa_type(nfit_spa->spa)) {
+		case NFIT_SPA_VOLATILE:
+		case NFIT_SPA_PM:
 			acpi_nfit_init_ars(acpi_desc, nfit_spa);
-			continue;
+			break;
 		}
-		/* BLK apertures belong to BLK region registration below */
-		if (type == NFIT_SPA_BDW)
-			continue;
-		/* BLK regions don't need to wait for ARS results */
-		rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
-		if (rc)
-			return rc;
 	}
 
-	acpi_desc->ars_start_flags = 0;
-	if (!acpi_desc->cancel)
-		queue_work(nfit_wq, &acpi_desc->work);
+	/*
+	 * Reap any results that might be pending before starting new
+	 * short requests.
+	 */
+	query_rc = acpi_nfit_query_poison(acpi_desc);
+	if (query_rc == 0)
+		ars_complete_all(acpi_desc);
+
+	list_for_each_entry(nfit_spa, &acpi_desc->spas, list)
+		switch (nfit_spa_type(nfit_spa->spa)) {
+		case NFIT_SPA_VOLATILE:
+		case NFIT_SPA_PM:
+			/* register regions and kick off initial ARS run */
+			rc = ars_register(acpi_desc, nfit_spa, &query_rc);
+			if (rc)
+				return rc;
+			break;
+		case NFIT_SPA_BDW:
+			/* nothing to register */
+			break;
+		case NFIT_SPA_DCR:
+		case NFIT_SPA_VDISK:
+		case NFIT_SPA_VCD:
+		case NFIT_SPA_PDISK:
+		case NFIT_SPA_PCD:
+			/* register known regions that don't support ARS */
+			rc = acpi_nfit_register_region(acpi_desc, nfit_spa);
+			if (rc)
+				return rc;
+			break;
+		default:
+			/* don't register unknown regions */
+			break;
+		}
+
+	queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
 	return 0;
 }
 
@@ -3176,49 +3144,20 @@ int acpi_nfit_init(struct acpi_nfit_desc *acpi_desc, void *data, acpi_size sz)
 }
 EXPORT_SYMBOL_GPL(acpi_nfit_init);
 
-struct acpi_nfit_flush_work {
-	struct work_struct work;
-	struct completion cmp;
-};
-
-static void flush_probe(struct work_struct *work)
-{
-	struct acpi_nfit_flush_work *flush;
-
-	flush = container_of(work, typeof(*flush), work);
-	complete(&flush->cmp);
-}
-
 static int acpi_nfit_flush_probe(struct nvdimm_bus_descriptor *nd_desc)
 {
 	struct acpi_nfit_desc *acpi_desc = to_acpi_nfit_desc(nd_desc);
 	struct device *dev = acpi_desc->dev;
-	struct acpi_nfit_flush_work flush;
-	int rc;
 
-	/* bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
+	/* Bounce the device lock to flush acpi_nfit_add / acpi_nfit_notify */
 	device_lock(dev);
 	device_unlock(dev);
 
-	/* bounce the init_mutex to make init_complete valid */
+	/* Bounce the init_mutex to complete initial registration */
 	mutex_lock(&acpi_desc->init_mutex);
-	if (acpi_desc->cancel || acpi_desc->init_complete) {
-		mutex_unlock(&acpi_desc->init_mutex);
-		return 0;
-	}
-
-	/*
-	 * Scrub work could take 10s of seconds, userspace may give up so we
-	 * need to be interruptible while waiting.
-	 */
-	INIT_WORK_ONSTACK(&flush.work, flush_probe);
-	init_completion(&flush.cmp);
-	queue_work(nfit_wq, &flush.work);
 	mutex_unlock(&acpi_desc->init_mutex);
 
-	rc = wait_for_completion_interruptible(&flush.cmp);
-	cancel_work_sync(&flush.work);
-	return rc;
+	return 0;
 }
 
 static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
@@ -3237,7 +3176,7 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
 	 * just needs guarantees that any ars it initiates are not
 	 * interrupted by any intervening start reqeusts from userspace.
 	 */
-	if (work_busy(&acpi_desc->work))
+	if (work_busy(&acpi_desc->dwork.work))
 		return -EBUSY;
 
 	return 0;
@@ -3246,11 +3185,9 @@ static int acpi_nfit_clear_to_send(struct nvdimm_bus_descriptor *nd_desc,
 int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
 {
 	struct device *dev = acpi_desc->dev;
+	int scheduled = 0, busy = 0;
 	struct nfit_spa *nfit_spa;
 
-	if (work_busy(&acpi_desc->work))
-		return -EBUSY;
-
 	mutex_lock(&acpi_desc->init_mutex);
 	if (acpi_desc->cancel) {
 		mutex_unlock(&acpi_desc->init_mutex);
@@ -3258,21 +3195,32 @@ int acpi_nfit_ars_rescan(struct acpi_nfit_desc *acpi_desc, unsigned long flags)
 	}
 
 	list_for_each_entry(nfit_spa, &acpi_desc->spas, list) {
-		struct acpi_nfit_system_address *spa = nfit_spa->spa;
+		int type = nfit_spa_type(nfit_spa->spa);
 
-		if (nfit_spa_type(spa) != NFIT_SPA_PM)
+		if (type != NFIT_SPA_PM && type != NFIT_SPA_VOLATILE)
+			continue;
+		if (test_bit(ARS_FAILED, &nfit_spa->ars_state))
 			continue;
 
-		set_bit(ARS_REQ, &nfit_spa->ars_state);
+		if (test_and_set_bit(ARS_REQ, &nfit_spa->ars_state))
+			busy++;
+		else {
+			if (test_bit(ARS_SHORT, &flags))
+				set_bit(ARS_SHORT, &nfit_spa->ars_state);
+			scheduled++;
+		}
+	}
+	if (scheduled) {
+		queue_delayed_work(nfit_wq, &acpi_desc->dwork, 0);
+		dev_dbg(dev, "ars_scan triggered\n");
 	}
-	acpi_desc->ars_start_flags = 0;
-	if (test_bit(ARS_SHORT, &flags))
-		acpi_desc->ars_start_flags |= ND_ARS_RETURN_PREV_DATA;
-	queue_work(nfit_wq, &acpi_desc->work);
-	dev_dbg(dev, "ars_scan triggered\n");
 	mutex_unlock(&acpi_desc->init_mutex);
 
-	return 0;
+	if (scheduled)
+		return 0;
+	if (busy)
+		return -EBUSY;
+	return -ENOTTY;
 }
 
 void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
@@ -3299,7 +3247,8 @@ void acpi_nfit_desc_init(struct acpi_nfit_desc *acpi_desc, struct device *dev)
 	INIT_LIST_HEAD(&acpi_desc->dimms);
 	INIT_LIST_HEAD(&acpi_desc->list);
 	mutex_init(&acpi_desc->init_mutex);
-	INIT_WORK(&acpi_desc->work, acpi_nfit_scrub);
+	acpi_desc->scrub_tmo = 1;
+	INIT_DELAYED_WORK(&acpi_desc->dwork, acpi_nfit_scrub);
 }
 EXPORT_SYMBOL_GPL(acpi_nfit_desc_init);
 
@@ -3323,6 +3272,7 @@ void acpi_nfit_shutdown(void *data)
 
 	mutex_lock(&acpi_desc->init_mutex);
 	acpi_desc->cancel = 1;
+	cancel_delayed_work_sync(&acpi_desc->dwork);
 	mutex_unlock(&acpi_desc->init_mutex);
 
 	/*
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h
index 45e7949986a8..7d15856a739f 100644
--- a/drivers/acpi/nfit/nfit.h
+++ b/drivers/acpi/nfit/nfit.h
@@ -197,18 +197,18 @@ struct acpi_nfit_desc {
 	struct device *dev;
 	u8 ars_start_flags;
 	struct nd_cmd_ars_status *ars_status;
-	struct work_struct work;
+	struct delayed_work dwork;
 	struct list_head list;
 	struct kernfs_node *scrub_count_state;
 	unsigned int max_ars;
 	unsigned int scrub_count;
 	unsigned int scrub_mode;
 	unsigned int cancel:1;
-	unsigned int init_complete:1;
 	unsigned long dimm_cmd_force_en;
 	unsigned long bus_cmd_force_en;
 	unsigned long bus_nfit_cmd_force_en;
 	unsigned int platform_cap;
+	unsigned int scrub_tmo;
 	int (*blk_do_io)(struct nd_blk_region *ndbr, resource_size_t dpa,
 			void *iobuf, u64 len, int rw);
 };
-- 
cgit v1.2.3


From bca811a7fd5a5c1c0644926161c2843322ab74b3 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 2 Apr 2018 15:28:03 -0700
Subject: nfit, address-range-scrub: add module option to skip initial ars

After attempting to quickly retrieve known errors the kernel proceeds to
kick off a long running ARS. Add a module option to disable this
behavior at initialization time, or at new region discovery time.
Otherwise, ARS can be started manually regardless of the state of this
setting.

Co-developed-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/acpi/nfit/core.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 2532294bbd68..7f3c70fde9a8 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -49,6 +49,10 @@ module_param(default_dsm_family, int, S_IRUGO);
 MODULE_PARM_DESC(default_dsm_family,
 		"Try this DSM type first when identifying NVDIMM family");
 
+static bool no_init_ars;
+module_param(no_init_ars, bool, 0644);
+MODULE_PARM_DESC(no_init_ars, "Skip ARS run at nfit init time");
+
 LIST_HEAD(acpi_descs);
 DEFINE_MUTEX(acpi_desc_lock);
 
@@ -2816,6 +2820,9 @@ static int ars_register(struct acpi_nfit_desc *acpi_desc, struct nfit_spa *nfit_
 {
 	int rc = *query_rc;
 
+	if (no_init_ars)
+		return acpi_nfit_register_region(acpi_desc, nfit_spa);
+
 	set_bit(ARS_REQ, &nfit_spa->ars_state);
 	set_bit(ARS_SHORT, &nfit_spa->ars_state);
 
-- 
cgit v1.2.3


From 291717b6fbdb175da88ae2144fc58d63a490128d Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 9 Apr 2018 09:10:22 -0700
Subject: libnvdimm, of_pmem: workaround OF_NUMA=n build error

Stephen reports that an x86 allmodconfig build fails to build the
of_pmem driver due to a missing definition of of_node_to_nid(). That
helper is currently only exported in the OF_NUMA=y case. In other cases,
ppc and sparc, it is a weak symbol, and outside of those platforms it is
a static inline.

Until an OF_NUMA=n configuration can reliably support usage of
of_node_to_nid() in modules across architectures, mark this driver as
'bool' instead of 'tristate'.

Cc: Rob Herring <robh@kernel.org>
Cc: Oliver O'Halloran <oohall@gmail.com>
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---
 drivers/nvdimm/Kconfig | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 2d6862bf7436..f6c533c4d09b 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -103,7 +103,8 @@ config NVDIMM_DAX
 	  Select Y if unsure
 
 config OF_PMEM
-	tristate "Device-tree support for persistent memory regions"
+	# FIXME: make tristate once OF_NUMA dependency removed
+	bool "Device-tree support for persistent memory regions"
 	depends on OF
 	default LIBNVDIMM
 	help
-- 
cgit v1.2.3