From df2798bc778acadcd87d7ff98a4db47197defc5f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:06:51 -0800 Subject: dax/hmem: Move HMAT and Soft reservation probe initcall level In preparation for moving more filtering of "hmem" ranges into the dax_hmem.ko module, update the initcall levels. HMAT range registration moves to subsys_initcall() to be done before Soft Reservation probing, and Soft Reservation probing is moved to device_initcall() to be done before dax_hmem.ko initialization if it is built-in. Tested-by: Fan Ni Reviewed-by: Vishal Verma Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/167602001107.1924368.11562316181038595611.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/dax/hmem/Makefile | 3 ++- drivers/dax/hmem/device.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/dax') diff --git a/drivers/dax/hmem/Makefile b/drivers/dax/hmem/Makefile index 57377b4c3d47..d4c4cd6bccd7 100644 --- a/drivers/dax/hmem/Makefile +++ b/drivers/dax/hmem/Makefile @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 -obj-$(CONFIG_DEV_DAX_HMEM) += dax_hmem.o +# device_hmem.o deliberately precedes dax_hmem.o for initcall ordering obj-$(CONFIG_DEV_DAX_HMEM_DEVICES) += device_hmem.o +obj-$(CONFIG_DEV_DAX_HMEM) += dax_hmem.o device_hmem-y := device.o dax_hmem-y := hmem.o diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c index 903325aac991..20749c7fab81 100644 --- a/drivers/dax/hmem/device.c +++ b/drivers/dax/hmem/device.c @@ -104,4 +104,4 @@ static __init int hmem_init(void) * As this is a fallback for address ranges unclaimed by the ACPI HMAT * parsing it must be at an initcall level greater than hmat_init(). */ -late_initcall(hmem_init); +device_initcall(hmem_init); -- cgit v1.2.3 From 84fe17f8e9c68a4389c6e89b7ce3b4651b359989 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:06:56 -0800 Subject: dax/hmem: Drop unnecessary dax_hmem_remove() Empty driver remove callbacks can just be elided. Reviewed-by: Jonathan Cameron Reviewed-by: Gregory Price Tested-by: Fan Ni Reviewed-by: Vishal Verma Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/167602001664.1924368.9102029637928071240.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/dax/hmem/hmem.c | 7 ------- 1 file changed, 7 deletions(-) (limited to 'drivers/dax') diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index 1bf040dbc834..c7351e0dc8ff 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -44,15 +44,8 @@ static int dax_hmem_probe(struct platform_device *pdev) return 0; } -static int dax_hmem_remove(struct platform_device *pdev) -{ - /* devm handles teardown */ - return 0; -} - static struct platform_driver dax_hmem_driver = { .probe = dax_hmem_probe, - .remove = dax_hmem_remove, .driver = { .name = "hmem", }, -- cgit v1.2.3 From fe098574a93b4e2acb046b583e9857337d807f38 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:07:02 -0800 Subject: dax/hmem: Convey the dax range via memregion_info() In preparation for hmem platform devices to be unregistered, stop using platform_device_add_resources() to convey the address range. The platform_device_add_resources() API causes an existing "Soft Reserved" iomem resource to be re-parented under an inserted platform device resource. When that platform device is deleted it removes the platform device resource and all children. Instead, it is sufficient to convey just the address range and let request_mem_region() insert resources to indicate the devices active in the range. This allows the "Soft Reserved" resource to be re-enumerated upon the next probe event. Reviewed-by: Jonathan Cameron Tested-by: Fan Ni Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/167602002217.1924368.7036275892522551624.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/dax/hmem/device.c | 37 ++++++++++++++----------------------- drivers/dax/hmem/hmem.c | 14 +++----------- include/linux/memregion.h | 2 ++ 3 files changed, 19 insertions(+), 34 deletions(-) (limited to 'drivers/dax') diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c index 20749c7fab81..b1b339bccfe5 100644 --- a/drivers/dax/hmem/device.c +++ b/drivers/dax/hmem/device.c @@ -15,15 +15,8 @@ static struct resource hmem_active = { .flags = IORESOURCE_MEM, }; -void hmem_register_device(int target_nid, struct resource *r) +void hmem_register_device(int target_nid, struct resource *res) { - /* define a clean / non-busy resource for the platform device */ - struct resource res = { - .start = r->start, - .end = r->end, - .flags = IORESOURCE_MEM, - .desc = IORES_DESC_SOFT_RESERVED, - }; struct platform_device *pdev; struct memregion_info info; int rc, id; @@ -31,55 +24,53 @@ void hmem_register_device(int target_nid, struct resource *r) if (nohmem) return; - rc = region_intersects(res.start, resource_size(&res), IORESOURCE_MEM, - IORES_DESC_SOFT_RESERVED); + rc = region_intersects(res->start, resource_size(res), IORESOURCE_MEM, + IORES_DESC_SOFT_RESERVED); if (rc != REGION_INTERSECTS) return; id = memregion_alloc(GFP_KERNEL); if (id < 0) { - pr_err("memregion allocation failure for %pr\n", &res); + pr_err("memregion allocation failure for %pr\n", res); return; } pdev = platform_device_alloc("hmem", id); if (!pdev) { - pr_err("hmem device allocation failure for %pr\n", &res); + pr_err("hmem device allocation failure for %pr\n", res); goto out_pdev; } - if (!__request_region(&hmem_active, res.start, resource_size(&res), + if (!__request_region(&hmem_active, res->start, resource_size(res), dev_name(&pdev->dev), 0)) { - dev_dbg(&pdev->dev, "hmem range %pr already active\n", &res); + dev_dbg(&pdev->dev, "hmem range %pr already active\n", res); goto out_active; } pdev->dev.numa_node = numa_map_to_online_node(target_nid); info = (struct memregion_info) { .target_node = target_nid, + .range = { + .start = res->start, + .end = res->end, + }, }; rc = platform_device_add_data(pdev, &info, sizeof(info)); if (rc < 0) { - pr_err("hmem memregion_info allocation failure for %pr\n", &res); - goto out_resource; - } - - rc = platform_device_add_resources(pdev, &res, 1); - if (rc < 0) { - pr_err("hmem resource allocation failure for %pr\n", &res); + pr_err("hmem memregion_info allocation failure for %pr\n", res); goto out_resource; } rc = platform_device_add(pdev); if (rc < 0) { - dev_err(&pdev->dev, "device add failed for %pr\n", &res); + dev_err(&pdev->dev, "device add failed for %pr\n", res); goto out_resource; } return; out_resource: - __release_region(&hmem_active, res.start, resource_size(&res)); + __release_region(&hmem_active, res->start, resource_size(res)); out_active: platform_device_put(pdev); out_pdev: diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index c7351e0dc8ff..5025a8c9850b 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -15,25 +15,17 @@ static int dax_hmem_probe(struct platform_device *pdev) struct memregion_info *mri; struct dev_dax_data data; struct dev_dax *dev_dax; - struct resource *res; - struct range range; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -ENOMEM; mri = dev->platform_data; - range.start = res->start; - range.end = res->end; - dax_region = alloc_dax_region(dev, pdev->id, &range, mri->target_node, - PMD_SIZE, 0); + dax_region = alloc_dax_region(dev, pdev->id, &mri->range, + mri->target_node, PMD_SIZE, 0); if (!dax_region) return -ENOMEM; data = (struct dev_dax_data) { .dax_region = dax_region, .id = -1, - .size = region_idle ? 0 : resource_size(res), + .size = region_idle ? 0 : range_len(&mri->range), }; dev_dax = devm_create_dev_dax(&data); if (IS_ERR(dev_dax)) diff --git a/include/linux/memregion.h b/include/linux/memregion.h index bf83363807ac..c01321467789 100644 --- a/include/linux/memregion.h +++ b/include/linux/memregion.h @@ -3,10 +3,12 @@ #define _MEMREGION_H_ #include #include +#include #include struct memregion_info { int target_node; + struct range range; }; #ifdef CONFIG_MEMREGION -- cgit v1.2.3 From 7dab174e2e27eeaf10273e597ffbef4f8ea032bb Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:07:07 -0800 Subject: dax/hmem: Move hmem device registration to dax_hmem.ko In preparation for the CXL region driver to take over the responsibility of registering device-dax instances for CXL regions, move the registration of "hmem" devices to dax_hmem.ko. Previously the builtin component of this enabling (drivers/dax/hmem/device.o) would register platform devices for each address range and trigger the dax_hmem.ko module to load and attach device-dax instances to those devices. Now, the ranges are collected from the HMAT and EFI memory map walking, but the device creation is deferred. A new "hmem_platform" device is created which triggers dax_hmem.ko to load and register the platform devices. Tested-by: Fan Ni Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167602002771.1924368.5653558226424530127.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/acpi/numa/hmat.c | 2 +- drivers/dax/Kconfig | 2 +- drivers/dax/hmem/device.c | 91 +++++++++++++++++++--------------------- drivers/dax/hmem/hmem.c | 105 +++++++++++++++++++++++++++++++++++++++++++++- include/linux/dax.h | 7 +++- 5 files changed, 155 insertions(+), 52 deletions(-) (limited to 'drivers/dax') diff --git a/drivers/acpi/numa/hmat.c b/drivers/acpi/numa/hmat.c index ff24282301ab..bba268ecd802 100644 --- a/drivers/acpi/numa/hmat.c +++ b/drivers/acpi/numa/hmat.c @@ -718,7 +718,7 @@ static void hmat_register_target_devices(struct memory_target *target) for (res = target->memregions.child; res; res = res->sibling) { int target_nid = pxm_to_node(target->memory_pxm); - hmem_register_device(target_nid, res); + hmem_register_resource(target_nid, res); } } diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index 5fdf269a822e..d13c889c2a64 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -46,7 +46,7 @@ config DEV_DAX_HMEM Say M if unsure. config DEV_DAX_HMEM_DEVICES - depends on DEV_DAX_HMEM && DAX=y + depends on DEV_DAX_HMEM && DAX def_bool y config DEV_DAX_KMEM diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c index b1b339bccfe5..f9e1a76a04a9 100644 --- a/drivers/dax/hmem/device.c +++ b/drivers/dax/hmem/device.c @@ -8,6 +8,8 @@ static bool nohmem; module_param_named(disable, nohmem, bool, 0444); +static bool platform_initialized; +static DEFINE_MUTEX(hmem_resource_lock); static struct resource hmem_active = { .name = "HMEM devices", .start = 0, @@ -15,71 +17,66 @@ static struct resource hmem_active = { .flags = IORESOURCE_MEM, }; -void hmem_register_device(int target_nid, struct resource *res) +int walk_hmem_resources(struct device *host, walk_hmem_fn fn) +{ + struct resource *res; + int rc = 0; + + mutex_lock(&hmem_resource_lock); + for (res = hmem_active.child; res; res = res->sibling) { + rc = fn(host, (int) res->desc, res); + if (rc) + break; + } + mutex_unlock(&hmem_resource_lock); + return rc; +} +EXPORT_SYMBOL_GPL(walk_hmem_resources); + +static void __hmem_register_resource(int target_nid, struct resource *res) { struct platform_device *pdev; - struct memregion_info info; - int rc, id; + struct resource *new; + int rc; - if (nohmem) + new = __request_region(&hmem_active, res->start, resource_size(res), "", + 0); + if (!new) { + pr_debug("hmem range %pr already active\n", res); return; + } - rc = region_intersects(res->start, resource_size(res), IORESOURCE_MEM, - IORES_DESC_SOFT_RESERVED); - if (rc != REGION_INTERSECTS) - return; + new->desc = target_nid; - id = memregion_alloc(GFP_KERNEL); - if (id < 0) { - pr_err("memregion allocation failure for %pr\n", res); + if (platform_initialized) return; - } - pdev = platform_device_alloc("hmem", id); + pdev = platform_device_alloc("hmem_platform", 0); if (!pdev) { - pr_err("hmem device allocation failure for %pr\n", res); - goto out_pdev; - } - - if (!__request_region(&hmem_active, res->start, resource_size(res), - dev_name(&pdev->dev), 0)) { - dev_dbg(&pdev->dev, "hmem range %pr already active\n", res); - goto out_active; - } - - pdev->dev.numa_node = numa_map_to_online_node(target_nid); - info = (struct memregion_info) { - .target_node = target_nid, - .range = { - .start = res->start, - .end = res->end, - }, - }; - rc = platform_device_add_data(pdev, &info, sizeof(info)); - if (rc < 0) { - pr_err("hmem memregion_info allocation failure for %pr\n", res); - goto out_resource; + pr_err_once("failed to register device-dax hmem_platform device\n"); + return; } rc = platform_device_add(pdev); - if (rc < 0) { - dev_err(&pdev->dev, "device add failed for %pr\n", res); - goto out_resource; - } + if (rc) + platform_device_put(pdev); + else + platform_initialized = true; +} - return; +void hmem_register_resource(int target_nid, struct resource *res) +{ + if (nohmem) + return; -out_resource: - __release_region(&hmem_active, res->start, resource_size(res)); -out_active: - platform_device_put(pdev); -out_pdev: - memregion_free(id); + mutex_lock(&hmem_resource_lock); + __hmem_register_resource(target_nid, res); + mutex_unlock(&hmem_resource_lock); } static __init int hmem_register_one(struct resource *res, void *data) { - hmem_register_device(phys_to_target_node(res->start), res); + hmem_register_resource(phys_to_target_node(res->start), res); return 0; } diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index 5025a8c9850b..e7bdff3132fa 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -3,6 +3,7 @@ #include #include #include +#include #include "../bus.h" static bool region_idle; @@ -43,8 +44,110 @@ static struct platform_driver dax_hmem_driver = { }, }; -module_platform_driver(dax_hmem_driver); +static void release_memregion(void *data) +{ + memregion_free((long) data); +} + +static void release_hmem(void *pdev) +{ + platform_device_unregister(pdev); +} + +static int hmem_register_device(struct device *host, int target_nid, + const struct resource *res) +{ + struct platform_device *pdev; + struct memregion_info info; + long id; + int rc; + + rc = region_intersects(res->start, resource_size(res), IORESOURCE_MEM, + IORES_DESC_SOFT_RESERVED); + if (rc != REGION_INTERSECTS) + return 0; + + id = memregion_alloc(GFP_KERNEL); + if (id < 0) { + dev_err(host, "memregion allocation failure for %pr\n", res); + return -ENOMEM; + } + rc = devm_add_action_or_reset(host, release_memregion, (void *) id); + if (rc) + return rc; + + pdev = platform_device_alloc("hmem", id); + if (!pdev) { + dev_err(host, "device allocation failure for %pr\n", res); + return -ENOMEM; + } + + pdev->dev.numa_node = numa_map_to_online_node(target_nid); + info = (struct memregion_info) { + .target_node = target_nid, + .range = { + .start = res->start, + .end = res->end, + }, + }; + rc = platform_device_add_data(pdev, &info, sizeof(info)); + if (rc < 0) { + dev_err(host, "memregion_info allocation failure for %pr\n", + res); + goto out_put; + } + + rc = platform_device_add(pdev); + if (rc < 0) { + dev_err(host, "%s add failed for %pr\n", dev_name(&pdev->dev), + res); + goto out_put; + } + + return devm_add_action_or_reset(host, release_hmem, pdev); + +out_put: + platform_device_put(pdev); + return rc; +} + +static int dax_hmem_platform_probe(struct platform_device *pdev) +{ + return walk_hmem_resources(&pdev->dev, hmem_register_device); +} + +static struct platform_driver dax_hmem_platform_driver = { + .probe = dax_hmem_platform_probe, + .driver = { + .name = "hmem_platform", + }, +}; + +static __init int dax_hmem_init(void) +{ + int rc; + + rc = platform_driver_register(&dax_hmem_platform_driver); + if (rc) + return rc; + + rc = platform_driver_register(&dax_hmem_driver); + if (rc) + platform_driver_unregister(&dax_hmem_platform_driver); + + return rc; +} + +static __exit void dax_hmem_exit(void) +{ + platform_driver_unregister(&dax_hmem_driver); + platform_driver_unregister(&dax_hmem_platform_driver); +} + +module_init(dax_hmem_init); +module_exit(dax_hmem_exit); MODULE_ALIAS("platform:hmem*"); +MODULE_ALIAS("platform:hmem_platform*"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Intel Corporation"); diff --git a/include/linux/dax.h b/include/linux/dax.h index 2b5ecb591059..bf6258472e49 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -262,11 +262,14 @@ static inline bool dax_mapping(struct address_space *mapping) } #ifdef CONFIG_DEV_DAX_HMEM_DEVICES -void hmem_register_device(int target_nid, struct resource *r); +void hmem_register_resource(int target_nid, struct resource *r); #else -static inline void hmem_register_device(int target_nid, struct resource *r) +static inline void hmem_register_resource(int target_nid, struct resource *r) { } #endif +typedef int (*walk_hmem_fn)(struct device *dev, int target_nid, + const struct resource *res); +int walk_hmem_resources(struct device *dev, walk_hmem_fn fn); #endif -- cgit v1.2.3 From e9ee9fe3a9d4ae0e1e935fc2ec1218b66a043cae Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:07:13 -0800 Subject: dax: Assign RAM regions to memory-hotplug by default The default mode for device-dax instances is backwards for RAM-regions as evidenced by the fact that it tends to catch end users by surprise. "Where is my memory?". Recall that platforms are increasingly shipping with performance-differentiated memory pools beyond typical DRAM and NUMA effects. This includes HBM (high-bandwidth-memory) and CXL (dynamic interleave, varied media types, and future fabric attached possibilities). For this reason the EFI_MEMORY_SP (EFI Special Purpose Memory => Linux 'Soft Reserved') attribute is expected to be applied to all memory-pools that are not the general purpose pool. This designation gives an Operating System a chance to defer usage of a memory pool until later in the boot process where its performance properties can be interrogated and administrator policy can be applied. 'Soft Reserved' memory can be anything from too limited and precious to be part of the general purpose pool (HBM), too slow to host hot kernel data structures (some PMEM media), or anything in between. However, in the absence of an explicit policy, the memory should at least be made usable by default. The current device-dax default hides all non-general-purpose memory behind a device interface. The expectation is that the distribution of users that want the memory online by default vs device-dedicated-access by default follows the Pareto principle. A small number of enlightened users may want to do userspace memory management through a device, but general users just want the kernel to make the memory available with an option to get more advanced later. Arrange for all device-dax instances not backed by PMEM to default to attaching to the dax_kmem driver. From there the baseline memory hotplug policy (CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE / memhp_default_state=) gates whether the memory comes online or stays offline. Where, if it stays offline, it can be reliably converted back to device-mode where it can be partitioned, or fronted by a userspace allocator. So, if someone wants device-dax instances for their 'Soft Reserved' memory: 1/ Build a kernel with CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE=n or boot with memhp_default_state=offline, or roll the dice and hope that the kernel has not pinned a page in that memory before step 2. 2/ Write a udev rule to convert the target dax device(s) from 'system-ram' mode to 'devdax' mode: daxctl reconfigure-device $dax -m devdax -f Cc: Michal Hocko Cc: David Hildenbrand Cc: Dave Hansen Reviewed-by: Gregory Price Tested-by: Fan Ni Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/167602003336.1924368.6809503401422267885.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/dax/Kconfig | 2 +- drivers/dax/bus.c | 53 ++++++++++++++++++++----------------------------- drivers/dax/bus.h | 12 +++++++++-- drivers/dax/device.c | 3 +-- drivers/dax/hmem/hmem.c | 12 ++++++++++- drivers/dax/kmem.c | 1 + 6 files changed, 46 insertions(+), 37 deletions(-) (limited to 'drivers/dax') diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index d13c889c2a64..1163eb62e5f6 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -50,7 +50,7 @@ config DEV_DAX_HMEM_DEVICES def_bool y config DEV_DAX_KMEM - tristate "KMEM DAX: volatile-use of persistent memory" + tristate "KMEM DAX: map dax-devices as System-RAM" default DEV_DAX depends on DEV_DAX depends on MEMORY_HOTPLUG # for add_memory() and friends diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 1dad813ee4a6..012d576004e9 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -56,6 +56,25 @@ static int dax_match_id(struct dax_device_driver *dax_drv, struct device *dev) return match; } +static int dax_match_type(struct dax_device_driver *dax_drv, struct device *dev) +{ + enum dax_driver_type type = DAXDRV_DEVICE_TYPE; + struct dev_dax *dev_dax = to_dev_dax(dev); + + if (dev_dax->region->res.flags & IORESOURCE_DAX_KMEM) + type = DAXDRV_KMEM_TYPE; + + if (dax_drv->type == type) + return 1; + + /* default to device mode if dax_kmem is disabled */ + if (dax_drv->type == DAXDRV_DEVICE_TYPE && + !IS_ENABLED(CONFIG_DEV_DAX_KMEM)) + return 1; + + return 0; +} + enum id_action { ID_REMOVE, ID_ADD, @@ -216,14 +235,9 @@ static int dax_bus_match(struct device *dev, struct device_driver *drv) { struct dax_device_driver *dax_drv = to_dax_drv(drv); - /* - * All but the 'device-dax' driver, which has 'match_always' - * set, requires an exact id match. - */ - if (dax_drv->match_always) + if (dax_match_id(dax_drv, dev)) return 1; - - return dax_match_id(dax_drv, dev); + return dax_match_type(dax_drv, dev); } /* @@ -1413,13 +1427,10 @@ err_id: } EXPORT_SYMBOL_GPL(devm_create_dev_dax); -static int match_always_count; - int __dax_driver_register(struct dax_device_driver *dax_drv, struct module *module, const char *mod_name) { struct device_driver *drv = &dax_drv->drv; - int rc = 0; /* * dax_bus_probe() calls dax_drv->probe() unconditionally. @@ -1434,26 +1445,7 @@ int __dax_driver_register(struct dax_device_driver *dax_drv, drv->mod_name = mod_name; drv->bus = &dax_bus_type; - /* there can only be one default driver */ - mutex_lock(&dax_bus_lock); - match_always_count += dax_drv->match_always; - if (match_always_count > 1) { - match_always_count--; - WARN_ON(1); - rc = -EINVAL; - } - mutex_unlock(&dax_bus_lock); - if (rc) - return rc; - - rc = driver_register(drv); - if (rc && dax_drv->match_always) { - mutex_lock(&dax_bus_lock); - match_always_count -= dax_drv->match_always; - mutex_unlock(&dax_bus_lock); - } - - return rc; + return driver_register(drv); } EXPORT_SYMBOL_GPL(__dax_driver_register); @@ -1463,7 +1455,6 @@ void dax_driver_unregister(struct dax_device_driver *dax_drv) struct dax_id *dax_id, *_id; mutex_lock(&dax_bus_lock); - match_always_count -= dax_drv->match_always; list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) { list_del(&dax_id->list); kfree(dax_id); diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h index fbb940293d6d..8cd79ab34292 100644 --- a/drivers/dax/bus.h +++ b/drivers/dax/bus.h @@ -11,7 +11,10 @@ struct dax_device; struct dax_region; void dax_region_put(struct dax_region *dax_region); -#define IORESOURCE_DAX_STATIC (1UL << 0) +/* dax bus specific ioresource flags */ +#define IORESOURCE_DAX_STATIC BIT(0) +#define IORESOURCE_DAX_KMEM BIT(1) + struct dax_region *alloc_dax_region(struct device *parent, int region_id, struct range *range, int target_node, unsigned int align, unsigned long flags); @@ -25,10 +28,15 @@ struct dev_dax_data { struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data); +enum dax_driver_type { + DAXDRV_KMEM_TYPE, + DAXDRV_DEVICE_TYPE, +}; + struct dax_device_driver { struct device_driver drv; struct list_head ids; - int match_always; + enum dax_driver_type type; int (*probe)(struct dev_dax *dev); void (*remove)(struct dev_dax *dev); }; diff --git a/drivers/dax/device.c b/drivers/dax/device.c index 5494d745ced5..ecdff79e31f2 100644 --- a/drivers/dax/device.c +++ b/drivers/dax/device.c @@ -475,8 +475,7 @@ EXPORT_SYMBOL_GPL(dev_dax_probe); static struct dax_device_driver device_dax_driver = { .probe = dev_dax_probe, - /* all probe actions are unwound by devm, so .remove isn't necessary */ - .match_always = 1, + .type = DAXDRV_DEVICE_TYPE, }; static int __init dax_init(void) diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index e7bdff3132fa..5ec08f9f8a57 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -11,15 +11,25 @@ module_param_named(region_idle, region_idle, bool, 0644); static int dax_hmem_probe(struct platform_device *pdev) { + unsigned long flags = IORESOURCE_DAX_KMEM; struct device *dev = &pdev->dev; struct dax_region *dax_region; struct memregion_info *mri; struct dev_dax_data data; struct dev_dax *dev_dax; + /* + * @region_idle == true indicates that an administrative agent + * wants to manipulate the range partitioning before the devices + * are created, so do not send them to the dax_kmem driver by + * default. + */ + if (region_idle) + flags = 0; + mri = dev->platform_data; dax_region = alloc_dax_region(dev, pdev->id, &mri->range, - mri->target_node, PMD_SIZE, 0); + mri->target_node, PMD_SIZE, flags); if (!dax_region) return -ENOMEM; diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c index 4852a2dbdb27..918d01d3fbaa 100644 --- a/drivers/dax/kmem.c +++ b/drivers/dax/kmem.c @@ -239,6 +239,7 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax) static struct dax_device_driver device_dax_kmem_driver = { .probe = dev_dax_kmem_probe, .remove = dev_dax_kmem_remove, + .type = DAXDRV_KMEM_TYPE, }; static int __init dax_kmem_init(void) -- cgit v1.2.3 From 09d09e04d2fcf88c4620dd28097e0e2a8f720eac Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 10 Feb 2023 01:07:19 -0800 Subject: cxl/dax: Create dax devices for CXL RAM regions While platform firmware takes some responsibility for mapping the RAM capacity of CXL devices present at boot, the OS is responsible for mapping the remainder and hot-added devices. Platform firmware is also responsible for identifying the platform general purpose memory pool, typically DDR attached DRAM, and arranging for the remainder to be 'Soft Reserved'. That reservation allows the CXL subsystem to route the memory to core-mm via memory-hotplug (dax_kmem), or leave it for dedicated access (device-dax). The new 'struct cxl_dax_region' object allows for a CXL memory resource (region) to be published, but also allow for udev and module policy to act on that event. It also prevents cxl_core.ko from having a module loading dependency on any drivers/dax/ modules. Tested-by: Fan Ni Reviewed-by: Dave Jiang Reviewed-by: Jonathan Cameron Link: https://lore.kernel.org/r/167602003896.1924368.10335442077318970468.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- MAINTAINERS | 1 + drivers/cxl/acpi.c | 3 +- drivers/cxl/core/core.h | 3 ++ drivers/cxl/core/port.c | 4 +- drivers/cxl/core/region.c | 108 +++++++++++++++++++++++++++++++++++++++++++++- drivers/cxl/cxl.h | 12 ++++++ drivers/dax/Kconfig | 13 ++++++ drivers/dax/Makefile | 2 + drivers/dax/cxl.c | 53 +++++++++++++++++++++++ drivers/dax/hmem/hmem.c | 14 ++++++ 10 files changed, 209 insertions(+), 4 deletions(-) create mode 100644 drivers/dax/cxl.c (limited to 'drivers/dax') diff --git a/MAINTAINERS b/MAINTAINERS index 7f86d02cb427..73a9f3401e0e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6035,6 +6035,7 @@ M: Dan Williams M: Vishal Verma M: Dave Jiang L: nvdimm@lists.linux.dev +L: linux-cxl@vger.kernel.org S: Supported F: drivers/dax/ diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c index ad0849af42d7..8ebb9a74790d 100644 --- a/drivers/cxl/acpi.c +++ b/drivers/cxl/acpi.c @@ -731,7 +731,8 @@ static void __exit cxl_acpi_exit(void) cxl_bus_drain(); } -module_init(cxl_acpi_init); +/* load before dax_hmem sees 'Soft Reserved' CXL ranges */ +subsys_initcall(cxl_acpi_init); module_exit(cxl_acpi_exit); MODULE_LICENSE("GPL v2"); MODULE_IMPORT_NS(CXL); diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h index 479f01da6d35..cde475e13216 100644 --- a/drivers/cxl/core/core.h +++ b/drivers/cxl/core/core.h @@ -15,12 +15,14 @@ extern struct device_attribute dev_attr_create_ram_region; extern struct device_attribute dev_attr_delete_region; extern struct device_attribute dev_attr_region; extern const struct device_type cxl_pmem_region_type; +extern const struct device_type cxl_dax_region_type; extern const struct device_type cxl_region_type; void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled); #define CXL_REGION_ATTR(x) (&dev_attr_##x.attr) #define CXL_REGION_TYPE(x) (&cxl_region_type) #define SET_CXL_REGION_ATTR(x) (&dev_attr_##x.attr), #define CXL_PMEM_REGION_TYPE(x) (&cxl_pmem_region_type) +#define CXL_DAX_REGION_TYPE(x) (&cxl_dax_region_type) int cxl_region_init(void); void cxl_region_exit(void); #else @@ -38,6 +40,7 @@ static inline void cxl_region_exit(void) #define CXL_REGION_TYPE(x) NULL #define SET_CXL_REGION_ATTR(x) #define CXL_PMEM_REGION_TYPE(x) NULL +#define CXL_DAX_REGION_TYPE(x) NULL #endif struct cxl_send_command; diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c index b45d2796ef35..0bb7a5ff724b 100644 --- a/drivers/cxl/core/port.c +++ b/drivers/cxl/core/port.c @@ -46,6 +46,8 @@ static int cxl_device_id(struct device *dev) return CXL_DEVICE_NVDIMM; if (dev->type == CXL_PMEM_REGION_TYPE()) return CXL_DEVICE_PMEM_REGION; + if (dev->type == CXL_DAX_REGION_TYPE()) + return CXL_DEVICE_DAX_REGION; if (is_cxl_port(dev)) { if (is_cxl_root(to_cxl_port(dev))) return CXL_DEVICE_ROOT; @@ -2015,6 +2017,6 @@ static void cxl_core_exit(void) debugfs_remove_recursive(cxl_debugfs); } -module_init(cxl_core_init); +subsys_initcall(cxl_core_init); module_exit(cxl_core_exit); MODULE_LICENSE("GPL v2"); diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c index 8c29204279e9..91bb9ac881ff 100644 --- a/drivers/cxl/core/region.c +++ b/drivers/cxl/core/region.c @@ -2278,6 +2278,75 @@ out: return cxlr_pmem; } +static void cxl_dax_region_release(struct device *dev) +{ + struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev); + + kfree(cxlr_dax); +} + +static const struct attribute_group *cxl_dax_region_attribute_groups[] = { + &cxl_base_attribute_group, + NULL, +}; + +const struct device_type cxl_dax_region_type = { + .name = "cxl_dax_region", + .release = cxl_dax_region_release, + .groups = cxl_dax_region_attribute_groups, +}; + +static bool is_cxl_dax_region(struct device *dev) +{ + return dev->type == &cxl_dax_region_type; +} + +struct cxl_dax_region *to_cxl_dax_region(struct device *dev) +{ + if (dev_WARN_ONCE(dev, !is_cxl_dax_region(dev), + "not a cxl_dax_region device\n")) + return NULL; + return container_of(dev, struct cxl_dax_region, dev); +} +EXPORT_SYMBOL_NS_GPL(to_cxl_dax_region, CXL); + +static struct lock_class_key cxl_dax_region_key; + +static struct cxl_dax_region *cxl_dax_region_alloc(struct cxl_region *cxlr) +{ + struct cxl_region_params *p = &cxlr->params; + struct cxl_dax_region *cxlr_dax; + struct device *dev; + + down_read(&cxl_region_rwsem); + if (p->state != CXL_CONFIG_COMMIT) { + cxlr_dax = ERR_PTR(-ENXIO); + goto out; + } + + cxlr_dax = kzalloc(sizeof(*cxlr_dax), GFP_KERNEL); + if (!cxlr_dax) { + cxlr_dax = ERR_PTR(-ENOMEM); + goto out; + } + + cxlr_dax->hpa_range.start = p->res->start; + cxlr_dax->hpa_range.end = p->res->end; + + dev = &cxlr_dax->dev; + cxlr_dax->cxlr = cxlr; + device_initialize(dev); + lockdep_set_class(&dev->mutex, &cxl_dax_region_key); + device_set_pm_not_required(dev); + dev->parent = &cxlr->dev; + dev->bus = &cxl_bus_type; + dev->type = &cxl_dax_region_type; +out: + up_read(&cxl_region_rwsem); + + return cxlr_dax; +} + static void cxlr_pmem_unregister(void *_cxlr_pmem) { struct cxl_pmem_region *cxlr_pmem = _cxlr_pmem; @@ -2362,6 +2431,42 @@ err_bridge: return rc; } +static void cxlr_dax_unregister(void *_cxlr_dax) +{ + struct cxl_dax_region *cxlr_dax = _cxlr_dax; + + device_unregister(&cxlr_dax->dev); +} + +static int devm_cxl_add_dax_region(struct cxl_region *cxlr) +{ + struct cxl_dax_region *cxlr_dax; + struct device *dev; + int rc; + + cxlr_dax = cxl_dax_region_alloc(cxlr); + if (IS_ERR(cxlr_dax)) + return PTR_ERR(cxlr_dax); + + dev = &cxlr_dax->dev; + rc = dev_set_name(dev, "dax_region%d", cxlr->id); + if (rc) + goto err; + + rc = device_add(dev); + if (rc) + goto err; + + dev_dbg(&cxlr->dev, "%s: register %s\n", dev_name(dev->parent), + dev_name(dev)); + + return devm_add_action_or_reset(&cxlr->dev, cxlr_dax_unregister, + cxlr_dax); +err: + put_device(dev); + return rc; +} + static int match_decoder_by_range(struct device *dev, void *data) { struct range *r1, *r2 = data; @@ -2624,8 +2729,7 @@ out: p->res->start, p->res->end, cxlr, is_system_ram) > 0) return 0; - dev_dbg(dev, "TODO: hookup devdax\n"); - return 0; + return devm_cxl_add_dax_region(cxlr); default: dev_dbg(&cxlr->dev, "unsupported region mode: %d\n", cxlr->mode); diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h index 2ac344235235..b1395c46baec 100644 --- a/drivers/cxl/cxl.h +++ b/drivers/cxl/cxl.h @@ -513,6 +513,12 @@ struct cxl_pmem_region { struct cxl_pmem_region_mapping mapping[]; }; +struct cxl_dax_region { + struct device dev; + struct cxl_region *cxlr; + struct range hpa_range; +}; + /** * struct cxl_port - logical collection of upstream port devices and * downstream port devices to construct a CXL memory @@ -707,6 +713,7 @@ void cxl_driver_unregister(struct cxl_driver *cxl_drv); #define CXL_DEVICE_MEMORY_EXPANDER 5 #define CXL_DEVICE_REGION 6 #define CXL_DEVICE_PMEM_REGION 7 +#define CXL_DEVICE_DAX_REGION 8 #define MODULE_ALIAS_CXL(type) MODULE_ALIAS("cxl:t" __stringify(type) "*") #define CXL_MODALIAS_FMT "cxl:t%d" @@ -725,6 +732,7 @@ bool is_cxl_pmem_region(struct device *dev); struct cxl_pmem_region *to_cxl_pmem_region(struct device *dev); int cxl_add_to_region(struct cxl_port *root, struct cxl_endpoint_decoder *cxled); +struct cxl_dax_region *to_cxl_dax_region(struct device *dev); #else static inline bool is_cxl_pmem_region(struct device *dev) { @@ -739,6 +747,10 @@ static inline int cxl_add_to_region(struct cxl_port *root, { return 0; } +static inline struct cxl_dax_region *to_cxl_dax_region(struct device *dev) +{ + return NULL; +} #endif /* diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index 1163eb62e5f6..bd06e16c7ac8 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -45,6 +45,19 @@ config DEV_DAX_HMEM Say M if unsure. +config DEV_DAX_CXL + tristate "CXL DAX: direct access to CXL RAM regions" + depends on CXL_REGION && DEV_DAX + default CXL_REGION && DEV_DAX + help + CXL RAM regions are either mapped by platform-firmware + and published in the initial system-memory map as "System RAM", mapped + by platform-firmware as "Soft Reserved", or dynamically provisioned + after boot by the CXL driver. In the latter two cases a device-dax + instance is created to access that unmapped-by-default address range. + Per usual it can remain as dedicated access via a device interface, or + converted to "System RAM" via the dax_kmem facility. + config DEV_DAX_HMEM_DEVICES depends on DEV_DAX_HMEM && DAX def_bool y diff --git a/drivers/dax/Makefile b/drivers/dax/Makefile index 90a56ca3b345..5ed5c39857c8 100644 --- a/drivers/dax/Makefile +++ b/drivers/dax/Makefile @@ -3,10 +3,12 @@ obj-$(CONFIG_DAX) += dax.o obj-$(CONFIG_DEV_DAX) += device_dax.o obj-$(CONFIG_DEV_DAX_KMEM) += kmem.o obj-$(CONFIG_DEV_DAX_PMEM) += dax_pmem.o +obj-$(CONFIG_DEV_DAX_CXL) += dax_cxl.o dax-y := super.o dax-y += bus.o device_dax-y := device.o dax_pmem-y := pmem.o +dax_cxl-y := cxl.o obj-y += hmem/ diff --git a/drivers/dax/cxl.c b/drivers/dax/cxl.c new file mode 100644 index 000000000000..ccdf8de85bd5 --- /dev/null +++ b/drivers/dax/cxl.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright(c) 2023 Intel Corporation. All rights reserved. */ +#include +#include + +#include "../cxl/cxl.h" +#include "bus.h" + +static int cxl_dax_region_probe(struct device *dev) +{ + struct cxl_dax_region *cxlr_dax = to_cxl_dax_region(dev); + int nid = phys_to_target_node(cxlr_dax->hpa_range.start); + struct cxl_region *cxlr = cxlr_dax->cxlr; + struct dax_region *dax_region; + struct dev_dax_data data; + struct dev_dax *dev_dax; + + if (nid == NUMA_NO_NODE) + nid = memory_add_physaddr_to_nid(cxlr_dax->hpa_range.start); + + dax_region = alloc_dax_region(dev, cxlr->id, &cxlr_dax->hpa_range, nid, + PMD_SIZE, IORESOURCE_DAX_KMEM); + if (!dax_region) + return -ENOMEM; + + data = (struct dev_dax_data) { + .dax_region = dax_region, + .id = -1, + .size = range_len(&cxlr_dax->hpa_range), + }; + dev_dax = devm_create_dev_dax(&data); + if (IS_ERR(dev_dax)) + return PTR_ERR(dev_dax); + + /* child dev_dax instances now own the lifetime of the dax_region */ + dax_region_put(dax_region); + return 0; +} + +static struct cxl_driver cxl_dax_region_driver = { + .name = "cxl_dax_region", + .probe = cxl_dax_region_probe, + .id = CXL_DEVICE_DAX_REGION, + .drv = { + .suppress_bind_attrs = true, + }, +}; + +module_cxl_driver(cxl_dax_region_driver); +MODULE_ALIAS_CXL(CXL_DEVICE_DAX_REGION); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Intel Corporation"); +MODULE_IMPORT_NS(CXL); diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c index 5ec08f9f8a57..e5fe8b39fb94 100644 --- a/drivers/dax/hmem/hmem.c +++ b/drivers/dax/hmem/hmem.c @@ -72,6 +72,13 @@ static int hmem_register_device(struct device *host, int target_nid, long id; int rc; + if (IS_ENABLED(CONFIG_CXL_REGION) && + region_intersects(res->start, resource_size(res), IORESOURCE_MEM, + IORES_DESC_CXL) != REGION_DISJOINT) { + dev_dbg(host, "deferring range to CXL: %pr\n", res); + return 0; + } + rc = region_intersects(res->start, resource_size(res), IORESOURCE_MEM, IORES_DESC_SOFT_RESERVED); if (rc != REGION_INTERSECTS) @@ -157,6 +164,13 @@ static __exit void dax_hmem_exit(void) module_init(dax_hmem_init); module_exit(dax_hmem_exit); +/* Allow for CXL to define its own dax regions */ +#if IS_ENABLED(CONFIG_CXL_REGION) +#if IS_MODULE(CONFIG_CXL_ACPI) +MODULE_SOFTDEP("pre: cxl_acpi"); +#endif +#endif + MODULE_ALIAS("platform:hmem*"); MODULE_ALIAS("platform:hmem_platform*"); MODULE_LICENSE("GPL v2"); -- cgit v1.2.3 From 0c16c83ed57fc66b033306ba426a5b324966a33e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Feb 2023 11:30:49 +0100 Subject: dax: cxl: add CXL_REGION dependency There is already a dependency on CXL_REGION, which depends on CXL_BUS, but since CXL_REGION is a 'bool' symbol, it's possible to configure DAX as built-in even though CXL itself is a loadable module: x86_64-linux-ld: drivers/dax/cxl.o: in function `cxl_dax_region_probe': cxl.c:(.text+0xb): undefined reference to `to_cxl_dax_region' x86_64-linux-ld: drivers/dax/cxl.o: in function `cxl_dax_region_driver_init': cxl.c:(.init.text+0x10): undefined reference to `__cxl_driver_register' x86_64-linux-ld: drivers/dax/cxl.o: in function `cxl_dax_region_driver_exit': cxl.c:(.exit.text+0x9): undefined reference to `cxl_driver_unregister' Prevent this with another depndency on the tristate symbol. Fixes: 09d09e04d2fc ("cxl/dax: Create dax devices for CXL RAM regions") Signed-off-by: Arnd Bergmann Link: https://lore.kernel.org/r/20230214103054.1082908-1-arnd@kernel.org Signed-off-by: Dan Williams --- drivers/dax/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/dax') diff --git a/drivers/dax/Kconfig b/drivers/dax/Kconfig index bd06e16c7ac8..7e1008d756b8 100644 --- a/drivers/dax/Kconfig +++ b/drivers/dax/Kconfig @@ -47,7 +47,7 @@ config DEV_DAX_HMEM config DEV_DAX_CXL tristate "CXL DAX: direct access to CXL RAM regions" - depends on CXL_REGION && DEV_DAX + depends on CXL_BUS && CXL_REGION && DEV_DAX default CXL_REGION && DEV_DAX help CXL RAM regions are either mapped by platform-firmware -- cgit v1.2.3 From e686c32590f40bffc45f105c04c836ffad3e531a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 16 Feb 2023 00:36:02 -0800 Subject: dax/kmem: Fix leak of memory-hotplug resources While experimenting with CXL region removal the following corruption of /proc/iomem appeared. Before: f010000000-f04fffffff : CXL Window 0 f010000000-f02fffffff : region4 f010000000-f02fffffff : dax4.0 f010000000-f02fffffff : System RAM (kmem) After (modprobe -r cxl_test): f010000000-f02fffffff : **redacted binary garbage** f010000000-f02fffffff : System RAM (kmem) ...and testing further the same is visible with persistent memory assigned to kmem: Before: 480000000-243fffffff : Persistent Memory 480000000-57e1fffff : namespace3.0 580000000-243fffffff : dax3.0 580000000-243fffffff : System RAM (kmem) After (ndctl disable-region all): 480000000-243fffffff : Persistent Memory 580000000-243fffffff : ***redacted binary garbage*** 580000000-243fffffff : System RAM (kmem) The corrupted data is from a use-after-free of the "dax4.0" and "dax3.0" resources, and it also shows that the "System RAM (kmem)" resource is not being removed. The bug does not appear after "modprobe -r kmem", it requires the parent of "dax4.0" and "dax3.0" to be removed which re-parents the leaked "System RAM (kmem)" instances. Those in turn reference the freed resource as a parent. First up for the fix is release_mem_region_adjustable() needs to reliably delete the resource inserted by add_memory_driver_managed(). That is thwarted by a check for IORESOURCE_SYSRAM that predates the dax/kmem driver, from commit: 65c78784135f ("kernel, resource: check for IORESOURCE_SYSRAM in release_mem_region_adjustable") That appears to be working around the behavior of HMM's "MEMORY_DEVICE_PUBLIC" facility that has since been deleted. With that check removed the "System RAM (kmem)" resource gets removed, but corruption still occurs occasionally because the "dax" resource is not reliably removed. The dax range information is freed before the device is unregistered, so the driver can not reliably recall (another use after free) what it is meant to release. Lastly if that use after free got lucky, the driver was covering up the leak of "System RAM (kmem)" due to its use of release_resource() which detaches, but does not free, child resources. The switch to remove_resource() forces remove_memory() to be responsible for the deletion of the resource added by add_memory_driver_managed(). Fixes: c2f3011ee697 ("device-dax: add an allocation interface for device-dax instances") Cc: Cc: Oscar Salvador Cc: David Hildenbrand Cc: Pavel Tatashin Reviewed-by: Vishal Verma Reviewed-by: Pasha Tatashin Reviewed-by: Dave Jiang Link: https://lore.kernel.org/r/167653656244.3147810.5705900882794040229.stgit@dwillia2-xfh.jf.intel.com Signed-off-by: Dan Williams --- drivers/dax/bus.c | 2 +- drivers/dax/kmem.c | 4 ++-- kernel/resource.c | 14 -------------- 3 files changed, 3 insertions(+), 17 deletions(-) (limited to 'drivers/dax') diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 012d576004e9..67a64f4c472d 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -441,8 +441,8 @@ static void unregister_dev_dax(void *dev) dev_dbg(dev, "%s\n", __func__); kill_dev_dax(dev_dax); - free_dev_dax_ranges(dev_dax); device_del(dev); + free_dev_dax_ranges(dev_dax); put_device(dev); } diff --git a/drivers/dax/kmem.c b/drivers/dax/kmem.c index 918d01d3fbaa..7b36db6f1cbd 100644 --- a/drivers/dax/kmem.c +++ b/drivers/dax/kmem.c @@ -146,7 +146,7 @@ static int dev_dax_kmem_probe(struct dev_dax *dev_dax) if (rc) { dev_warn(dev, "mapping%d: %#llx-%#llx memory add failed\n", i, range.start, range.end); - release_resource(res); + remove_resource(res); kfree(res); data->res[i] = NULL; if (mapped) @@ -195,7 +195,7 @@ static void dev_dax_kmem_remove(struct dev_dax *dev_dax) rc = remove_memory(range.start, range_len(&range)); if (rc == 0) { - release_resource(data->res[i]); + remove_resource(data->res[i]); kfree(data->res[i]); data->res[i] = NULL; success++; diff --git a/kernel/resource.c b/kernel/resource.c index ddbbacb9fb50..b1763b2fd7ef 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1343,20 +1343,6 @@ retry: continue; } - /* - * All memory regions added from memory-hotplug path have the - * flag IORESOURCE_SYSTEM_RAM. If the resource does not have - * this flag, we know that we are dealing with a resource coming - * from HMM/devm. HMM/devm use another mechanism to add/release - * a resource. This goes via devm_request_mem_region and - * devm_release_mem_region. - * HMM/devm take care to release their resources when they want, - * so if we are dealing with them, let us just back off here. - */ - if (!(res->flags & IORESOURCE_SYSRAM)) { - break; - } - if (!(res->flags & IORESOURCE_MEM)) break; -- cgit v1.2.3