-rw-r--r--  drivers/cxl/core/cdat.c      | 101
-rw-r--r--  drivers/cxl/core/core.h      |   4
-rw-r--r--  drivers/cxl/core/hdm.c       | 319
-rw-r--r--  drivers/cxl/core/mbox.c      |  66
-rw-r--r--  drivers/cxl/core/memdev.c    |  66
-rw-r--r--  drivers/cxl/core/port.c      |  21
-rw-r--r--  drivers/cxl/core/region.c    | 137
-rw-r--r--  drivers/cxl/cxl.h            |  39
-rw-r--r--  drivers/cxl/cxlmem.h         |  52
-rw-r--r--  drivers/cxl/mem.c            |   2
-rw-r--r--  drivers/cxl/pci.c            |   7
-rw-r--r--  tools/testing/cxl/test/cxl.c |  22
-rw-r--r--  tools/testing/cxl/test/mem.c |   7
13 files changed, 519 insertions(+), 324 deletions(-)
diff --git a/drivers/cxl/core/cdat.c b/drivers/cxl/core/cdat.c
index 8153f8d83a16..bfba7f5019cf 100644
--- a/drivers/cxl/core/cdat.c
+++ b/drivers/cxl/core/cdat.c
@@ -258,27 +258,29 @@ static void update_perf_entry(struct device *dev, struct dsmas_entry *dent,
 static void cxl_memdev_set_qos_class(struct cxl_dev_state *cxlds,
 				     struct xarray *dsmas_xa)
 {
-	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
 	struct device *dev = cxlds->dev;
-	struct range pmem_range = {
-		.start = cxlds->pmem_res.start,
-		.end = cxlds->pmem_res.end,
-	};
-	struct range ram_range = {
-		.start = cxlds->ram_res.start,
-		.end = cxlds->ram_res.end,
-	};
 	struct dsmas_entry *dent;
 	unsigned long index;
 
 	xa_for_each(dsmas_xa, index, dent) {
-		if (resource_size(&cxlds->ram_res) &&
-		    range_contains(&ram_range, &dent->dpa_range))
-			update_perf_entry(dev, dent, &mds->ram_perf);
-		else if (resource_size(&cxlds->pmem_res) &&
-			 range_contains(&pmem_range, &dent->dpa_range))
-			update_perf_entry(dev, dent, &mds->pmem_perf);
-		else
+		bool found = false;
+
+		for (int i = 0; i < cxlds->nr_partitions; i++) {
+			struct resource *res = &cxlds->part[i].res;
+			struct range range = {
+				.start = res->start,
+				.end = res->end,
+			};
+
+			if (range_contains(&range, &dent->dpa_range)) {
+				update_perf_entry(dev, dent,
+						  &cxlds->part[i].perf);
+				found = true;
+				break;
+			}
+		}
+
+		if (!found)
 			dev_dbg(dev, "no partition for dsmas dpa: %pra\n",
 				&dent->dpa_range);
 	}
@@ -343,36 +345,46 @@ static int match_cxlrd_hb(struct device *dev, void *data)
 	return 0;
 }
 
-static int cxl_qos_class_verify(struct cxl_memdev *cxlmd)
+static void cxl_qos_class_verify(struct cxl_memdev *cxlmd)
 {
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
-	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
 	struct cxl_port *root_port;
-	int rc;
 
 	struct cxl_root *cxl_root __free(put_cxl_root) =
 		find_cxl_root(cxlmd->endpoint);
 
+	/*
+	 * No need to reset_dpa_perf() here as find_cxl_root() is guaranteed to
+	 * succeed when called in the cxl_endpoint_port_probe() path.
+	 */
 	if (!cxl_root)
-		return -ENODEV;
+		return;
 
 	root_port = &cxl_root->port;
 
-	/* Check that the QTG IDs are all sane between end device and root decoders */
-	if (!cxl_qos_match(root_port, &mds->ram_perf))
-		reset_dpa_perf(&mds->ram_perf);
-	if (!cxl_qos_match(root_port, &mds->pmem_perf))
-		reset_dpa_perf(&mds->pmem_perf);
-
-	/* Check to make sure that the device's host bridge is under a root decoder */
-	rc = device_for_each_child(&root_port->dev,
-				   cxlmd->endpoint->host_bridge, match_cxlrd_hb);
-	if (!rc) {
-		reset_dpa_perf(&mds->ram_perf);
-		reset_dpa_perf(&mds->pmem_perf);
+	/*
+	 * Save userspace from needing to check if a qos class has any matches
+	 * by hiding qos class info if the memdev is not mapped by a root
+	 * decoder, or the partition class does not match any root decoder
+	 * class.
+	 */
+	if (!device_for_each_child(&root_port->dev,
+				   cxlmd->endpoint->host_bridge,
+				   match_cxlrd_hb)) {
+		for (int i = 0; i < cxlds->nr_partitions; i++) {
+			struct cxl_dpa_perf *perf = &cxlds->part[i].perf;
+
+			reset_dpa_perf(perf);
+		}
+		return;
 	}
 
-	return rc;
+	for (int i = 0; i < cxlds->nr_partitions; i++) {
+		struct cxl_dpa_perf *perf = &cxlds->part[i].perf;
+
+		if (!cxl_qos_match(root_port, perf))
+			reset_dpa_perf(perf);
+	}
 }
 
 static void discard_dsmas(struct xarray *xa)
@@ -570,23 +582,18 @@ static bool dpa_perf_contains(struct cxl_dpa_perf *perf,
 	return range_contains(&perf->dpa_range, &dpa);
 }
 
-static struct cxl_dpa_perf *cxled_get_dpa_perf(struct cxl_endpoint_decoder *cxled,
-					       enum cxl_decoder_mode mode)
+static struct cxl_dpa_perf *cxled_get_dpa_perf(struct cxl_endpoint_decoder *cxled)
 {
 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
-	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 	struct cxl_dpa_perf *perf;
 
-	switch (mode) {
-	case CXL_DECODER_RAM:
-		perf = &mds->ram_perf;
-		break;
-	case CXL_DECODER_PMEM:
-		perf = &mds->pmem_perf;
-		break;
-	default:
+	if (cxled->part < 0)
+		return ERR_PTR(-EINVAL);
+	perf = &cxlds->part[cxled->part].perf;
+
+	if (!perf)
 		return ERR_PTR(-EINVAL);
-	}
 
 	if (!dpa_perf_contains(perf, cxled->dpa_res))
 		return ERR_PTR(-EINVAL);
@@ -647,7 +654,7 @@ static int cxl_endpoint_gather_bandwidth(struct cxl_region *cxlr,
 	if (cxlds->rcd)
 		return -ENODEV;
 
-	perf = cxled_get_dpa_perf(cxled, cxlr->mode);
+	perf = cxled_get_dpa_perf(cxled);
 	if (IS_ERR(perf))
 		return PTR_ERR(perf);
 
@@ -1053,7 +1060,7 @@ void cxl_region_perf_data_calculate(struct cxl_region *cxlr,
 
 	lockdep_assert_held(&cxl_dpa_rwsem);
 
-	perf = cxled_get_dpa_perf(cxled, cxlr->mode);
+	perf = cxled_get_dpa_perf(cxled);
 	if (IS_ERR(perf))
 		return;
 
diff --git a/drivers/cxl/core/core.h b/drivers/cxl/core/core.h
index c87dbfcc9403..83c690ca6fad 100644
--- a/drivers/cxl/core/core.h
+++ b/drivers/cxl/core/core.h
@@ -72,8 +72,8 @@ void __iomem *devm_cxl_iomap_block(struct device *dev, resource_size_t addr,
 				   resource_size_t length);
 struct dentry *cxl_debugfs_create_dir(const char *dir);
-int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
-		     enum cxl_decoder_mode mode);
+int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled,
+		     enum cxl_partition_mode mode);
 int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size);
 int cxl_dpa_free(struct cxl_endpoint_decoder *cxled);
 resource_size_t cxl_dpa_size(struct cxl_endpoint_decoder *cxled);
diff --git a/drivers/cxl/core/hdm.c b/drivers/cxl/core/hdm.c
index 50e6a45b30ba..d705dec1471e 100644
--- a/drivers/cxl/core/hdm.c
+++ b/drivers/cxl/core/hdm.c
@@ -223,6 +223,37 @@ void cxl_dpa_debug(struct seq_file *file, struct cxl_dev_state *cxlds)
 }
 EXPORT_SYMBOL_NS_GPL(cxl_dpa_debug, "CXL");
 
+/* See request_skip() kernel-doc */
+static resource_size_t __adjust_skip(struct cxl_dev_state *cxlds,
+				     const resource_size_t skip_base,
+				     const resource_size_t skip_len,
+				     const char *requester)
+{
+	const resource_size_t skip_end = skip_base + skip_len - 1;
+
+	for (int i = 0; i < cxlds->nr_partitions; i++) {
+		const struct resource *part_res = &cxlds->part[i].res;
+		resource_size_t adjust_start, adjust_end, size;
+
+		adjust_start = max(skip_base, part_res->start);
+		adjust_end = min(skip_end, part_res->end);
+
+		if (adjust_end < adjust_start)
+			continue;
+
+		size = adjust_end - adjust_start + 1;
+
+		if (!requester)
+			__release_region(&cxlds->dpa_res, adjust_start, size);
+		else if (!__request_region(&cxlds->dpa_res, adjust_start, size,
+					   requester, 0))
+			return adjust_start - skip_base;
+	}
+
+	return skip_len;
+}
+#define release_skip(c, b, l) __adjust_skip((c), (b), (l), NULL)
+
 /*
  * Must be called in a context that synchronizes against this decoder's
  * port ->remove() callback (like an endpoint decoder sysfs attribute)
@@ -241,7 +272,7 @@ static void __cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
 	skip_start = res->start - cxled->skip;
 	__release_region(&cxlds->dpa_res, res->start, resource_size(res));
 	if (cxled->skip)
-		__release_region(&cxlds->dpa_res, skip_start, cxled->skip);
+		release_skip(cxlds, skip_start, cxled->skip);
 	cxled->skip = 0;
 	cxled->dpa_res = NULL;
 	put_device(&cxled->cxld.dev);
@@ -268,6 +299,58 @@ static void devm_cxl_dpa_release(struct cxl_endpoint_decoder *cxled)
 	__cxl_dpa_release(cxled);
 }
 
+/**
+ * request_skip() - Track DPA 'skip' in @cxlds->dpa_res resource tree
+ * @cxlds: CXL.mem device context that parents @cxled
+ * @cxled: Endpoint decoder establishing new allocation that skips lower DPA
+ * @skip_base: DPA < start of new DPA allocation (DPAnew)
+ * @skip_len: @skip_base + @skip_len == DPAnew
+ *
+ * DPA 'skip' arises from out-of-sequence DPA allocation events relative
+ * to free capacity across multiple partitions. It is a wasteful event
+ * as usable DPA gets thrown away, but if a deployment has, for example,
+ * a dual RAM+PMEM device, wants to use PMEM, and has unallocated RAM
+ * DPA, the free RAM DPA must be sacrificed to start allocating PMEM.
+ * See third "Implementation Note" in CXL 3.1 8.2.4.19.13 "Decoder
+ * Protection" for more details.
+ *
+ * A 'skip' always covers the last allocated DPA in a previous partition
+ * to the start of the current partition to allocate. Allocations never
+ * start in the middle of a partition, and allocations are always
+ * de-allocated in reverse order (see cxl_dpa_free(), or natural devm
+ * unwind order from forced in-order allocation).
+ *
+ * If @cxlds->nr_partitions was guaranteed to be <= 2 then the 'skip'
+ * would always be contained to a single partition. Given
+ * @cxlds->nr_partitions may be > 2 it results in cases where the 'skip'
+ * might span "tail capacity of partition[0], all of partition[1], ...,
+ * all of partition[N-1]" to support allocating from partition[N]. That
+ * in turn interacts with the partition 'struct resource' boundaries
+ * within @cxlds->dpa_res whereby 'skip' requests need to be divided by
+ * partition. I.e. this is a quirk of using a 'struct resource' tree to
+ * detect range conflicts while also tracking partition boundaries in
+ * @cxlds->dpa_res.
+ */
+static int request_skip(struct cxl_dev_state *cxlds,
+			struct cxl_endpoint_decoder *cxled,
+			const resource_size_t skip_base,
+			const resource_size_t skip_len)
+{
+	resource_size_t skipped = __adjust_skip(cxlds, skip_base, skip_len,
+						dev_name(&cxled->cxld.dev));
+
+	if (skipped == skip_len)
+		return 0;
+
+	dev_dbg(cxlds->dev,
+		"%s: failed to reserve skipped space (%pa %pa %pa)\n",
+		dev_name(&cxled->cxld.dev), &skip_base, &skip_len, &skipped);
+
+	release_skip(cxlds, skip_base, skipped);
+
+	return -EBUSY;
+}
+
 static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
 			     resource_size_t base, resource_size_t len,
 			     resource_size_t skipped)
@@ -277,6 +360,7 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 	struct device *dev = &port->dev;
 	struct resource *res;
+	int rc;
 
 	lockdep_assert_held_write(&cxl_dpa_rwsem);
 
@@ -305,14 +389,9 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
 	}
 
 	if (skipped) {
-		res = __request_region(&cxlds->dpa_res, base - skipped, skipped,
-				       dev_name(&cxled->cxld.dev), 0);
-		if (!res) {
-			dev_dbg(dev,
-				"decoder%d.%d: failed to reserve skipped space\n",
-				port->id, cxled->cxld.id);
-			return -EBUSY;
-		}
+		rc = request_skip(cxlds, cxled, base - skipped, skipped);
+		if (rc)
+			return rc;
 	}
 	res = __request_region(&cxlds->dpa_res, base, len,
 			       dev_name(&cxled->cxld.dev), 0);
@@ -320,28 +399,117 @@ static int __cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
 		dev_dbg(dev, "decoder%d.%d: failed to reserve allocation\n",
 			port->id, cxled->cxld.id);
 		if (skipped)
-			__release_region(&cxlds->dpa_res, base - skipped,
-					 skipped);
+			release_skip(cxlds, base - skipped, skipped);
 		return -EBUSY;
 	}
 	cxled->dpa_res = res;
 	cxled->skip = skipped;
 
-	if (resource_contains(&cxlds->pmem_res, res))
-		cxled->mode = CXL_DECODER_PMEM;
-	else if (resource_contains(&cxlds->ram_res, res))
-		cxled->mode = CXL_DECODER_RAM;
-	else {
-		dev_warn(dev, "decoder%d.%d: %pr mixed mode not supported\n",
-			 port->id, cxled->cxld.id, cxled->dpa_res);
-		cxled->mode = CXL_DECODER_MIXED;
-	}
+	/*
+	 * When allocating new capacity, ->part is already set, when
+	 * discovering decoder settings at initial enumeration, ->part
+	 * is not set.
+	 */
+	if (cxled->part < 0)
+		for (int i = 0; i < cxlds->nr_partitions; i++)
+			if (resource_contains(&cxlds->part[i].res, res)) {
+				cxled->part = i;
+				break;
+			}
+
+	if (cxled->part < 0)
+		dev_warn(dev, "decoder%d.%d: %pr does not map any partition\n",
+			 port->id, cxled->cxld.id, res);
 
 	port->hdm_end++;
 	get_device(&cxled->cxld.dev);
 	return 0;
 }
 
+static int add_dpa_res(struct device *dev, struct resource *parent,
+		       struct resource *res, resource_size_t start,
+		       resource_size_t size, const char *type)
+{
+	int rc;
+
+	*res = (struct resource) {
+		.name = type,
+		.start = start,
+		.end = start + size - 1,
+		.flags = IORESOURCE_MEM,
+	};
+	if (resource_size(res) == 0) {
+		dev_dbg(dev, "DPA(%s): no capacity\n", res->name);
+		return 0;
+	}
+	rc = request_resource(parent, res);
+	if (rc) {
+		dev_err(dev, "DPA(%s): failed to track %pr (%d)\n", res->name,
+			res, rc);
+		return rc;
+	}
+
+	dev_dbg(dev, "DPA(%s): %pr\n", res->name, res);
+
+	return 0;
+}
+
+static const char *cxl_mode_name(enum cxl_partition_mode mode)
+{
+	switch (mode) {
+	case CXL_PARTMODE_RAM:
+		return "ram";
+	case CXL_PARTMODE_PMEM:
+		return "pmem";
+	default:
+		return "";
+	};
+}
+
+/* if this fails the caller must destroy @cxlds, there is no recovery */
+int cxl_dpa_setup(struct cxl_dev_state *cxlds, const struct cxl_dpa_info *info)
+{
+	struct device *dev = cxlds->dev;
+
+	guard(rwsem_write)(&cxl_dpa_rwsem);
+
+	if (cxlds->nr_partitions)
+		return -EBUSY;
+
+	if (!info->size || !info->nr_partitions) {
+		cxlds->dpa_res = DEFINE_RES_MEM(0, 0);
+		cxlds->nr_partitions = 0;
+		return 0;
+	}
+
+	cxlds->dpa_res = DEFINE_RES_MEM(0, info->size);
+
+	for (int i = 0; i < info->nr_partitions; i++) {
+		const struct cxl_dpa_part_info *part = &info->part[i];
+		int rc;
+
+		cxlds->part[i].perf.qos_class = CXL_QOS_CLASS_INVALID;
+		cxlds->part[i].mode = part->mode;
+
+		/* Require ordered + contiguous partitions */
+		if (i) {
+			const struct cxl_dpa_part_info *prev = &info->part[i - 1];
+
+			if (prev->range.end + 1 != part->range.start)
+				return -EINVAL;
+		}
+		rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->part[i].res,
+				 part->range.start, range_len(&part->range),
+				 cxl_mode_name(part->mode));
+		if (rc)
+			return rc;
+		cxlds->nr_partitions++;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(cxl_dpa_setup);
+
 int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
 			 resource_size_t base, resource_size_t len,
 			 resource_size_t skipped)
@@ -418,53 +586,46 @@ out:
 	return rc;
 }
 
-int cxl_dpa_set_mode(struct cxl_endpoint_decoder *cxled,
-		     enum cxl_decoder_mode mode)
+int cxl_dpa_set_part(struct cxl_endpoint_decoder *cxled,
+		     enum cxl_partition_mode mode)
 {
 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 	struct device *dev = &cxled->cxld.dev;
-
-	switch (mode) {
-	case CXL_DECODER_RAM:
-	case CXL_DECODER_PMEM:
-		break;
-	default:
-		dev_dbg(dev, "unsupported mode: %d\n", mode);
-		return -EINVAL;
-	}
+	int part;
 
 	guard(rwsem_write)(&cxl_dpa_rwsem);
 	if (cxled->cxld.flags & CXL_DECODER_F_ENABLE)
 		return -EBUSY;
 
-	/*
-	 * Only allow modes that are supported by the current partition
-	 * configuration
-	 */
-	if (mode == CXL_DECODER_PMEM && !resource_size(&cxlds->pmem_res)) {
-		dev_dbg(dev, "no available pmem capacity\n");
-		return -ENXIO;
+	for (part = 0; part < cxlds->nr_partitions; part++)
+		if (cxlds->part[part].mode == mode)
+			break;
+
+	if (part >= cxlds->nr_partitions) {
+		dev_dbg(dev, "unsupported mode: %d\n", mode);
+		return -EINVAL;
 	}
-	if (mode == CXL_DECODER_RAM && !resource_size(&cxlds->ram_res)) {
-		dev_dbg(dev, "no available ram capacity\n");
+
+	if (!resource_size(&cxlds->part[part].res)) {
+		dev_dbg(dev, "no available capacity for mode: %d\n", mode);
 		return -ENXIO;
 	}
 
-	cxled->mode = mode;
+	cxled->part = part;
 
 	return 0;
 }
 
 int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size)
 {
 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
-	resource_size_t free_ram_start, free_pmem_start;
 	struct cxl_port *port = cxled_to_port(cxled);
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 	struct device *dev = &cxled->cxld.dev;
-	resource_size_t start, avail, skip;
+	struct resource *res, *prev = NULL;
+	resource_size_t start, avail, skip, skip_start;
 	struct resource *p, *last;
-	int rc;
+	int part, rc;
 
 	down_write(&cxl_dpa_rwsem);
 	if (cxled->cxld.region) {
@@ -480,50 +641,50 @@ int cxl_dpa_alloc(struct cxl_endpoint_decoder *cxled, unsigned long long size)
 		goto out;
 	}
 
-	for (p = cxlds->ram_res.child, last = NULL; p; p = p->sibling)
-		last = p;
-	if (last)
-		free_ram_start = last->end + 1;
-	else
-		free_ram_start = cxlds->ram_res.start;
+	part = cxled->part;
+	if (part < 0) {
+		dev_dbg(dev, "partition not set\n");
+		rc = -EBUSY;
+		goto out;
+	}
 
-	for (p = cxlds->pmem_res.child, last = NULL; p; p = p->sibling)
+	res = &cxlds->part[part].res;
+	for (p = res->child, last = NULL; p; p = p->sibling)
 		last = p;
 	if (last)
-		free_pmem_start = last->end + 1;
+		start = last->end + 1;
 	else
-		free_pmem_start = cxlds->pmem_res.start;
-
-	if (cxled->mode == CXL_DECODER_RAM) {
-		start = free_ram_start;
-		avail = cxlds->ram_res.end - start + 1;
-		skip = 0;
-	} else if (cxled->mode == CXL_DECODER_PMEM) {
-		resource_size_t skip_start, skip_end;
-
-		start = free_pmem_start;
-		avail = cxlds->pmem_res.end - start + 1;
-		skip_start = free_ram_start;
+		start = res->start;
 
-		/*
-		 * If some pmem is already allocated, then that allocation
-		 * already handled the skip.
-		 */
-		if (cxlds->pmem_res.child &&
-		    skip_start == cxlds->pmem_res.child->start)
-			skip_end = skip_start - 1;
-		else
-			skip_end = start - 1;
-		skip = skip_end - skip_start + 1;
-	} else {
-		dev_dbg(dev, "mode not set\n");
-		rc = -EINVAL;
-		goto out;
+	/*
+	 * To allocate at partition N, a skip needs to be calculated for all
+	 * unallocated space at lower partition indices.
+	 *
+	 * If a partition has any allocations, the search can end because a
+	 * previous cxl_dpa_alloc() invocation is assumed to have accounted for
+	 * all previous partitions.
+	 */
+	skip_start = CXL_RESOURCE_NONE;
+	for (int i = part; i; i--) {
+		prev = &cxlds->part[i - 1].res;
+		for (p = prev->child, last = NULL; p; p = p->sibling)
+			last = p;
+		if (last) {
+			skip_start = last->end + 1;
+			break;
+		}
+		skip_start = prev->start;
 	}
 
+	avail = res->end - start + 1;
+	if (skip_start == CXL_RESOURCE_NONE)
+		skip = 0;
+	else
+		skip = res->start - skip_start;
+
 	if (size > avail) {
 		dev_dbg(dev, "%pa exceeds available %s capacity: %pa\n", &size,
-			cxl_decoder_mode_name(cxled->mode), &avail);
+			res->name, &avail);
 		rc = -ENOSPC;
 		goto out;
 	}
diff --git a/drivers/cxl/core/mbox.c b/drivers/cxl/core/mbox.c
index 548564c770c0..62bb3653362f 100644
--- a/drivers/cxl/core/mbox.c
+++ b/drivers/cxl/core/mbox.c
@@ -1241,55 +1241,39 @@ int cxl_mem_sanitize(struct cxl_memdev *cxlmd, u16 cmd)
 	return rc;
 }
 
-static int add_dpa_res(struct device *dev, struct resource *parent,
-		       struct resource *res, resource_size_t start,
-		       resource_size_t size, const char *type)
+static void add_part(struct cxl_dpa_info *info, u64 start, u64 size, enum cxl_partition_mode mode)
 {
-	int rc;
+	int i = info->nr_partitions;
 
-	res->name = type;
-	res->start = start;
-	res->end = start + size - 1;
-	res->flags = IORESOURCE_MEM;
-	if (resource_size(res) == 0) {
-		dev_dbg(dev, "DPA(%s): no capacity\n", res->name);
-		return 0;
-	}
-	rc = request_resource(parent, res);
-	if (rc) {
-		dev_err(dev, "DPA(%s): failed to track %pr (%d)\n", res->name,
-			res, rc);
-		return rc;
-	}
-
-	dev_dbg(dev, "DPA(%s): %pr\n", res->name, res);
+	if (size == 0)
+		return;
 
-	return 0;
+	info->part[i].range = (struct range) {
+		.start = start,
+		.end = start + size - 1,
+	};
+	info->part[i].mode = mode;
+	info->nr_partitions++;
 }
 
-int cxl_mem_create_range_info(struct cxl_memdev_state *mds)
+int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info)
 {
 	struct cxl_dev_state *cxlds = &mds->cxlds;
 	struct device *dev = cxlds->dev;
 	int rc;
 
 	if (!cxlds->media_ready) {
-		cxlds->dpa_res = DEFINE_RES_MEM(0, 0);
-		cxlds->ram_res = DEFINE_RES_MEM(0, 0);
-		cxlds->pmem_res = DEFINE_RES_MEM(0, 0);
+		info->size = 0;
 		return 0;
 	}
 
-	cxlds->dpa_res = DEFINE_RES_MEM(0, mds->total_bytes);
+	info->size = mds->total_bytes;
 
 	if (mds->partition_align_bytes == 0) {
-		rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
-				 mds->volatile_only_bytes, "ram");
-		if (rc)
-			return rc;
-		return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res,
-				   mds->volatile_only_bytes,
-				   mds->persistent_only_bytes, "pmem");
+		add_part(info, 0, mds->volatile_only_bytes, CXL_PARTMODE_RAM);
+		add_part(info, mds->volatile_only_bytes,
+			 mds->persistent_only_bytes, CXL_PARTMODE_PMEM);
+		return 0;
 	}
 
 	rc = cxl_mem_get_partition_info(mds);
@@ -1298,15 +1282,13 @@ int cxl_mem_create_range_info(struct cxl_memdev_state *mds)
 		return rc;
 	}
 
-	rc = add_dpa_res(dev, &cxlds->dpa_res, &cxlds->ram_res, 0,
-			 mds->active_volatile_bytes, "ram");
-	if (rc)
-		return rc;
-	return add_dpa_res(dev, &cxlds->dpa_res, &cxlds->pmem_res,
-			   mds->active_volatile_bytes,
-			   mds->active_persistent_bytes, "pmem");
+	add_part(info, 0, mds->active_volatile_bytes, CXL_PARTMODE_RAM);
+	add_part(info, mds->active_volatile_bytes, mds->active_persistent_bytes,
+		 CXL_PARTMODE_PMEM);
+
+	return 0;
 }
-EXPORT_SYMBOL_NS_GPL(cxl_mem_create_range_info, "CXL");
+EXPORT_SYMBOL_NS_GPL(cxl_mem_dpa_fetch, "CXL");
 
 int cxl_set_timestamp(struct cxl_memdev_state *mds)
 {
@@ -1450,8 +1432,6 @@ struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev)
 	mds->cxlds.reg_map.host = dev;
 	mds->cxlds.reg_map.resource = CXL_RESOURCE_NONE;
 	mds->cxlds.type = CXL_DEVTYPE_CLASSMEM;
-	mds->ram_perf.qos_class = CXL_QOS_CLASS_INVALID;
-	mds->pmem_perf.qos_class = CXL_QOS_CLASS_INVALID;
 
 	return mds;
 }
diff --git a/drivers/cxl/core/memdev.c b/drivers/cxl/core/memdev.c
index ae3dfcbe8938..63c6c681125d 100644
--- a/drivers/cxl/core/memdev.c
+++ b/drivers/cxl/core/memdev.c
@@ -75,12 +75,20 @@ static ssize_t label_storage_size_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(label_storage_size);
 
+static resource_size_t cxl_ram_size(struct cxl_dev_state *cxlds)
+{
+	/* Static RAM is only expected at partition 0. */
+	if (cxlds->part[0].mode != CXL_PARTMODE_RAM)
+		return 0;
+	return resource_size(&cxlds->part[0].res);
+}
+
 static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
 			     char *buf)
 {
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
-	unsigned long long len = resource_size(&cxlds->ram_res);
+	unsigned long long len = cxl_ram_size(cxlds);
 
 	return sysfs_emit(buf, "%#llx\n", len);
 }
@@ -93,7 +101,7 @@ static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
 {
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
-	unsigned long long len = resource_size(&cxlds->pmem_res);
+	unsigned long long len = cxl_pmem_size(cxlds);
 
 	return sysfs_emit(buf, "%#llx\n", len);
 }
@@ -198,22 +206,17 @@ static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
 	int rc = 0;
 
 	/* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */
-	if (resource_size(&cxlds->pmem_res)) {
-		offset = cxlds->pmem_res.start;
-		length = resource_size(&cxlds->pmem_res);
-		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
-		if (rc)
-			return rc;
-	}
-	if (resource_size(&cxlds->ram_res)) {
-		offset = cxlds->ram_res.start;
-		length = resource_size(&cxlds->ram_res);
+	for (int i = 0; i < cxlds->nr_partitions; i++) {
+		const struct resource *res = &cxlds->part[i].res;
+
+		offset = res->start;
+		length = resource_size(res);
 		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
 		/*
 		 * Invalid Physical Address is not an error for
 		 * volatile addresses. Device support is optional.
 		 */
-		if (rc == -EFAULT)
+		if (rc == -EFAULT && cxlds->part[i].mode == CXL_PARTMODE_RAM)
 			rc = 0;
 	}
 	return rc;
@@ -404,14 +407,21 @@ static struct attribute *cxl_memdev_attributes[] = {
 	NULL,
 };
 
+static struct cxl_dpa_perf *to_pmem_perf(struct cxl_dev_state *cxlds)
+{
+	for (int i = 0; i < cxlds->nr_partitions; i++)
+		if (cxlds->part[i].mode == CXL_PARTMODE_PMEM)
+			return &cxlds->part[i].perf;
+	return NULL;
+}
+
 static ssize_t pmem_qos_class_show(struct device *dev,
 				   struct device_attribute *attr, char *buf)
 {
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
-	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
 
-	return sysfs_emit(buf, "%d\n", mds->pmem_perf.qos_class);
+	return sysfs_emit(buf, "%d\n", to_pmem_perf(cxlds)->qos_class);
 }
 
 static struct device_attribute dev_attr_pmem_qos_class =
@@ -423,14 +433,20 @@ static struct attribute *cxl_memdev_pmem_attributes[] = {
 	NULL,
 };
 
+static struct cxl_dpa_perf *to_ram_perf(struct cxl_dev_state *cxlds)
+{
+	if (cxlds->part[0].mode != CXL_PARTMODE_RAM)
+		return NULL;
+	return &cxlds->part[0].perf;
+}
+
 static ssize_t ram_qos_class_show(struct device *dev,
 				  struct device_attribute *attr, char *buf)
 {
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
-	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
 
-	return sysfs_emit(buf, "%d\n", mds->ram_perf.qos_class);
+	return sysfs_emit(buf, "%d\n", to_ram_perf(cxlds)->qos_class);
 }
 
 static struct device_attribute dev_attr_ram_qos_class =
@@ -466,11 +482,11 @@ static umode_t cxl_ram_visible(struct kobject *kobj, struct attribute *a, int n)
 {
 	struct device *dev = kobj_to_dev(kobj);
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
-	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+	struct cxl_dpa_perf *perf = to_ram_perf(cxlmd->cxlds);
 
-	if (a == &dev_attr_ram_qos_class.attr)
-		if (mds->ram_perf.qos_class == CXL_QOS_CLASS_INVALID)
-			return 0;
+	if (a == &dev_attr_ram_qos_class.attr &&
+	    (!perf || perf->qos_class == CXL_QOS_CLASS_INVALID))
+		return 0;
 
 	return a->mode;
 }
@@ -485,11 +501,11 @@ static umode_t cxl_pmem_visible(struct kobject *kobj, struct attribute *a, int n)
 {
 	struct device *dev = kobj_to_dev(kobj);
 	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
-	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlmd->cxlds);
+	struct cxl_dpa_perf *perf = to_pmem_perf(cxlmd->cxlds);
 
-	if (a == &dev_attr_pmem_qos_class.attr)
-		if (mds->pmem_perf.qos_class == CXL_QOS_CLASS_INVALID)
-			return 0;
+	if (a == &dev_attr_pmem_qos_class.attr &&
+	    (!perf || perf->qos_class == CXL_QOS_CLASS_INVALID))
+		return 0;
 
 	return a->mode;
 }
diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
index 7ed6e8f374af..194892bb1396 100644
--- a/drivers/cxl/core/port.c
+++ b/drivers/cxl/core/port.c
@@ -194,25 +194,35 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
 			 char *buf)
 {
 	struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
+	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	/* without @cxl_dpa_rwsem, make sure @part is not reloaded */
+	int part = READ_ONCE(cxled->part);
+	const char *desc;
+
+	if (part < 0)
+		desc = "none";
+	else
+		desc = cxlds->part[part].res.name;
 
-	return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxled->mode));
+	return sysfs_emit(buf, "%s\n", desc);
 }
 
 static ssize_t mode_store(struct device *dev, struct device_attribute *attr,
 			  const char *buf, size_t len)
 {
 	struct cxl_endpoint_decoder *cxled = to_cxl_endpoint_decoder(dev);
-	enum cxl_decoder_mode mode;
+	enum cxl_partition_mode mode;
 	ssize_t rc;
 
 	if (sysfs_streq(buf, "pmem"))
-		mode = CXL_DECODER_PMEM;
+		mode = CXL_PARTMODE_PMEM;
 	else if (sysfs_streq(buf, "ram"))
-		mode = CXL_DECODER_RAM;
+		mode = CXL_PARTMODE_RAM;
 	else
 		return -EINVAL;
 
-	rc = cxl_dpa_set_mode(cxled, mode);
+	rc = cxl_dpa_set_part(cxled, mode);
 	if (rc)
 		return rc;
 
@@ -1899,6 +1909,7 @@ struct cxl_endpoint_decoder *cxl_endpoint_decoder_alloc(struct cxl_port *port)
 		return ERR_PTR(-ENOMEM);
 
 	cxled->pos = -1;
+	cxled->part = -1;
 	cxld = &cxled->cxld;
 	rc = cxl_decoder_init(port, cxld);
 	if (rc) {
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index e8d11a988fd9..84ce625b8591 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -144,7 +144,7 @@ static ssize_t uuid_show(struct device *dev, struct device_attribute *attr,
 	rc = down_read_interruptible(&cxl_region_rwsem);
 	if (rc)
 		return rc;
-	if (cxlr->mode != CXL_DECODER_PMEM)
+	if (cxlr->mode != CXL_PARTMODE_PMEM)
 		rc = sysfs_emit(buf, "\n");
 	else
 		rc = sysfs_emit(buf, "%pUb\n", &p->uuid);
@@ -441,7 +441,7 @@ static umode_t cxl_region_visible(struct kobject *kobj, struct attribute *a,
 	 * Support tooling that expects to find a 'uuid' attribute for all
 	 * regions regardless of mode.
 	 */
-	if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_DECODER_PMEM)
+	if (a == &dev_attr_uuid.attr && cxlr->mode != CXL_PARTMODE_PMEM)
 		return 0444;
 	return a->mode;
 }
@@ -603,8 +603,16 @@ static ssize_t mode_show(struct device *dev, struct device_attribute *attr,
 			 char *buf)
 {
 	struct cxl_region *cxlr = to_cxl_region(dev);
+	const char *desc;
 
-	return sysfs_emit(buf, "%s\n", cxl_decoder_mode_name(cxlr->mode));
+	if (cxlr->mode == CXL_PARTMODE_RAM)
+		desc = "ram";
+	else if (cxlr->mode == CXL_PARTMODE_PMEM)
+		desc = "pmem";
+	else
+		desc = "";
+
+	return sysfs_emit(buf, "%s\n", desc);
 }
 static DEVICE_ATTR_RO(mode);
 
@@ -630,7 +638,7 @@ static int alloc_hpa(struct cxl_region *cxlr, resource_size_t size)
 
 	/* ways, granularity and uuid (if PMEM) need to be set before HPA */
 	if (!p->interleave_ways || !p->interleave_granularity ||
-	    (cxlr->mode == CXL_DECODER_PMEM && uuid_is_null(&p->uuid)))
+	    (cxlr->mode == CXL_PARTMODE_PMEM && uuid_is_null(&p->uuid)))
 		return -ENXIO;
 
 	div64_u64_rem(size, (u64)SZ_256M * p->interleave_ways, &remainder);
@@ -1888,6 +1896,7 @@ static int cxl_region_attach(struct cxl_region *cxlr,
 {
 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(cxlr->dev.parent);
 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 	struct cxl_region_params *p = &cxlr->params;
 	struct cxl_port *ep_port, *root_port;
 	struct cxl_dport *dport;
@@ -1902,17 +1911,17 @@ static int cxl_region_attach(struct cxl_region *cxlr,
 		return rc;
 	}
 
-	if (cxled->mode != cxlr->mode) {
-		dev_dbg(&cxlr->dev, "%s region mode: %d mismatch: %d\n",
-			dev_name(&cxled->cxld.dev), cxlr->mode, cxled->mode);
-		return -EINVAL;
-	}
-
-	if (cxled->mode == CXL_DECODER_DEAD) {
+	if (cxled->part < 0) {
 		dev_dbg(&cxlr->dev, "%s dead\n", dev_name(&cxled->cxld.dev));
 		return -ENODEV;
 	}
 
+	if (cxlds->part[cxled->part].mode != cxlr->mode) {
+		dev_dbg(&cxlr->dev, "%s region mode: %d mismatch\n",
+			dev_name(&cxled->cxld.dev), cxlr->mode);
+		return -EINVAL;
+	}
+
 	/* all full of members, or interleave config not established? */
 	if (p->state > CXL_CONFIG_INTERLEAVE_ACTIVE) {
 		dev_dbg(&cxlr->dev, "region already active\n");
@@ -2115,7 +2124,7 @@ out:
 void cxl_decoder_kill_region(struct cxl_endpoint_decoder *cxled)
 {
 	down_write(&cxl_region_rwsem);
-	cxled->mode = CXL_DECODER_DEAD;
+	cxled->part = -1;
 	cxl_region_detach(cxled);
 	up_write(&cxl_region_rwsem);
 }
@@ -2471,7 +2480,7 @@ static int cxl_region_calculate_adistance(struct notifier_block *nb,
  */
 static struct cxl_region *devm_cxl_add_region(struct cxl_root_decoder *cxlrd,
 					      int id,
-					      enum cxl_decoder_mode mode,
+					      enum cxl_partition_mode mode,
 					      enum cxl_decoder_type type)
 {
 	struct cxl_port *port = to_cxl_port(cxlrd->cxlsd.cxld.dev.parent);
@@ -2525,13 +2534,13 @@ static ssize_t create_ram_region_show(struct device *dev,
 }
 
 static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
-					  enum cxl_decoder_mode mode, int id)
+					  enum cxl_partition_mode mode, int id)
 {
 	int rc;
 
 	switch (mode) {
-	case CXL_DECODER_RAM:
-	case CXL_DECODER_PMEM:
+	case CXL_PARTMODE_RAM:
+	case CXL_PARTMODE_PMEM:
 		break;
 	default:
 		dev_err(&cxlrd->cxlsd.cxld.dev, "unsupported mode %d\n", mode);
@@ -2551,7 +2560,7 @@ static struct cxl_region *__create_region(struct cxl_root_decoder *cxlrd,
 }
 
 static ssize_t create_region_store(struct device *dev, const char *buf,
-				   size_t len, enum cxl_decoder_mode mode)
+				   size_t len, enum cxl_partition_mode mode)
 {
 	struct cxl_root_decoder *cxlrd = to_cxl_root_decoder(dev);
 	struct cxl_region *cxlr;
@@ -2572,7 +2581,7 @@ static ssize_t create_pmem_region_store(struct device *dev,
 					struct device_attribute *attr,
 					const char *buf, size_t len)
 {
-	return create_region_store(dev, buf, len, CXL_DECODER_PMEM);
+	return create_region_store(dev, buf, len, CXL_PARTMODE_PMEM);
 }
 DEVICE_ATTR_RW(create_pmem_region);
 
@@ -2580,7 +2589,7 @@ static ssize_t create_ram_region_store(struct device *dev,
 					struct device_attribute *attr,
 					const char *buf, size_t len)
 {
-	return create_region_store(dev, buf, len, CXL_DECODER_RAM);
+	return create_region_store(dev, buf, len, CXL_PARTMODE_RAM);
 }
 DEVICE_ATTR_RW(create_ram_region);
 
@@ -2678,7 +2687,7 @@ EXPORT_SYMBOL_NS_GPL(to_cxl_pmem_region, "CXL");
 
 struct cxl_poison_context {
 	struct cxl_port *port;
-	enum cxl_decoder_mode mode;
+	int part;
 	u64 offset;
 };
 
@@ -2686,47 +2695,45 @@ static int cxl_get_poison_unmapped(struct cxl_memdev *cxlmd,
 				   struct cxl_poison_context *ctx)
 {
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
+	const struct resource *res;
+	struct resource *p, *last;
 	u64 offset, length;
 	int rc = 0;
 
+	if (ctx->part < 0)
+		return 0;
+
 	/*
-	 * Collect poison for the remaining unmapped resources
-	 * after poison is collected by committed endpoints.
-	 *
-	 * Knowing that PMEM must always follow RAM, get poison
-	 * for unmapped resources based on the last decoder's mode:
-	 * ram: scan remains of ram range, then any pmem range
-	 * pmem: scan remains of pmem range
+	 * Collect poison for the remaining unmapped resources after
+	 * poison is collected by committed endpoint decoders.
 	 */
-
-	if (ctx->mode == CXL_DECODER_RAM) {
-		offset = ctx->offset;
-		length = resource_size(&cxlds->ram_res) - offset;
+	for (int i = ctx->part; i < cxlds->nr_partitions; i++) {
+		res = &cxlds->part[i].res;
+		for (p = res->child, last = NULL; p; p = p->sibling)
+			last = p;
+		if (last)
+			offset = last->end + 1;
+		else
+			offset = res->start;
+		length = res->end - offset + 1;
+		if (!length)
+			break;
 		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
-		if (rc == -EFAULT)
-			rc = 0;
+		if (rc == -EFAULT && cxlds->part[i].mode == CXL_PARTMODE_RAM)
+			continue;
 		if (rc)
-			return rc;
-	}
-	if (ctx->mode == CXL_DECODER_PMEM) {
-		offset = ctx->offset;
-		length = resource_size(&cxlds->dpa_res) - offset;
-		if (!length)
-			return 0;
-	} else if (resource_size(&cxlds->pmem_res)) {
-		offset = cxlds->pmem_res.start;
-		length = resource_size(&cxlds->pmem_res);
-	} else {
-		return 0;
+			break;
 	}
 
-	return cxl_mem_get_poison(cxlmd, offset, length, NULL);
+	return rc;
 }
 
 static int poison_by_decoder(struct device *dev, void *arg)
 {
 	struct cxl_poison_context *ctx = arg;
 	struct cxl_endpoint_decoder *cxled;
+	enum cxl_partition_mode mode;
+	struct cxl_dev_state *cxlds;
 	struct cxl_memdev *cxlmd;
 	u64 offset, length;
 	int rc = 0;
@@ -2735,27 +2742,18 @@ static int poison_by_decoder(struct device *dev, void *arg)
 		return rc;
 
 	cxled = to_cxl_endpoint_decoder(dev);
-	if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
-		return rc;
-
-	/*
-	 * Regions are only created with single mode decoders: pmem or ram.
-	 * Linux does not support mixed mode decoders. This means that
-	 * reading poison per endpoint decoder adheres to the requirement
-	 * that poison reads of pmem and ram must be separated.
-	 * CXL 3.0 Spec 8.2.9.8.4.1
-	 */
-	if (cxled->mode == CXL_DECODER_MIXED) {
-		dev_dbg(dev, "poison list read unsupported in mixed mode\n");
+	if (!cxled->dpa_res)
 		return rc;
-	}
 
 	cxlmd = cxled_to_memdev(cxled);
+	cxlds = cxlmd->cxlds;
+	mode = cxlds->part[cxled->part].mode;
+
 	if (cxled->skip) {
 		offset = cxled->dpa_res->start - cxled->skip;
 		length = cxled->skip;
 		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
-		if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
+		if (rc == -EFAULT && mode == CXL_PARTMODE_RAM)
 			rc = 0;
 		if (rc)
 			return rc;
@@ -2764,7 +2762,7 @@ static int poison_by_decoder(struct device *dev, void *arg)
 	offset = cxled->dpa_res->start;
 	length = cxled->dpa_res->end - offset + 1;
 	rc = cxl_mem_get_poison(cxlmd, offset, length, cxled->cxld.region);
-	if (rc == -EFAULT && cxled->mode == CXL_DECODER_RAM)
+	if (rc == -EFAULT && mode == CXL_PARTMODE_RAM)
 		rc = 0;
 	if (rc)
 		return rc;
@@ -2772,7 +2770,7 @@ static int poison_by_decoder(struct device *dev, void *arg)
 	/* Iterate until commit_end is reached */
 	if (cxled->cxld.id == ctx->port->commit_end) {
 		ctx->offset = cxled->dpa_res->end + 1;
-		ctx->mode = cxled->mode;
+		ctx->part = cxled->part;
 		return 1;
 	}
 
@@ -2785,7 +2783,8 @@ int cxl_get_poison_by_endpoint(struct cxl_port *port)
 	int rc = 0;
 
 	ctx = (struct cxl_poison_context) {
-		.port = port
+		.port = port,
+		.part = -1,
 	};
 
 	rc = device_for_each_child(&port->dev, &ctx, poison_by_decoder);
@@ -3230,14 +3229,18 @@ static struct cxl_region *construct_region(struct cxl_root_decoder *cxlrd,
 {
 	struct cxl_memdev *cxlmd = cxled_to_memdev(cxled);
 	struct cxl_port *port = cxlrd_to_port(cxlrd);
+	struct cxl_dev_state *cxlds = cxlmd->cxlds;
 	struct range *hpa = &cxled->cxld.hpa_range;
+	int rc, part = READ_ONCE(cxled->part);
 	struct cxl_region_params *p;
 	struct cxl_region *cxlr;
 	struct resource *res;
-	int rc;
+
+	if (part < 0)
+		return ERR_PTR(-EBUSY);
 
 	do {
-		cxlr = __create_region(cxlrd, cxled->mode,
+		cxlr = __create_region(cxlrd, cxlds->part[part].mode,
 				       atomic_read(&cxlrd->region_id));
 	} while (IS_ERR(cxlr) && PTR_ERR(cxlr) == -EBUSY);
 
@@ -3440,9 +3443,9 @@ out:
 		return rc;
 
 	switch (cxlr->mode) {
-	case CXL_DECODER_PMEM:
+	case CXL_PARTMODE_PMEM:
 		return devm_cxl_add_pmem_region(cxlr);
-	case CXL_DECODER_RAM:
+	case CXL_PARTMODE_RAM:
 		/*
 		 * The region can not be managed by CXL if any portion of
 		 * it is already online as 'System RAM'
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
index bbbaa0d0a670..94a34833eedd 100644
--- a/drivers/cxl/cxl.h
+++ b/drivers/cxl/cxl.h
@@ -373,32 +373,6 @@ struct cxl_decoder {
 };
 
 /*
- * CXL_DECODER_DEAD prevents endpoints from being reattached to regions
- * while cxld_unregister() is running
- */
-enum cxl_decoder_mode {
-	CXL_DECODER_NONE,
-	CXL_DECODER_RAM,
-	CXL_DECODER_PMEM,
-	CXL_DECODER_MIXED,
-	CXL_DECODER_DEAD,
-};
-
-static inline const char *cxl_decoder_mode_name(enum cxl_decoder_mode mode)
-{
-	static const char * const names[] = {
-		[CXL_DECODER_NONE] = "none",
-		[CXL_DECODER_RAM] = "ram",
-		[CXL_DECODER_PMEM] = "pmem",
-		[CXL_DECODER_MIXED] = "mixed",
-	};
-
-	if (mode >= CXL_DECODER_NONE && mode <= CXL_DECODER_MIXED)
-		return names[mode];
-	return "mixed";
-}
-
-/*
  * Track whether this decoder is reserved for region autodiscovery, or
  * free for userspace provisioning.
  */
@@ -412,16 +386,16 @@ enum cxl_decoder_state {
  * @cxld: base cxl_decoder_object
  * @dpa_res: actively claimed DPA span of this decoder
  * @skip: offset into @dpa_res where @cxld.hpa_range maps
- * @mode: which memory type / access-mode-partition this decoder targets
  * @state: autodiscovery state
+ * @part: partition index this decoder maps
  * @pos: interleave position in @cxld.region
  */
 struct cxl_endpoint_decoder {
 	struct cxl_decoder cxld;
 	struct resource *dpa_res;
 	resource_size_t skip;
-	enum cxl_decoder_mode mode;
 	enum cxl_decoder_state state;
+	int part;
 	int pos;
 };
 
@@ -506,6 +480,11 @@ struct cxl_region_params {
 	int nr_targets;
 };
 
+enum cxl_partition_mode {
+	CXL_PARTMODE_RAM,
+	CXL_PARTMODE_PMEM,
+};
+
 /*
  * Indicate whether this region has been assembled by autodetection or
  * userspace assembly. Prevent endpoint decoders outside of automatic
@@ -525,7 +504,7 @@ struct cxl_region_params {
 * struct cxl_region - CXL region
 * @dev: This region's device
 * @id: This region's id. Id is globally unique across all regions
- * @mode: Endpoint decoder allocation / access mode
+ * @mode: Operational mode of the mapped capacity
 * @type: Endpoint decoder target type
 * @cxl_nvb: nvdimm bridge for coordinating @cxlr_pmem setup / shutdown
 * @cxlr_pmem: (for pmem regions) cached copy of the nvdimm bridge
@@ -538,7 +517,7 @@ struct cxl_region_params {
 struct cxl_region {
 	struct device dev;
 	int id;
-	enum cxl_decoder_mode mode;
+	enum cxl_partition_mode mode;
 	enum cxl_decoder_type type;
 	struct cxl_nvdimm_bridge *cxl_nvb;
 	struct cxl_pmem_region *cxlr_pmem;
diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
index 2a25d1957ddb..36a091597066 100644
--- a/drivers/cxl/cxlmem.h
+++ b/drivers/cxl/cxlmem.h
@@ -97,6 +97,19 @@ int devm_cxl_dpa_reserve(struct cxl_endpoint_decoder *cxled,
 			 resource_size_t base, resource_size_t len,
 			 resource_size_t skipped);
 
+#define CXL_NR_PARTITIONS_MAX 2
+
+struct cxl_dpa_info {
+	u64 size;
+	struct cxl_dpa_part_info {
+		struct range range;
+		enum cxl_partition_mode mode;
+	} part[CXL_NR_PARTITIONS_MAX];
+	int nr_partitions;
+};
+
+int cxl_dpa_setup(struct cxl_dev_state *cxlds, const struct cxl_dpa_info *info);
+
 static inline struct cxl_ep *cxl_ep_load(struct cxl_port *port,
 					 struct cxl_memdev *cxlmd)
 {
@@ -409,6 +422,18 @@ struct cxl_dpa_perf {
 };
 
 /**
+ * struct cxl_dpa_partition - DPA partition descriptor
+ * @res: shortcut to the partition in the DPA resource tree (cxlds->dpa_res)
+ * @perf: performance attributes of the partition from CDAT
+ * @mode: operation mode for the DPA capacity, e.g. ram, pmem, dynamic...
+ */
+struct cxl_dpa_partition {
+	struct resource res;
+	struct cxl_dpa_perf perf;
+	enum cxl_partition_mode mode;
+};
+
+/**
  * struct cxl_dev_state - The driver device state
  *
  * cxl_dev_state represents the CXL driver/device state. It provides an
@@ -423,8 +448,8 @@ struct cxl_dpa_perf {
 * @rcd: operating in RCD mode (CXL 3.0 9.11.8 CXL Devices Attached to an RCH)
 * @media_ready: Indicate whether the device media is usable
 * @dpa_res: Overall DPA resource tree for the device
- * @pmem_res: Active Persistent memory capacity configuration
- * @ram_res: Active Volatile memory capacity configuration
+ * @part: DPA partition array
+ * @nr_partitions: Number of DPA partitions
 * @serial: PCIe Device Serial Number
 * @type: Generic Memory Class device or Vendor Specific Memory device
 * @cxl_mbox: CXL mailbox context
@@ -438,13 +463,25 @@ struct cxl_dev_state {
 	bool rcd;
 	bool media_ready;
 	struct resource dpa_res;
-	struct resource pmem_res;
-	struct resource ram_res;
+	struct cxl_dpa_partition part[CXL_NR_PARTITIONS_MAX];
+	unsigned int nr_partitions;
 	u64 serial;
 	enum cxl_devtype type;
 	struct cxl_mailbox cxl_mbox;
 };
 
+static inline resource_size_t cxl_pmem_size(struct cxl_dev_state *cxlds)
+{
+	/*
+	 * Static PMEM may be at partition index 0 when there is no static RAM
+	 * capacity.
+	 */
+	for (int i = 0; i < cxlds->nr_partitions; i++)
+		if (cxlds->part[i].mode == CXL_PARTMODE_PMEM)
+			return resource_size(&cxlds->part[i].res);
+	return 0;
+}
+
 static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox)
 {
 	return dev_get_drvdata(cxl_mbox->host);
@@ -471,8 +508,6 @@ static inline struct cxl_dev_state *mbox_to_cxlds(struct cxl_mailbox *cxl_mbox)
 * @active_persistent_bytes: sum of hard + soft persistent
 * @next_volatile_bytes: volatile capacity change pending device reset
 * @next_persistent_bytes: persistent capacity change pending device reset
- * @ram_perf: performance data entry matched to RAM partition
- * @pmem_perf: performance data entry matched to PMEM partition
 * @event: event log driver state
 * @poison: poison driver state info
 * @security: security driver state info
@@ -496,9 +531,6 @@ struct cxl_memdev_state {
 	u64 next_volatile_bytes;
 	u64 next_persistent_bytes;
 
-	struct cxl_dpa_perf ram_perf;
-	struct cxl_dpa_perf pmem_perf;
-
 	struct cxl_event_state event;
 	struct cxl_poison_state poison;
 	struct cxl_security_state security;
@@ -818,7 +850,7 @@ int cxl_internal_send_cmd(struct cxl_mailbox *cxl_mbox,
 int cxl_dev_state_identify(struct cxl_memdev_state *mds);
 int cxl_await_media_ready(struct cxl_dev_state *cxlds);
 int cxl_enumerate_cmds(struct cxl_memdev_state *mds);
-int cxl_mem_create_range_info(struct cxl_memdev_state *mds);
+int cxl_mem_dpa_fetch(struct cxl_memdev_state *mds, struct cxl_dpa_info *info);
 struct cxl_memdev_state *cxl_memdev_state_create(struct device *dev);
 void set_exclusive_cxl_commands(struct cxl_memdev_state *mds,
 				unsigned long *cmds);
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
index 2f03a4d5606e..9675243bd05b 100644
--- a/drivers/cxl/mem.c
+++ b/drivers/cxl/mem.c
@@ -152,7 +152,7 @@ static int cxl_mem_probe(struct device *dev)
 		return -ENXIO;
 	}
 
-	if (resource_size(&cxlds->pmem_res) && IS_ENABLED(CONFIG_CXL_PMEM)) {
+	if (cxl_pmem_size(cxlds) && IS_ENABLED(CONFIG_CXL_PMEM)) {
 		rc = devm_cxl_add_nvdimm(parent_port, cxlmd);
 		if (rc) {
 			if (rc == -ENODEV)
diff --git a/drivers/cxl/pci.c b/drivers/cxl/pci.c
index a96e54c6259e..b2c943a4de0a 100644
--- a/drivers/cxl/pci.c
+++ b/drivers/cxl/pci.c
@@ -903,6 +903,7 @@ __ATTRIBUTE_GROUPS(cxl_rcd);
 static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
 	struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
+	struct cxl_dpa_info range_info = { 0 };
 	struct cxl_memdev_state *mds;
 	struct cxl_dev_state *cxlds;
 	struct cxl_register_map map;
@@ -993,7 +994,11 @@ static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	if (rc)
 		return rc;
 
-	rc = cxl_mem_create_range_info(mds);
+	rc = cxl_mem_dpa_fetch(mds, &range_info);
+	if (rc)
+		return rc;
+
+	rc = cxl_dpa_setup(cxlds, &range_info);
 	if (rc)
 		return rc;
 
diff --git a/tools/testing/cxl/test/cxl.c b/tools/testing/cxl/test/cxl.c
index cc8948f49117..083a66a52731 100644
--- a/tools/testing/cxl/test/cxl.c
+++ b/tools/testing/cxl/test/cxl.c
@@ -1000,25 +1000,21 @@ static void mock_cxl_endpoint_parse_cdat(struct cxl_port *port)
 		find_cxl_root(port);
 	struct cxl_memdev *cxlmd = to_cxl_memdev(port->uport_dev);
 	struct cxl_dev_state *cxlds = cxlmd->cxlds;
-	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
 	struct access_coordinate ep_c[ACCESS_COORDINATE_MAX];
-	struct range pmem_range = {
-		.start = cxlds->pmem_res.start,
-		.end = cxlds->pmem_res.end,
-	};
-	struct range ram_range = {
-		.start = cxlds->ram_res.start,
-		.end = cxlds->ram_res.end,
-	};
 
 	if (!cxl_root)
 		return;
 
-	if (range_len(&ram_range))
-		dpa_perf_setup(port, &ram_range, &mds->ram_perf);
+	for (int i = 0; i < cxlds->nr_partitions; i++) {
+		struct resource *res = &cxlds->part[i].res;
+		struct cxl_dpa_perf *perf = &cxlds->part[i].perf;
+		struct range range = {
+			.start = res->start,
+			.end = res->end,
+		};
 
-	if (range_len(&pmem_range))
-		dpa_perf_setup(port, &pmem_range, &mds->pmem_perf);
+		dpa_perf_setup(port, &range, perf);
+	}
 
 	cxl_memdev_update_perf(cxlmd);
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index 8d731bd63988..495199238335 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -1494,6 +1494,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
 	struct cxl_dev_state *cxlds;
 	struct cxl_mockmem_data *mdata;
 	struct cxl_mailbox *cxl_mbox;
+	struct cxl_dpa_info range_info = { 0 };
 	int rc;
 
 	mdata = devm_kzalloc(dev, sizeof(*mdata), GFP_KERNEL);
@@ -1554,7 +1555,11 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
 	if (rc)
 		return rc;
 
-	rc = cxl_mem_create_range_info(mds);
+	rc = cxl_mem_dpa_fetch(mds, &range_info);
+	if (rc)
+		return rc;
+
+	rc = cxl_dpa_setup(cxlds, &range_info);
 	if (rc)
 		return rc;
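
Editor's note: as an illustration of the skip-division rule described in the
request_skip() kernel-doc above, the following standalone userspace sketch
(not kernel code; the three-partition layout and all values are invented for
this example) clamps one 'skip' span against each partition's bounds and
emits one reservation per intersected partition, mirroring the loop in
__adjust_skip():

/*
 * Hypothetical example: allocating at pmem (0x2000) while only 0x800 of
 * ram0 is in use skips [0x800, 0x1fff], i.e. the free tail of ram0 plus
 * all of ram1, and that span must be divided per partition.
 */
#include <stdint.h>
#include <stdio.h>

struct part {
	uint64_t start, end;	/* inclusive DPA bounds */
	const char *name;
};

int main(void)
{
	/* assumed layout: two ram partitions followed by pmem */
	const struct part parts[] = {
		{ 0x0000, 0x0fff, "ram0" },
		{ 0x1000, 0x1fff, "ram1" },
		{ 0x2000, 0x2fff, "pmem" },
	};
	const uint64_t skip_base = 0x800, skip_len = 0x1800;
	const uint64_t skip_end = skip_base + skip_len - 1;

	for (size_t i = 0; i < sizeof(parts) / sizeof(parts[0]); i++) {
		/* clamp the skip span to this partition's bounds */
		uint64_t lo = skip_base > parts[i].start ? skip_base : parts[i].start;
		uint64_t hi = skip_end < parts[i].end ? skip_end : parts[i].end;

		if (hi < lo)
			continue;	/* skip does not intersect this partition */

		printf("%s: reserve [%#llx-%#llx]\n", parts[i].name,
		       (unsigned long long)lo, (unsigned long long)hi);
	}

	return 0;
}

Run as-is it prints one reservation for the tail of ram0 and one for all of
ram1, which is exactly the per-partition division the 'struct resource' tree
in @cxlds->dpa_res requires.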