summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/testing/sysfs-edac-memory-repair57
-rwxr-xr-xdrivers/edac/mem_repair.c84
-rw-r--r--include/linux/edac.h28
3 files changed, 169 insertions, 0 deletions
diff --git a/Documentation/ABI/testing/sysfs-edac-memory-repair b/Documentation/ABI/testing/sysfs-edac-memory-repair
index c54f59e4497b..0434a3b23ff3 100644
--- a/Documentation/ABI/testing/sysfs-edac-memory-repair
+++ b/Documentation/ABI/testing/sysfs-edac-memory-repair
@@ -42,6 +42,14 @@ Description:
- ppr - Post package repair.
+ - cacheline-sparing
+
+ - row-sparing
+
+ - bank-sparing
+
+ - rank-sparing
+
- All other values are reserved.
What: /sys/bus/edac/devices/<dev-name>/mem_repairX/persist_mode
@@ -134,6 +142,55 @@ Description:
related error records and trace events, for eg. CXL DRAM
and CXL general media error records in CXL memory devices.
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/bank_group
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/bank
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/rank
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/row
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/column
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/channel
+What: /sys/bus/edac/devices/<dev-name>/mem_repairX/sub_channel
+Date: March 2025
+KernelVersion: 6.15
+Contact: linux-edac@vger.kernel.org
+Description:
+ (RW) The control attributes for the memory to be repaired.
+ The specific value of attributes to use depends on the
+ portion of memory to repair and will be reported to the host
+ in related error records and be available to userspace
+ in trace events, such as CXL DRAM and CXL general media
+ error records of CXL memory devices.
+
+ When readng back these attributes, it returns the current
+ value of memory requested to be repaired.
+
+ bank_group - The bank group of the memory to repair.
+
+ bank - The bank number of the memory to repair.
+
+ rank - The rank of the memory to repair. Rank is defined as a
+ set of memory devices on a channel that together execute a
+ transaction.
+
+ row - The row number of the memory to repair.
+
+ column - The column number of the memory to repair.
+
+ channel - The channel of the memory to repair. Channel is
+ defined as an interface that can be independently accessed
+ for a transaction.
+
+ sub_channel - The subchannel of the memory to repair.
+
+ The requirement to set these attributes varies based on the
+ repair function. The attributes in sysfs are not present
+ unless required for a repair function.
+
+ For example, CXL spec ver 3.1, Section 8.2.9.7.1.2 Table 8-103
+ soft PPR and Section 8.2.9.7.1.3 Table 8-104 hard PPR operations,
+ these attributes are not required to set. CXL spec ver 3.1,
+ Section 8.2.9.7.1.4 Table 8-105 memory sparing, these attributes
+ are required to set based on memory sparing granularity.
+
What: /sys/bus/edac/devices/<dev-name>/mem_repairX/repair
Date: March 2025
KernelVersion: 6.15
diff --git a/drivers/edac/mem_repair.c b/drivers/edac/mem_repair.c
index 5c94ac1027db..3b1a845457b0 100755
--- a/drivers/edac/mem_repair.c
+++ b/drivers/edac/mem_repair.c
@@ -22,6 +22,13 @@ enum edac_mem_repair_attributes {
MR_MIN_DPA,
MR_MAX_DPA,
MR_NIBBLE_MASK,
+ MR_BANK_GROUP,
+ MR_BANK,
+ MR_RANK,
+ MR_ROW,
+ MR_COLUMN,
+ MR_CHANNEL,
+ MR_SUB_CHANNEL,
MEM_DO_REPAIR,
MR_MAX_ATTRS
};
@@ -70,6 +77,13 @@ MR_ATTR_SHOW(dpa, get_dpa, u64, "0x%llx\n")
MR_ATTR_SHOW(min_dpa, get_min_dpa, u64, "0x%llx\n")
MR_ATTR_SHOW(max_dpa, get_max_dpa, u64, "0x%llx\n")
MR_ATTR_SHOW(nibble_mask, get_nibble_mask, u32, "0x%x\n")
+MR_ATTR_SHOW(bank_group, get_bank_group, u32, "%u\n")
+MR_ATTR_SHOW(bank, get_bank, u32, "%u\n")
+MR_ATTR_SHOW(rank, get_rank, u32, "%u\n")
+MR_ATTR_SHOW(row, get_row, u32, "0x%x\n")
+MR_ATTR_SHOW(column, get_column, u32, "%u\n")
+MR_ATTR_SHOW(channel, get_channel, u32, "%u\n")
+MR_ATTR_SHOW(sub_channel, get_sub_channel, u32, "%u\n")
#define MR_ATTR_STORE(attrib, cb, type, conv_func) \
static ssize_t attrib##_store(struct device *ras_feat_dev, \
@@ -99,6 +113,13 @@ MR_ATTR_STORE(persist_mode, set_persist_mode, unsigned long, kstrtoul)
MR_ATTR_STORE(hpa, set_hpa, u64, kstrtou64)
MR_ATTR_STORE(dpa, set_dpa, u64, kstrtou64)
MR_ATTR_STORE(nibble_mask, set_nibble_mask, unsigned long, kstrtoul)
+MR_ATTR_STORE(bank_group, set_bank_group, unsigned long, kstrtoul)
+MR_ATTR_STORE(bank, set_bank, unsigned long, kstrtoul)
+MR_ATTR_STORE(rank, set_rank, unsigned long, kstrtoul)
+MR_ATTR_STORE(row, set_row, unsigned long, kstrtoul)
+MR_ATTR_STORE(column, set_column, unsigned long, kstrtoul)
+MR_ATTR_STORE(channel, set_channel, unsigned long, kstrtoul)
+MR_ATTR_STORE(sub_channel, set_sub_channel, unsigned long, kstrtoul)
#define MR_DO_OP(attrib, cb) \
static ssize_t attrib##_store(struct device *ras_feat_dev, \
@@ -189,6 +210,62 @@ static umode_t mem_repair_attr_visible(struct kobject *kobj, struct attribute *a
return 0444;
}
break;
+ case MR_BANK_GROUP:
+ if (ops->get_bank_group) {
+ if (ops->set_bank_group)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case MR_BANK:
+ if (ops->get_bank) {
+ if (ops->set_bank)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case MR_RANK:
+ if (ops->get_rank) {
+ if (ops->set_rank)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case MR_ROW:
+ if (ops->get_row) {
+ if (ops->set_row)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case MR_COLUMN:
+ if (ops->get_column) {
+ if (ops->set_column)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case MR_CHANNEL:
+ if (ops->get_channel) {
+ if (ops->set_channel)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
+ case MR_SUB_CHANNEL:
+ if (ops->get_sub_channel) {
+ if (ops->set_sub_channel)
+ return a->mode;
+ else
+ return 0444;
+ }
+ break;
case MEM_DO_REPAIR:
if (ops->do_repair)
return a->mode;
@@ -230,6 +307,13 @@ static int mem_repair_create_desc(struct device *dev,
[MR_MIN_DPA] = MR_ATTR_RO(min_dpa, instance),
[MR_MAX_DPA] = MR_ATTR_RO(max_dpa, instance),
[MR_NIBBLE_MASK] = MR_ATTR_RW(nibble_mask, instance),
+ [MR_BANK_GROUP] = MR_ATTR_RW(bank_group, instance),
+ [MR_BANK] = MR_ATTR_RW(bank, instance),
+ [MR_RANK] = MR_ATTR_RW(rank, instance),
+ [MR_ROW] = MR_ATTR_RW(row, instance),
+ [MR_COLUMN] = MR_ATTR_RW(column, instance),
+ [MR_CHANNEL] = MR_ATTR_RW(channel, instance),
+ [MR_SUB_CHANNEL] = MR_ATTR_RW(sub_channel, instance),
[MEM_DO_REPAIR] = MR_ATTR_WO(repair, instance)
};
diff --git a/include/linux/edac.h b/include/linux/edac.h
index cfb2ef41ab95..451f9c152c99 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -780,6 +780,20 @@ enum edac_mem_repair_cmd {
* @get_max_dpa: get the maximum supported device physical address (DPA).
* @get_nibble_mask: get current nibble mask of memory to repair.
* @set_nibble_mask: set nibble mask of memory to repair.
+ * @get_bank_group: get current bank group of memory to repair.
+ * @set_bank_group: set bank group of memory to repair.
+ * @get_bank: get current bank of memory to repair.
+ * @set_bank: set bank of memory to repair.
+ * @get_rank: get current rank of memory to repair.
+ * @set_rank: set rank of memory to repair.
+ * @get_row: get current row of memory to repair.
+ * @set_row: set row of memory to repair.
+ * @get_column: get current column of memory to repair.
+ * @set_column: set column of memory to repair.
+ * @get_channel: get current channel of memory to repair.
+ * @set_channel: set channel of memory to repair.
+ * @get_sub_channel: get current subchannel of memory to repair.
+ * @set_sub_channel: set subchannel of memory to repair.
* @do_repair: Issue memory repair operation for the HPA/DPA and
* other control attributes set for the memory to repair.
*
@@ -800,6 +814,20 @@ struct edac_mem_repair_ops {
int (*get_max_dpa)(struct device *dev, void *drv_data, u64 *dpa);
int (*get_nibble_mask)(struct device *dev, void *drv_data, u32 *val);
int (*set_nibble_mask)(struct device *dev, void *drv_data, u32 val);
+ int (*get_bank_group)(struct device *dev, void *drv_data, u32 *val);
+ int (*set_bank_group)(struct device *dev, void *drv_data, u32 val);
+ int (*get_bank)(struct device *dev, void *drv_data, u32 *val);
+ int (*set_bank)(struct device *dev, void *drv_data, u32 val);
+ int (*get_rank)(struct device *dev, void *drv_data, u32 *val);
+ int (*set_rank)(struct device *dev, void *drv_data, u32 val);
+ int (*get_row)(struct device *dev, void *drv_data, u32 *val);
+ int (*set_row)(struct device *dev, void *drv_data, u32 val);
+ int (*get_column)(struct device *dev, void *drv_data, u32 *val);
+ int (*set_column)(struct device *dev, void *drv_data, u32 val);
+ int (*get_channel)(struct device *dev, void *drv_data, u32 *val);
+ int (*set_channel)(struct device *dev, void *drv_data, u32 val);
+ int (*get_sub_channel)(struct device *dev, void *drv_data, u32 *val);
+ int (*set_sub_channel)(struct device *dev, void *drv_data, u32 val);
int (*do_repair)(struct device *dev, void *drv_data, u32 val);
};