author		Stephen Rothwell <sfr@canb.auug.org.au>	2010-12-10 12:16:56 +1100
committer	Stephen Rothwell <sfr@canb.auug.org.au>	2010-12-10 12:16:56 +1100
commit		005d26868e9a839db816693432716ee5389c6f90 (patch)
tree		b3f0c6b4fdde66888d5bace3ac6cb006c8813a0e
parent		bead7f175bb1647caa93ed46be229bc6569cfb91 (diff)
parent		295839e770f95a272d25dc707183cb5f9ba14c1c (diff)
Merge remote branch 'edac-amd/for-next'
-rw-r--r--	drivers/edac/Kconfig		8
-rw-r--r--	drivers/edac/amd64_edac.c	829
-rw-r--r--	drivers/edac/amd64_edac.h	86
-rw-r--r--	drivers/edac/amd64_edac_inj.c	25
-rw-r--r--	drivers/edac/cpc925_edac.c	9
-rw-r--r--	drivers/edac/e752x_edac.c	8
-rw-r--r--	drivers/edac/edac_core.h	11
-rw-r--r--	drivers/edac/edac_mc.c		14
-rw-r--r--	drivers/edac/edac_mc_sysfs.c	57
-rw-r--r--	drivers/edac/i5100_edac.c	9
-rw-r--r--	drivers/edac/mce_amd.c		450
-rw-r--r--	drivers/edac/mce_amd.h		14
-rw-r--r--	drivers/edac/mce_amd_inj.c	9
13 files changed, 805 insertions, 724 deletions
diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig
index f436a2fa9f38..fe70a341bd8b 100644
--- a/drivers/edac/Kconfig
+++ b/drivers/edac/Kconfig
@@ -75,11 +75,11 @@ config EDAC_MCE
bool
config EDAC_AMD64
- tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h"
- depends on EDAC_MM_EDAC && AMD_NB && X86_64 && PCI && EDAC_DECODE_MCE
+ tristate "AMD64 (Opteron, Athlon64) K8, F10h"
+ depends on EDAC_MM_EDAC && AMD_NB && X86_64 && EDAC_DECODE_MCE
help
- Support for error detection and correction on the AMD 64
- Families of Memory Controllers (K8, F10h and F11h)
+ Support for error detection and correction of DRAM ECC errors on
+ the AMD64 families of memory controllers (K8 and F10h)
config EDAC_AMD64_ERROR_INJECTION
bool "Sysfs HW Error injection facilities"
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c
index 774f950b08ab..4a5ecc58025d 100644
--- a/drivers/edac/amd64_edac.c
+++ b/drivers/edac/amd64_edac.c
@@ -15,10 +15,14 @@ module_param(ecc_enable_override, int, 0644);
static struct msr __percpu *msrs;
-/* Lookup table for all possible MC control instances */
-struct amd64_pvt;
-static struct mem_ctl_info *mci_lookup[EDAC_MAX_NUMNODES];
-static struct amd64_pvt *pvt_lookup[EDAC_MAX_NUMNODES];
+/*
+ * count successfully initialized driver instances for setup_pci_device()
+ */
+static atomic_t drv_instances = ATOMIC_INIT(0);
+
+/* Per-node driver instances */
+static struct mem_ctl_info **mcis;
+static struct ecc_settings **ecc_stngs;
/*
* Address to DRAM bank mapping: see F2x80 for K8 and F2x[1,0]80 for Fam10 and
@@ -62,7 +66,7 @@ static int ddr3_dbam[] = { [0] = -1,
[5 ... 6] = 1024,
[7 ... 8] = 2048,
[9 ... 10] = 4096,
- [11] = 8192,
+ [11] = 8192,
};
/*
@@ -73,7 +77,11 @@ static int ddr3_dbam[] = { [0] = -1,
*FIXME: Produce a better mapping/linearisation.
*/
-struct scrubrate scrubrates[] = {
+
+struct scrubrate {
+ u32 scrubval; /* bit pattern for scrub rate */
+ u32 bandwidth; /* bandwidth consumed (bytes/sec) */
+} scrubrates[] = {
{ 0x01, 1600000000UL},
{ 0x02, 800000000UL},
{ 0x03, 400000000UL},
@@ -117,8 +125,7 @@ struct scrubrate scrubrates[] = {
* scan the scrub rate mapping table for a close or matching bandwidth value to
* issue. If requested is too big, then use last maximum value found.
*/
-static int amd64_search_set_scrub_rate(struct pci_dev *ctl, u32 new_bw,
- u32 min_scrubrate)
+static int __amd64_set_scrub_rate(struct pci_dev *ctl, u32 new_bw, u32 min_rate)
{
u32 scrubval;
int i;
@@ -134,7 +141,7 @@ static int amd64_search_set_scrub_rate(struct pci_dev *ctl, u32 new_bw,
* skip scrub rates which aren't recommended
* (see F10 BKDG, F3x58)
*/
- if (scrubrates[i].scrubval < min_scrubrate)
+ if (scrubrates[i].scrubval < min_rate)
continue;
if (scrubrates[i].bandwidth <= new_bw)
@@ -148,64 +155,41 @@ static int amd64_search_set_scrub_rate(struct pci_dev *ctl, u32 new_bw,
}
scrubval = scrubrates[i].scrubval;
- if (scrubval)
- edac_printk(KERN_DEBUG, EDAC_MC,
- "Setting scrub rate bandwidth: %u\n",
- scrubrates[i].bandwidth);
- else
- edac_printk(KERN_DEBUG, EDAC_MC, "Turning scrubbing off.\n");
pci_write_bits32(ctl, K8_SCRCTRL, scrubval, 0x001F);
+ if (scrubval)
+ return scrubrates[i].bandwidth;
+
return 0;
}
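
For reference, the closest-bandwidth scan that __amd64_set_scrub_rate() performs (described in the comment above the function) can be sketched standalone as below. The table is a trimmed copy of the driver's scrubrates[] and the names are illustrative; the real driver then programs the chosen scrubval via pci_write_bits32():

#include <stdio.h>

struct scrubrate {
	unsigned int scrubval;	/* bit pattern for scrub rate */
	unsigned long bandwidth;	/* bandwidth consumed (bytes/sec) */
};

/* trimmed copy of the driver table; the last entry turns scrubbing off */
static const struct scrubrate rates[] = {
	{ 0x01, 1600000000UL },
	{ 0x02,  800000000UL },
	{ 0x03,  400000000UL },
	{ 0x00,           0UL },
};

/* first entry at or below the requested bandwidth wins; entries below
 * min_rate are skipped, and the last (off) entry is the fallback */
static unsigned long pick_bandwidth(unsigned long new_bw, unsigned int min_rate)
{
	unsigned int i;

	for (i = 0; i < sizeof(rates) / sizeof(rates[0]) - 1; i++) {
		if (rates[i].scrubval < min_rate)
			continue;
		if (rates[i].bandwidth <= new_bw)
			break;
	}
	return rates[i].bandwidth;
}

int main(void)
{
	printf("%lu\n", pick_bandwidth(900000000UL, 0x0)); /* 800000000 */
	return 0;
}
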
-static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 bandwidth)
+static int amd64_set_scrub_rate(struct mem_ctl_info *mci, u32 bw)
{
struct amd64_pvt *pvt = mci->pvt_info;
- u32 min_scrubrate = 0x0;
-
- switch (boot_cpu_data.x86) {
- case 0xf:
- min_scrubrate = K8_MIN_SCRUB_RATE_BITS;
- break;
- case 0x10:
- min_scrubrate = F10_MIN_SCRUB_RATE_BITS;
- break;
- case 0x11:
- min_scrubrate = F11_MIN_SCRUB_RATE_BITS;
- break;
- default:
- amd64_printk(KERN_ERR, "Unsupported family!\n");
- return -EINVAL;
- }
- return amd64_search_set_scrub_rate(pvt->misc_f3_ctl, bandwidth,
- min_scrubrate);
+ return __amd64_set_scrub_rate(pvt->F3, bw, pvt->min_scrubrate);
}
-static int amd64_get_scrub_rate(struct mem_ctl_info *mci, u32 *bw)
+static int amd64_get_scrub_rate(struct mem_ctl_info *mci)
{
struct amd64_pvt *pvt = mci->pvt_info;
u32 scrubval = 0;
- int status = -1, i;
+ int i, retval = -EINVAL;
- amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_SCRCTRL, &scrubval);
+ amd64_read_pci_cfg(pvt->F3, K8_SCRCTRL, &scrubval);
scrubval = scrubval & 0x001F;
- edac_printk(KERN_DEBUG, EDAC_MC,
- "pci-read, sdram scrub control value: %d \n", scrubval);
+ amd64_debug("pci-read, sdram scrub control value: %d\n", scrubval);
for (i = 0; i < ARRAY_SIZE(scrubrates); i++) {
if (scrubrates[i].scrubval == scrubval) {
- *bw = scrubrates[i].bandwidth;
- status = 0;
+ retval = scrubrates[i].bandwidth;
break;
}
}
-
- return status;
+ return retval;
}
/* Map from a CSROW entry to the mask entry that operates on it */
@@ -314,9 +298,7 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
if (unlikely((intlv_en != 0x01) &&
(intlv_en != 0x03) &&
(intlv_en != 0x07))) {
- amd64_printk(KERN_WARNING, "junk value of 0x%x extracted from "
- "IntlvEn field of DRAM Base Register for node 0: "
- "this probably indicates a BIOS bug.\n", intlv_en);
+ amd64_warn("DRAM Base[IntlvEn] junk value: 0x%x, BIOS bug?\n", intlv_en);
return NULL;
}
@@ -332,11 +314,9 @@ static struct mem_ctl_info *find_mc_by_sys_addr(struct mem_ctl_info *mci,
/* sanity test for sys_addr */
if (unlikely(!amd64_base_limit_match(pvt, sys_addr, node_id))) {
- amd64_printk(KERN_WARNING,
- "%s(): sys_addr 0x%llx falls outside base/limit "
- "address range for node %d with node interleaving "
- "enabled.\n",
- __func__, sys_addr, node_id);
+ amd64_warn("%s: sys_addr 0x%llx falls outside base/limit address"
+ "range for node %d with node interleaving enabled.\n",
+ __func__, sys_addr, node_id);
return NULL;
}
@@ -788,9 +768,8 @@ static int sys_addr_to_csrow(struct mem_ctl_info *mci, u64 sys_addr)
csrow = input_addr_to_csrow(mci, sys_addr_to_input_addr(mci, sys_addr));
if (csrow == -1)
- amd64_mc_printk(mci, KERN_ERR,
- "Failed to translate InputAddr to csrow for "
- "address 0x%lx\n", (unsigned long)sys_addr);
+ amd64_mc_err(mci, "Failed to translate InputAddr to csrow for "
+ "address 0x%lx\n", (unsigned long)sys_addr);
return csrow;
}
@@ -801,21 +780,6 @@ static u16 extract_syndrome(struct err_regs *err)
return ((err->nbsh >> 15) & 0xff) | ((err->nbsl >> 16) & 0xff00);
}
-static void amd64_cpu_display_info(struct amd64_pvt *pvt)
-{
- if (boot_cpu_data.x86 == 0x11)
- edac_printk(KERN_DEBUG, EDAC_MC, "F11h CPU detected\n");
- else if (boot_cpu_data.x86 == 0x10)
- edac_printk(KERN_DEBUG, EDAC_MC, "F10h CPU detected\n");
- else if (boot_cpu_data.x86 == 0xf)
- edac_printk(KERN_DEBUG, EDAC_MC, "%s detected\n",
- (pvt->ext_model >= K8_REV_F) ?
- "Rev F or later" : "Rev E or earlier");
- else
- /* we'll hardly ever ever get here */
- edac_printk(KERN_ERR, EDAC_MC, "Unknown cpu!\n");
-}
-
/*
* Determine if the DIMMs have ECC enabled. ECC is enabled ONLY if all the DIMMs
* are ECC capable.
@@ -893,8 +857,7 @@ static void amd64_dump_misc_regs(struct amd64_pvt *pvt)
return;
}
- amd64_printk(KERN_INFO, "using %s syndromes.\n",
- ((pvt->syn_type == 8) ? "x8" : "x4"));
+ amd64_info("using %s syndromes.\n", ((pvt->syn_type == 8) ? "x8" : "x4"));
/* Only if NOT ganged does dclr1 have valid info */
if (!dct_ganging_enabled(pvt))
@@ -915,10 +878,10 @@ static void amd64_dump_misc_regs(struct amd64_pvt *pvt)
/* Read in both of DBAM registers */
static void amd64_read_dbam_reg(struct amd64_pvt *pvt)
{
- amd64_read_pci_cfg(pvt->dram_f2_ctl, DBAM0, &pvt->dbam0);
+ amd64_read_pci_cfg(pvt->F2, DBAM0, &pvt->dbam0);
if (boot_cpu_data.x86 >= 0x10)
- amd64_read_pci_cfg(pvt->dram_f2_ctl, DBAM1, &pvt->dbam1);
+ amd64_read_pci_cfg(pvt->F2, DBAM1, &pvt->dbam1);
}
/*
@@ -965,14 +928,8 @@ static void amd64_set_dct_base_and_mask(struct amd64_pvt *pvt)
pvt->dcsm_mask = REV_F_F1Xh_DCSM_MASK_BITS;
pvt->dcs_mask_notused = REV_F_F1Xh_DCS_NOTUSED_BITS;
pvt->dcs_shift = REV_F_F1Xh_DCS_SHIFT;
-
- if (boot_cpu_data.x86 == 0x11) {
- pvt->cs_count = 4;
- pvt->num_dcsm = 2;
- } else {
- pvt->cs_count = 8;
- pvt->num_dcsm = 4;
- }
+ pvt->cs_count = 8;
+ pvt->num_dcsm = 4;
}
}
@@ -987,14 +944,14 @@ static void amd64_read_dct_base_mask(struct amd64_pvt *pvt)
for (cs = 0; cs < pvt->cs_count; cs++) {
reg = K8_DCSB0 + (cs * 4);
- if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg, &pvt->dcsb0[cs]))
+ if (!amd64_read_pci_cfg(pvt->F2, reg, &pvt->dcsb0[cs]))
debugf0(" DCSB0[%d]=0x%08x reg: F2x%x\n",
cs, pvt->dcsb0[cs], reg);
/* If DCT are NOT ganged, then read in DCT1's base */
if (boot_cpu_data.x86 >= 0x10 && !dct_ganging_enabled(pvt)) {
reg = F10_DCSB1 + (cs * 4);
- if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg,
+ if (!amd64_read_pci_cfg(pvt->F2, reg,
&pvt->dcsb1[cs]))
debugf0(" DCSB1[%d]=0x%08x reg: F2x%x\n",
cs, pvt->dcsb1[cs], reg);
@@ -1005,14 +962,14 @@ static void amd64_read_dct_base_mask(struct amd64_pvt *pvt)
for (cs = 0; cs < pvt->num_dcsm; cs++) {
reg = K8_DCSM0 + (cs * 4);
- if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg, &pvt->dcsm0[cs]))
+ if (!amd64_read_pci_cfg(pvt->F2, reg, &pvt->dcsm0[cs]))
debugf0(" DCSM0[%d]=0x%08x reg: F2x%x\n",
cs, pvt->dcsm0[cs], reg);
/* If DCT are NOT ganged, then read in DCT1's mask */
if (boot_cpu_data.x86 >= 0x10 && !dct_ganging_enabled(pvt)) {
reg = F10_DCSM1 + (cs * 4);
- if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, reg,
+ if (!amd64_read_pci_cfg(pvt->F2, reg,
&pvt->dcsm1[cs]))
debugf0(" DCSM1[%d]=0x%08x reg: F2x%x\n",
cs, pvt->dcsm1[cs], reg);
@@ -1022,7 +979,7 @@ static void amd64_read_dct_base_mask(struct amd64_pvt *pvt)
}
}
-static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt)
+static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt, int cs)
{
enum mem_type type;
@@ -1035,7 +992,7 @@ static enum mem_type amd64_determine_memory_type(struct amd64_pvt *pvt)
type = (pvt->dclr0 & BIT(18)) ? MEM_DDR : MEM_RDDR;
}
- debugf1(" Memory type is: %s\n", edac_mem_types[type]);
+ amd64_info("CS%d: %s\n", cs, edac_mem_types[type]);
return type;
}
@@ -1053,17 +1010,16 @@ static int k8_early_channel_count(struct amd64_pvt *pvt)
{
int flag, err = 0;
- err = amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0);
+ err = amd64_read_pci_cfg(pvt->F2, F10_DCLR_0, &pvt->dclr0);
if (err)
return err;
- if ((boot_cpu_data.x86_model >> 4) >= K8_REV_F) {
+ if (pvt->ext_model >= K8_REV_F)
/* RevF (NPT) and later */
flag = pvt->dclr0 & F10_WIDTH_128;
- } else {
+ else
/* RevE and earlier */
flag = pvt->dclr0 & REVE_WIDTH_128;
- }
/* not used */
pvt->dclr1 = 0;
@@ -1090,14 +1046,14 @@ static void k8_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
u32 low;
u32 off = dram << 3; /* 8 bytes between DRAM entries */
- amd64_read_pci_cfg(pvt->addr_f1_ctl, K8_DRAM_BASE_LOW + off, &low);
+ amd64_read_pci_cfg(pvt->F1, K8_DRAM_BASE_LOW + off, &low);
/* Extract parts into separate data entries */
pvt->dram_base[dram] = ((u64) low & 0xFFFF0000) << 8;
pvt->dram_IntlvEn[dram] = (low >> 8) & 0x7;
pvt->dram_rw_en[dram] = (low & 0x3);
- amd64_read_pci_cfg(pvt->addr_f1_ctl, K8_DRAM_LIMIT_LOW + off, &low);
+ amd64_read_pci_cfg(pvt->F1, K8_DRAM_LIMIT_LOW + off, &low);
/*
* Extract parts into separate data entries. Limit is the HIGHEST memory
@@ -1127,9 +1083,8 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
* 2 DIMMs is in error. So we need to ID 'both' of them
* as suspect.
*/
- amd64_mc_printk(mci, KERN_WARNING,
- "unknown syndrome 0x%04x - possible "
- "error reporting race\n", syndrome);
+ amd64_mc_warn(mci, "unknown syndrome 0x%04x - possible "
+ "error reporting race\n", syndrome);
edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
return;
}
@@ -1151,8 +1106,7 @@ static void k8_map_sysaddr_to_csrow(struct mem_ctl_info *mci,
*/
src_mci = find_mc_by_sys_addr(mci, sys_addr);
if (!src_mci) {
- amd64_mc_printk(mci, KERN_ERR,
- "failed to map error address 0x%lx to a node\n",
+ amd64_mc_err(mci, "failed to map error addr 0x%lx to a node\n",
(unsigned long)sys_addr);
edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
return;
@@ -1220,7 +1174,7 @@ static int f10_early_channel_count(struct amd64_pvt *pvt)
* both controllers since DIMMs can be placed in either one.
*/
for (i = 0; i < ARRAY_SIZE(dbams); i++) {
- if (amd64_read_pci_cfg(pvt->dram_f2_ctl, dbams[i], &dbam))
+ if (amd64_read_pci_cfg(pvt->F2, dbams[i], &dbam))
goto err_reg;
for (j = 0; j < 4; j++) {
@@ -1234,7 +1188,7 @@ static int f10_early_channel_count(struct amd64_pvt *pvt)
if (channels > 2)
channels = 2;
- debugf0("MCT channel count: %d\n", channels);
+ amd64_info("MCT channel count: %d\n", channels);
return channels;
@@ -1255,31 +1209,6 @@ static int f10_dbam_to_chip_select(struct amd64_pvt *pvt, int cs_mode)
return dbam_map[cs_mode];
}
-/* Enable extended configuration access via 0xCF8 feature */
-static void amd64_setup(struct amd64_pvt *pvt)
-{
- u32 reg;
-
- amd64_read_pci_cfg(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, &reg);
-
- pvt->flags.cf8_extcfg = !!(reg & F10_NB_CFG_LOW_ENABLE_EXT_CFG);
- reg |= F10_NB_CFG_LOW_ENABLE_EXT_CFG;
- pci_write_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, reg);
-}
-
-/* Restore the extended configuration access via 0xCF8 feature */
-static void amd64_teardown(struct amd64_pvt *pvt)
-{
- u32 reg;
-
- amd64_read_pci_cfg(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, &reg);
-
- reg &= ~F10_NB_CFG_LOW_ENABLE_EXT_CFG;
- if (pvt->flags.cf8_extcfg)
- reg |= F10_NB_CFG_LOW_ENABLE_EXT_CFG;
- pci_write_config_dword(pvt->misc_f3_ctl, F10_NB_CFG_HIGH, reg);
-}
-
static u64 f10_get_error_address(struct mem_ctl_info *mci,
struct err_regs *info)
{
@@ -1301,10 +1230,8 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
high_offset = F10_DRAM_BASE_HIGH + (dram << 3);
/* read the 'raw' DRAM BASE Address register */
- amd64_read_pci_cfg(pvt->addr_f1_ctl, low_offset, &low_base);
-
- /* Read from the ECS data register */
- amd64_read_pci_cfg(pvt->addr_f1_ctl, high_offset, &high_base);
+ amd64_read_pci_cfg(pvt->F1, low_offset, &low_base);
+ amd64_read_pci_cfg(pvt->F1, high_offset, &high_base);
/* Extract parts into separate data entries */
pvt->dram_rw_en[dram] = (low_base & 0x3);
@@ -1321,10 +1248,8 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
high_offset = F10_DRAM_LIMIT_HIGH + (dram << 3);
/* read the 'raw' LIMIT registers */
- amd64_read_pci_cfg(pvt->addr_f1_ctl, low_offset, &low_limit);
-
- /* Read from the ECS data register for the HIGH portion */
- amd64_read_pci_cfg(pvt->addr_f1_ctl, high_offset, &high_limit);
+ amd64_read_pci_cfg(pvt->F1, low_offset, &low_limit);
+ amd64_read_pci_cfg(pvt->F1, high_offset, &high_limit);
pvt->dram_DstNode[dram] = (low_limit & 0x7);
pvt->dram_IntlvSel[dram] = (low_limit >> 8) & 0x7;
@@ -1341,7 +1266,7 @@ static void f10_read_dram_base_limit(struct amd64_pvt *pvt, int dram)
static void f10_read_dram_ctl_register(struct amd64_pvt *pvt)
{
- if (!amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCTL_SEL_LOW,
+ if (!amd64_read_pci_cfg(pvt->F2, F10_DCTL_SEL_LOW,
&pvt->dram_ctl_select_low)) {
debugf0("F2x110 (DCTL Sel. Low): 0x%08x, "
"High range addresses at: 0x%x\n",
@@ -1367,7 +1292,7 @@ static void f10_read_dram_ctl_register(struct amd64_pvt *pvt)
dct_sel_interleave_addr(pvt));
}
- amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCTL_SEL_HIGH,
+ amd64_read_pci_cfg(pvt->F2, F10_DCTL_SEL_HIGH,
&pvt->dram_ctl_select_high);
}
@@ -1496,7 +1421,7 @@ static int f10_lookup_addr_in_dct(u32 in_addr, u32 nid, u32 cs)
int cs_found = -EINVAL;
int csrow;
- mci = mci_lookup[nid];
+ mci = mcis[nid];
if (!mci)
return cs_found;
@@ -1572,7 +1497,7 @@ static int f10_match_to_this_node(struct amd64_pvt *pvt, int dram_range,
debugf1(" HoleOffset=0x%x HoleValid=0x%x IntlvSel=0x%x\n",
hole_off, hole_valid, intlv_sel);
- if (intlv_en ||
+ if (intlv_en &&
(intlv_sel != ((sys_addr >> 12) & intlv_en)))
return -EINVAL;
@@ -1738,28 +1663,17 @@ static void amd64_debug_display_dimm_sizes(int ctrl, struct amd64_pvt *pvt)
if (dcsb[dimm*2 + 1] & K8_DCSB_CS_ENABLE)
size1 = pvt->ops->dbam_to_cs(pvt, DBAM_DIMM(dimm, dbam));
- edac_printk(KERN_DEBUG, EDAC_MC, " %d: %5dMB %d: %5dMB\n",
- dimm * 2, size0 << factor,
- dimm * 2 + 1, size1 << factor);
+ amd64_info(EDAC_MC ": %d: %5dMB %d: %5dMB\n",
+ dimm * 2, size0 << factor,
+ dimm * 2 + 1, size1 << factor);
}
}
-/*
- * There currently are 3 types type of MC devices for AMD Athlon/Opterons
- * (as per PCI DEVICE_IDs):
- *
- * Family K8: That is the Athlon64 and Opteron CPUs. They all have the same PCI
- * DEVICE ID, even though there is differences between the different Revisions
- * (CG,D,E,F).
- *
- * Family F10h and F11h.
- *
- */
static struct amd64_family_type amd64_family_types[] = {
[K8_CPUS] = {
- .ctl_name = "RevF",
- .addr_f1_ctl = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
- .misc_f3_ctl = PCI_DEVICE_ID_AMD_K8_NB_MISC,
+ .ctl_name = "K8",
+ .f1_id = PCI_DEVICE_ID_AMD_K8_NB_ADDRMAP,
+ .f3_id = PCI_DEVICE_ID_AMD_K8_NB_MISC,
.ops = {
.early_channel_count = k8_early_channel_count,
.get_error_address = k8_get_error_address,
@@ -1769,22 +1683,9 @@ static struct amd64_family_type amd64_family_types[] = {
}
},
[F10_CPUS] = {
- .ctl_name = "Family 10h",
- .addr_f1_ctl = PCI_DEVICE_ID_AMD_10H_NB_MAP,
- .misc_f3_ctl = PCI_DEVICE_ID_AMD_10H_NB_MISC,
- .ops = {
- .early_channel_count = f10_early_channel_count,
- .get_error_address = f10_get_error_address,
- .read_dram_base_limit = f10_read_dram_base_limit,
- .read_dram_ctl_register = f10_read_dram_ctl_register,
- .map_sysaddr_to_csrow = f10_map_sysaddr_to_csrow,
- .dbam_to_cs = f10_dbam_to_chip_select,
- }
- },
- [F11_CPUS] = {
- .ctl_name = "Family 11h",
- .addr_f1_ctl = PCI_DEVICE_ID_AMD_11H_NB_MAP,
- .misc_f3_ctl = PCI_DEVICE_ID_AMD_11H_NB_MISC,
+ .ctl_name = "F10h",
+ .f1_id = PCI_DEVICE_ID_AMD_10H_NB_MAP,
+ .f3_id = PCI_DEVICE_ID_AMD_10H_NB_MISC,
.ops = {
.early_channel_count = f10_early_channel_count,
.get_error_address = f10_get_error_address,
@@ -1970,8 +1871,7 @@ static int get_channel_from_ecc_syndrome(struct mem_ctl_info *mci, u16 syndrome)
ARRAY_SIZE(x4_vectors),
pvt->syn_type);
else {
- amd64_printk(KERN_WARNING, "%s: Illegal syndrome type: %u\n",
- __func__, pvt->syn_type);
+ amd64_warn("Illegal syndrome type: %u\n", pvt->syn_type);
return err_sym;
}
@@ -1989,17 +1889,15 @@ static void amd64_handle_ce(struct mem_ctl_info *mci,
u64 sys_addr;
/* Ensure that the Error Address is VALID */
- if ((info->nbsh & K8_NBSH_VALID_ERROR_ADDR) == 0) {
- amd64_mc_printk(mci, KERN_ERR,
- "HW has no ERROR_ADDRESS available\n");
+ if (!(info->nbsh & K8_NBSH_VALID_ERROR_ADDR)) {
+ amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
edac_mc_handle_ce_no_info(mci, EDAC_MOD_STR);
return;
}
sys_addr = pvt->ops->get_error_address(mci, info);
- amd64_mc_printk(mci, KERN_ERR,
- "CE ERROR_ADDRESS= 0x%llx\n", sys_addr);
+ amd64_mc_err(mci, "CE ERROR_ADDRESS= 0x%llx\n", sys_addr);
pvt->ops->map_sysaddr_to_csrow(mci, info, sys_addr);
}
@@ -2016,9 +1914,8 @@ static void amd64_handle_ue(struct mem_ctl_info *mci,
log_mci = mci;
- if ((info->nbsh & K8_NBSH_VALID_ERROR_ADDR) == 0) {
- amd64_mc_printk(mci, KERN_CRIT,
- "HW has no ERROR_ADDRESS available\n");
+ if (!(info->nbsh & K8_NBSH_VALID_ERROR_ADDR)) {
+ amd64_mc_err(mci, "HW has no ERROR_ADDRESS available\n");
edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
return;
}
@@ -2031,9 +1928,8 @@ static void amd64_handle_ue(struct mem_ctl_info *mci,
*/
src_mci = find_mc_by_sys_addr(mci, sys_addr);
if (!src_mci) {
- amd64_mc_printk(mci, KERN_CRIT,
- "ERROR ADDRESS (0x%lx) value NOT mapped to a MC\n",
- (unsigned long)sys_addr);
+ amd64_mc_err(mci, "ERROR ADDRESS (0x%lx) NOT mapped to a MC\n",
+ (unsigned long)sys_addr);
edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
return;
}
@@ -2042,9 +1938,8 @@ static void amd64_handle_ue(struct mem_ctl_info *mci,
csrow = sys_addr_to_csrow(log_mci, sys_addr);
if (csrow < 0) {
- amd64_mc_printk(mci, KERN_CRIT,
- "ERROR_ADDRESS (0x%lx) value NOT mapped to 'csrow'\n",
- (unsigned long)sys_addr);
+ amd64_mc_err(mci, "ERROR_ADDRESS (0x%lx) NOT mapped to CS\n",
+ (unsigned long)sys_addr);
edac_mc_handle_ue_no_info(log_mci, EDAC_MOD_STR);
} else {
error_address_to_page_and_offset(sys_addr, &page, &offset);
@@ -2055,8 +1950,8 @@ static void amd64_handle_ue(struct mem_ctl_info *mci,
static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
struct err_regs *info)
{
- u32 ec = ERROR_CODE(info->nbsl);
- u32 xec = EXT_ERROR_CODE(info->nbsl);
+ u16 ec = EC(info->nbsl);
+ u8 xec = XEC(info->nbsl, 0x1f);
int ecc_type = (info->nbsh >> 13) & 0x3;
/* Bail early out if this was an 'observed' error */
@@ -2075,7 +1970,7 @@ static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci,
void amd64_decode_bus_error(int node_id, struct mce *m, u32 nbcfg)
{
- struct mem_ctl_info *mci = mci_lookup[node_id];
+ struct mem_ctl_info *mci = mcis[node_id];
struct err_regs regs;
regs.nbsl = (u32) m->status;
@@ -2099,75 +1994,50 @@ void amd64_decode_bus_error(int node_id, struct mce *m, u32 nbcfg)
}
/*
- * Input:
- * 1) struct amd64_pvt which contains pvt->dram_f2_ctl pointer
- * 2) AMD Family index value
- *
- * Ouput:
- * Upon return of 0, the following filled in:
- *
- * struct pvt->addr_f1_ctl
- * struct pvt->misc_f3_ctl
- *
- * Filled in with related device funcitions of 'dram_f2_ctl'
- * These devices are "reserved" via the pci_get_device()
- *
- * Upon return of 1 (error status):
- *
- * Nothing reserved
+ * Use pvt->F2 which contains the F2 CPU PCI device to get the related
+ * F1 (AddrMap) and F3 (Misc) devices. Return negative value on error.
*/
-static int amd64_reserve_mc_sibling_devices(struct amd64_pvt *pvt, int mc_idx)
+static int reserve_mc_sibling_devs(struct amd64_pvt *pvt, u16 f1_id, u16 f3_id)
{
- const struct amd64_family_type *amd64_dev = &amd64_family_types[mc_idx];
-
/* Reserve the ADDRESS MAP Device */
- pvt->addr_f1_ctl = pci_get_related_function(pvt->dram_f2_ctl->vendor,
- amd64_dev->addr_f1_ctl,
- pvt->dram_f2_ctl);
-
- if (!pvt->addr_f1_ctl) {
- amd64_printk(KERN_ERR, "error address map device not found: "
- "vendor %x device 0x%x (broken BIOS?)\n",
- PCI_VENDOR_ID_AMD, amd64_dev->addr_f1_ctl);
- return 1;
+ pvt->F1 = pci_get_related_function(pvt->F2->vendor, f1_id, pvt->F2);
+ if (!pvt->F1) {
+ amd64_err("error address map device not found: "
+ "vendor %x device 0x%x (broken BIOS?)\n",
+ PCI_VENDOR_ID_AMD, f1_id);
+ return -ENODEV;
}
/* Reserve the MISC Device */
- pvt->misc_f3_ctl = pci_get_related_function(pvt->dram_f2_ctl->vendor,
- amd64_dev->misc_f3_ctl,
- pvt->dram_f2_ctl);
+ pvt->F3 = pci_get_related_function(pvt->F2->vendor, f3_id, pvt->F2);
+ if (!pvt->F3) {
+ pci_dev_put(pvt->F1);
+ pvt->F1 = NULL;
- if (!pvt->misc_f3_ctl) {
- pci_dev_put(pvt->addr_f1_ctl);
- pvt->addr_f1_ctl = NULL;
+ amd64_err("error F3 device not found: "
+ "vendor %x device 0x%x (broken BIOS?)\n",
+ PCI_VENDOR_ID_AMD, f3_id);
- amd64_printk(KERN_ERR, "error miscellaneous device not found: "
- "vendor %x device 0x%x (broken BIOS?)\n",
- PCI_VENDOR_ID_AMD, amd64_dev->misc_f3_ctl);
- return 1;
+ return -ENODEV;
}
-
- debugf1(" Addr Map device PCI Bus ID:\t%s\n",
- pci_name(pvt->addr_f1_ctl));
- debugf1(" DRAM MEM-CTL PCI Bus ID:\t%s\n",
- pci_name(pvt->dram_f2_ctl));
- debugf1(" Misc device PCI Bus ID:\t%s\n",
- pci_name(pvt->misc_f3_ctl));
+ debugf1("F1: %s\n", pci_name(pvt->F1));
+ debugf1("F2: %s\n", pci_name(pvt->F2));
+ debugf1("F3: %s\n", pci_name(pvt->F3));
return 0;
}
-static void amd64_free_mc_sibling_devices(struct amd64_pvt *pvt)
+static void free_mc_sibling_devs(struct amd64_pvt *pvt)
{
- pci_dev_put(pvt->addr_f1_ctl);
- pci_dev_put(pvt->misc_f3_ctl);
+ pci_dev_put(pvt->F1);
+ pci_dev_put(pvt->F3);
}
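
The pci_get_related_function() helper used by reserve_mc_sibling_devs() above is not shown in this hunk. Conceptually it locates a sibling northbridge function; a minimal sketch under the assumption that F1-F3 sit at the same bus and slot as F2, differing only in the PCI function number, could look like this (assumed names, not the driver's actual implementation):

#include <linux/pci.h>

static struct pci_dev *sibling_of(struct pci_dev *f2, u16 dev_id)
{
	struct pci_dev *dev = NULL;

	/* walk all devices with the wanted ID, match F2's bus and slot */
	while ((dev = pci_get_device(f2->vendor, dev_id, dev)) != NULL) {
		if (pci_domain_nr(dev->bus) == pci_domain_nr(f2->bus) &&
		    dev->bus->number == f2->bus->number &&
		    PCI_SLOT(dev->devfn) == PCI_SLOT(f2->devfn))
			return dev; /* holds a ref; drop with pci_dev_put() */
	}
	return NULL;
}
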
/*
* Retrieve the hardware registers of the memory controller (this includes the
* 'Address Map' and 'Misc' device regs)
*/
-static void amd64_read_mc_registers(struct amd64_pvt *pvt)
+static void read_mc_regs(struct amd64_pvt *pvt)
{
u64 msr_val;
u32 tmp;
@@ -2188,9 +2058,7 @@ static void amd64_read_mc_registers(struct amd64_pvt *pvt)
} else
debugf0(" TOP_MEM2 disabled.\n");
- amd64_cpu_display_info(pvt);
-
- amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCAP, &pvt->nbcap);
+ amd64_read_pci_cfg(pvt->F3, K8_NBCAP, &pvt->nbcap);
if (pvt->ops->read_dram_ctl_register)
pvt->ops->read_dram_ctl_register(pvt);
@@ -2227,21 +2095,20 @@ static void amd64_read_mc_registers(struct amd64_pvt *pvt)
amd64_read_dct_base_mask(pvt);
- amd64_read_pci_cfg(pvt->addr_f1_ctl, K8_DHAR, &pvt->dhar);
+ amd64_read_pci_cfg(pvt->F1, K8_DHAR, &pvt->dhar);
amd64_read_dbam_reg(pvt);
- amd64_read_pci_cfg(pvt->misc_f3_ctl,
- F10_ONLINE_SPARE, &pvt->online_spare);
+ amd64_read_pci_cfg(pvt->F3, F10_ONLINE_SPARE, &pvt->online_spare);
- amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_0, &pvt->dclr0);
- amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCHR_0, &pvt->dchr0);
+ amd64_read_pci_cfg(pvt->F2, F10_DCLR_0, &pvt->dclr0);
+ amd64_read_pci_cfg(pvt->F2, F10_DCHR_0, &pvt->dchr0);
if (boot_cpu_data.x86 >= 0x10) {
if (!dct_ganging_enabled(pvt)) {
- amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCLR_1, &pvt->dclr1);
- amd64_read_pci_cfg(pvt->dram_f2_ctl, F10_DCHR_1, &pvt->dchr1);
+ amd64_read_pci_cfg(pvt->F2, F10_DCLR_1, &pvt->dclr1);
+ amd64_read_pci_cfg(pvt->F2, F10_DCHR_1, &pvt->dchr1);
}
- amd64_read_pci_cfg(pvt->misc_f3_ctl, EXT_NB_MCA_CFG, &tmp);
+ amd64_read_pci_cfg(pvt->F3, EXT_NB_MCA_CFG, &tmp);
}
if (boot_cpu_data.x86 == 0x10 &&
@@ -2321,21 +2188,22 @@ static u32 amd64_csrow_nr_pages(int csrow_nr, struct amd64_pvt *pvt)
* Initialize the array of csrow attribute instances, based on the values
* from pci config hardware registers.
*/
-static int amd64_init_csrows(struct mem_ctl_info *mci)
+static int init_csrows(struct mem_ctl_info *mci)
{
struct csrow_info *csrow;
- struct amd64_pvt *pvt;
+ struct amd64_pvt *pvt = mci->pvt_info;
u64 input_addr_min, input_addr_max, sys_addr;
+ u32 val;
int i, empty = 1;
- pvt = mci->pvt_info;
+ amd64_read_pci_cfg(pvt->F3, K8_NBCFG, &val);
- amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &pvt->nbcfg);
+ pvt->nbcfg = val;
+ pvt->ctl_error_info.nbcfg = val;
- debugf0("NBCFG= 0x%x CHIPKILL= %s DRAM ECC= %s\n", pvt->nbcfg,
- (pvt->nbcfg & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
- (pvt->nbcfg & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled"
- );
+ debugf0("node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
+ pvt->mc_node_id, val,
+ !!(val & K8_NBCFG_CHIPKILL), !!(val & K8_NBCFG_ECC_ENABLE));
for (i = 0; i < pvt->cs_count; i++) {
csrow = &mci->csrows[i];
@@ -2359,7 +2227,7 @@ static int amd64_init_csrows(struct mem_ctl_info *mci)
csrow->page_mask = ~mask_from_dct_mask(pvt, i);
/* 8 bytes of resolution */
- csrow->mtype = amd64_determine_memory_type(pvt);
+ csrow->mtype = amd64_determine_memory_type(pvt, i);
debugf1(" for MC node %d csrow %d:\n", pvt->mc_node_id, i);
debugf1(" input_addr_min: 0x%lx input_addr_max: 0x%lx\n",
@@ -2404,8 +2272,7 @@ static bool amd64_nb_mce_bank_enabled_on_node(int nid)
bool ret = false;
if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) {
- amd64_printk(KERN_WARNING, "%s: error allocating mask\n",
- __func__);
+ amd64_warn("%s: Error allocating mask\n", __func__);
return false;
}
@@ -2431,18 +2298,17 @@ out:
return ret;
}
-static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on)
+static int toggle_ecc_err_reporting(struct ecc_settings *s, u8 nid, bool on)
{
cpumask_var_t cmask;
int cpu;
if (!zalloc_cpumask_var(&cmask, GFP_KERNEL)) {
- amd64_printk(KERN_WARNING, "%s: error allocating mask\n",
- __func__);
+ amd64_warn("%s: error allocating mask\n", __func__);
return false;
}
- get_cpus_on_this_dct_cpumask(cmask, pvt->mc_node_id);
+ get_cpus_on_this_dct_cpumask(cmask, nid);
rdmsr_on_cpus(cmask, MSR_IA32_MCG_CTL, msrs);
@@ -2452,14 +2318,14 @@ static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on)
if (on) {
if (reg->l & K8_MSR_MCGCTL_NBE)
- pvt->flags.nb_mce_enable = 1;
+ s->flags.nb_mce_enable = 1;
reg->l |= K8_MSR_MCGCTL_NBE;
} else {
/*
* Turn off NB MCE reporting only when it was off before
*/
- if (!pvt->flags.nb_mce_enable)
+ if (!s->flags.nb_mce_enable)
reg->l &= ~K8_MSR_MCGCTL_NBE;
}
}
@@ -2470,92 +2336,92 @@ static int amd64_toggle_ecc_err_reporting(struct amd64_pvt *pvt, bool on)
return 0;
}
-static void amd64_enable_ecc_error_reporting(struct mem_ctl_info *mci)
+static bool enable_ecc_error_reporting(struct ecc_settings *s, u8 nid,
+ struct pci_dev *F3)
{
- struct amd64_pvt *pvt = mci->pvt_info;
+ bool ret = true;
u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
- amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCTL, &value);
+ if (toggle_ecc_err_reporting(s, nid, ON)) {
+ amd64_warn("Error enabling ECC reporting over MCGCTL!\n");
+ return false;
+ }
+
+ amd64_read_pci_cfg(F3, K8_NBCTL, &value);
- /* turn on UECCn and CECCEn bits */
- pvt->old_nbctl = value & mask;
- pvt->nbctl_mcgctl_saved = 1;
+ /* turn on UECCEn and CECCEn bits */
+ s->old_nbctl = value & mask;
+ s->nbctl_valid = true;
value |= mask;
- pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
-
- if (amd64_toggle_ecc_err_reporting(pvt, ON))
- amd64_printk(KERN_WARNING, "Error enabling ECC reporting over "
- "MCGCTL!\n");
+ pci_write_config_dword(F3, K8_NBCTL, value);
- amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &value);
+ amd64_read_pci_cfg(F3, K8_NBCFG, &value);
- debugf0("NBCFG(1)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value,
- (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
- (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
+ debugf0("1: node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
+ nid, value,
+ !!(value & K8_NBCFG_CHIPKILL), !!(value & K8_NBCFG_ECC_ENABLE));
if (!(value & K8_NBCFG_ECC_ENABLE)) {
- amd64_printk(KERN_WARNING,
- "This node reports that DRAM ECC is "
- "currently Disabled; ENABLING now\n");
+ amd64_warn("DRAM ECC disabled on this node, enabling...\n");
- pvt->flags.nb_ecc_prev = 0;
+ s->flags.nb_ecc_prev = 0;
/* Attempt to turn on DRAM ECC Enable */
value |= K8_NBCFG_ECC_ENABLE;
- pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCFG, value);
+ pci_write_config_dword(F3, K8_NBCFG, value);
- amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &value);
+ amd64_read_pci_cfg(F3, K8_NBCFG, &value);
if (!(value & K8_NBCFG_ECC_ENABLE)) {
- amd64_printk(KERN_WARNING,
- "Hardware rejects Enabling DRAM ECC checking\n"
- "Check memory DIMM configuration\n");
+ amd64_warn("Hardware rejected DRAM ECC enable,"
+ "check memory DIMM configuration.\n");
+ ret = false;
} else {
- amd64_printk(KERN_DEBUG,
- "Hardware accepted DRAM ECC Enable\n");
+ amd64_info("Hardware accepted DRAM ECC Enable\n");
}
} else {
- pvt->flags.nb_ecc_prev = 1;
+ s->flags.nb_ecc_prev = 1;
}
- debugf0("NBCFG(2)= 0x%x CHIPKILL= %s ECC_ENABLE= %s\n", value,
- (value & K8_NBCFG_CHIPKILL) ? "Enabled" : "Disabled",
- (value & K8_NBCFG_ECC_ENABLE) ? "Enabled" : "Disabled");
+ debugf0("2: node %d, NBCFG=0x%08x[ChipKillEccCap: %d|DramEccEn: %d]\n",
+ nid, value,
+ !!(value & K8_NBCFG_CHIPKILL), !!(value & K8_NBCFG_ECC_ENABLE));
- pvt->ctl_error_info.nbcfg = value;
+ return ret;
}
-static void amd64_restore_ecc_error_reporting(struct amd64_pvt *pvt)
+static void restore_ecc_error_reporting(struct ecc_settings *s, u8 nid,
+ struct pci_dev *F3)
{
u32 value, mask = K8_NBCTL_CECCEn | K8_NBCTL_UECCEn;
- if (!pvt->nbctl_mcgctl_saved)
+ if (!s->nbctl_valid)
return;
- amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCTL, &value);
+ amd64_read_pci_cfg(F3, K8_NBCTL, &value);
value &= ~mask;
- value |= pvt->old_nbctl;
+ value |= s->old_nbctl;
- pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCTL, value);
+ pci_write_config_dword(F3, K8_NBCTL, value);
- /* restore previous BIOS DRAM ECC "off" setting which we force-enabled */
- if (!pvt->flags.nb_ecc_prev) {
- amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &value);
+ /* restore previous BIOS DRAM ECC "off" setting we force-enabled */
+ if (!s->flags.nb_ecc_prev) {
+ amd64_read_pci_cfg(F3, K8_NBCFG, &value);
value &= ~K8_NBCFG_ECC_ENABLE;
- pci_write_config_dword(pvt->misc_f3_ctl, K8_NBCFG, value);
+ pci_write_config_dword(F3, K8_NBCFG, value);
}
/* restore the NB Enable MCGCTL bit */
- if (amd64_toggle_ecc_err_reporting(pvt, OFF))
- amd64_printk(KERN_WARNING, "Error restoring NB MCGCTL settings!\n");
+ if (toggle_ecc_err_reporting(s, nid, OFF))
+ amd64_warn("Error restoring NB MCGCTL settings!\n");
}
/*
- * EDAC requires that the BIOS have ECC enabled before taking over the
- * processing of ECC errors. This is because the BIOS can properly initialize
- * the memory system completely. A command line option allows to force-enable
- * hardware ECC later in amd64_enable_ecc_error_reporting().
+ * EDAC requires that the BIOS have ECC enabled before
+ * taking over the processing of ECC errors. A command line
+ * option allows one to force-enable hardware ECC later in
+ * enable_ecc_error_reporting().
*/
static const char *ecc_msg =
"ECC disabled in the BIOS or no ECC capability, module will not load.\n"
@@ -2563,38 +2429,28 @@ static const char *ecc_msg =
"'ecc_enable_override'.\n"
" (Note that use of the override may cause unknown side effects.)\n";
-static int amd64_check_ecc_enabled(struct amd64_pvt *pvt)
+static bool ecc_enabled(struct pci_dev *F3, u8 nid)
{
u32 value;
- u8 ecc_enabled = 0;
+ u8 ecc_en = 0;
bool nb_mce_en = false;
- amd64_read_pci_cfg(pvt->misc_f3_ctl, K8_NBCFG, &value);
+ amd64_read_pci_cfg(F3, K8_NBCFG, &value);
- ecc_enabled = !!(value & K8_NBCFG_ECC_ENABLE);
- if (!ecc_enabled)
- amd64_printk(KERN_NOTICE, "This node reports that Memory ECC "
- "is currently disabled, set F3x%x[22] (%s).\n",
- K8_NBCFG, pci_name(pvt->misc_f3_ctl));
- else
- amd64_printk(KERN_INFO, "ECC is enabled by BIOS.\n");
+ ecc_en = !!(value & K8_NBCFG_ECC_ENABLE);
+ amd64_info("DRAM ECC %s.\n", (ecc_en ? "enabled" : "disabled"));
- nb_mce_en = amd64_nb_mce_bank_enabled_on_node(pvt->mc_node_id);
+ nb_mce_en = amd64_nb_mce_bank_enabled_on_node(nid);
if (!nb_mce_en)
- amd64_printk(KERN_NOTICE, "NB MCE bank disabled, set MSR "
+ amd64_notice("NB MCE bank disabled, set MSR "
"0x%08x[4] on node %d to enable.\n",
- MSR_IA32_MCG_CTL, pvt->mc_node_id);
+ MSR_IA32_MCG_CTL, nid);
- if (!ecc_enabled || !nb_mce_en) {
- if (!ecc_enable_override) {
- amd64_printk(KERN_NOTICE, "%s", ecc_msg);
- return -ENODEV;
- } else {
- amd64_printk(KERN_WARNING, "Forcing ECC checking on!\n");
- }
+ if (!ecc_en || !nb_mce_en) {
+ amd64_notice("%s", ecc_msg);
+ return false;
}
-
- return 0;
+ return true;
}
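
When these checks fail, loading can still be forced with the module parameter declared at the top of amd64_edac.c, e.g.:

	modprobe amd64_edac ecc_enable_override=1

As the ecc_msg text warns, use of the override may cause unknown side effects.
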
struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) +
@@ -2603,22 +2459,23 @@ struct mcidev_sysfs_attribute sysfs_attrs[ARRAY_SIZE(amd64_dbg_attrs) +
struct mcidev_sysfs_attribute terminator = { .attr = { .name = NULL } };
-static void amd64_set_mc_sysfs_attributes(struct mem_ctl_info *mci)
+static void set_mc_sysfs_attrs(struct mem_ctl_info *mci)
{
unsigned int i = 0, j = 0;
for (; i < ARRAY_SIZE(amd64_dbg_attrs); i++)
sysfs_attrs[i] = amd64_dbg_attrs[i];
- for (j = 0; j < ARRAY_SIZE(amd64_inj_attrs); j++, i++)
- sysfs_attrs[i] = amd64_inj_attrs[j];
+ if (boot_cpu_data.x86 >= 0x10)
+ for (j = 0; j < ARRAY_SIZE(amd64_inj_attrs); j++, i++)
+ sysfs_attrs[i] = amd64_inj_attrs[j];
sysfs_attrs[i] = terminator;
mci->mc_driver_sysfs_attributes = sysfs_attrs;
}
-static void amd64_setup_mci_misc_attributes(struct mem_ctl_info *mci)
+static void setup_mci_misc_attrs(struct mem_ctl_info *mci)
{
struct amd64_pvt *pvt = mci->pvt_info;
@@ -2634,8 +2491,8 @@ static void amd64_setup_mci_misc_attributes(struct mem_ctl_info *mci)
mci->edac_cap = amd64_determine_edac_cap(pvt);
mci->mod_name = EDAC_MOD_STR;
mci->mod_ver = EDAC_AMD64_VERSION;
- mci->ctl_name = get_amd_family_name(pvt->mc_type_index);
- mci->dev_name = pci_name(pvt->dram_f2_ctl);
+ mci->ctl_name = pvt->ctl_name;
+ mci->dev_name = pci_name(pvt->F2);
mci->ctl_page_to_phys = NULL;
/* memory scrubber interface */
@@ -2644,111 +2501,94 @@ static void amd64_setup_mci_misc_attributes(struct mem_ctl_info *mci)
}
/*
- * Init stuff for this DRAM Controller device.
- *
- * Due to a hardware feature on Fam10h CPUs, the Enable Extended Configuration
- * Space feature MUST be enabled on ALL Processors prior to actually reading
- * from the ECS registers. Since the loading of the module can occur on any
- * 'core', and cores don't 'see' all the other processors ECS data when the
- * others are NOT enabled. Our solution is to first enable ECS access in this
- * routine on all processors, gather some data in a amd64_pvt structure and
- * later come back in a finish-setup function to perform that final
- * initialization. See also amd64_init_2nd_stage() for that.
+ * returns a pointer to the family descriptor on success, NULL otherwise.
*/
-static int amd64_probe_one_instance(struct pci_dev *dram_f2_ctl,
- int mc_type_index)
+static struct amd64_family_type *amd64_per_family_init(struct amd64_pvt *pvt)
+{
+ u8 fam = boot_cpu_data.x86;
+ struct amd64_family_type *fam_type = NULL;
+
+ switch (fam) {
+ case 0xf:
+ fam_type = &amd64_family_types[K8_CPUS];
+ pvt->ops = &amd64_family_types[K8_CPUS].ops;
+ pvt->ctl_name = fam_type->ctl_name;
+ pvt->min_scrubrate = K8_MIN_SCRUB_RATE_BITS;
+ break;
+ case 0x10:
+ fam_type = &amd64_family_types[F10_CPUS];
+ pvt->ops = &amd64_family_types[F10_CPUS].ops;
+ pvt->ctl_name = fam_type->ctl_name;
+ pvt->min_scrubrate = F10_MIN_SCRUB_RATE_BITS;
+ break;
+
+ default:
+ amd64_err("Unsupported family!\n");
+ return NULL;
+ }
+
+ pvt->ext_model = boot_cpu_data.x86_model >> 4;
+
+ amd64_info("%s %sdetected (node %d).\n", pvt->ctl_name,
+ (fam == 0xf ?
+ (pvt->ext_model >= K8_REV_F ? "revF or later "
+ : "revE or earlier ")
+ : ""), pvt->mc_node_id);
+ return fam_type;
+}
+
+static int amd64_init_one_instance(struct pci_dev *F2)
{
struct amd64_pvt *pvt = NULL;
+ struct amd64_family_type *fam_type = NULL;
+ struct mem_ctl_info *mci = NULL;
int err = 0, ret;
+ u8 nid = get_node_id(F2);
ret = -ENOMEM;
pvt = kzalloc(sizeof(struct amd64_pvt), GFP_KERNEL);
if (!pvt)
- goto err_exit;
+ goto err_ret;
- pvt->mc_node_id = get_node_id(dram_f2_ctl);
+ pvt->mc_node_id = nid;
+ pvt->F2 = F2;
- pvt->dram_f2_ctl = dram_f2_ctl;
- pvt->ext_model = boot_cpu_data.x86_model >> 4;
- pvt->mc_type_index = mc_type_index;
- pvt->ops = family_ops(mc_type_index);
+ ret = -EINVAL;
+ fam_type = amd64_per_family_init(pvt);
+ if (!fam_type)
+ goto err_free;
- /*
- * We have the dram_f2_ctl device as an argument, now go reserve its
- * sibling devices from the PCI system.
- */
ret = -ENODEV;
- err = amd64_reserve_mc_sibling_devices(pvt, mc_type_index);
+ err = reserve_mc_sibling_devs(pvt, fam_type->f1_id, fam_type->f3_id);
if (err)
goto err_free;
- ret = -EINVAL;
- err = amd64_check_ecc_enabled(pvt);
- if (err)
- goto err_put;
-
- /*
- * Key operation here: setup of HW prior to performing ops on it. Some
- * setup is required to access ECS data. After this is performed, the
- * 'teardown' function must be called upon error and normal exit paths.
- */
- if (boot_cpu_data.x86 >= 0x10)
- amd64_setup(pvt);
-
- /*
- * Save the pointer to the private data for use in 2nd initialization
- * stage
- */
- pvt_lookup[pvt->mc_node_id] = pvt;
-
- return 0;
-
-err_put:
- amd64_free_mc_sibling_devices(pvt);
-
-err_free:
- kfree(pvt);
-
-err_exit:
- return ret;
-}
-
-/*
- * This is the finishing stage of the init code. Needs to be performed after all
- * MCs' hardware have been prepped for accessing extended config space.
- */
-static int amd64_init_2nd_stage(struct amd64_pvt *pvt)
-{
- int node_id = pvt->mc_node_id;
- struct mem_ctl_info *mci;
- int ret = -ENODEV;
-
- amd64_read_mc_registers(pvt);
+ read_mc_regs(pvt);
/*
* We need to determine how many memory channels there are. Then use
* that information for calculating the size of the dynamic instance
- * tables in the 'mci' structure
+ * tables in the 'mci' structure.
*/
+ ret = -EINVAL;
pvt->channel_count = pvt->ops->early_channel_count(pvt);
if (pvt->channel_count < 0)
- goto err_exit;
+ goto err_siblings;
ret = -ENOMEM;
- mci = edac_mc_alloc(0, pvt->cs_count, pvt->channel_count, node_id);
+ mci = edac_mc_alloc(0, pvt->cs_count, pvt->channel_count, nid);
if (!mci)
- goto err_exit;
+ goto err_siblings;
mci->pvt_info = pvt;
+ mci->dev = &pvt->F2->dev;
- mci->dev = &pvt->dram_f2_ctl->dev;
- amd64_setup_mci_misc_attributes(mci);
+ setup_mci_misc_attrs(mci);
- if (amd64_init_csrows(mci))
+ if (init_csrows(mci))
mci->edac_cap = EDAC_FLAG_NONE;
- amd64_enable_ecc_error_reporting(mci);
- amd64_set_mc_sysfs_attributes(mci);
+ set_mc_sysfs_attrs(mci);
ret = -ENODEV;
if (edac_mc_add_mc(mci)) {
@@ -2756,54 +2596,77 @@ static int amd64_init_2nd_stage(struct amd64_pvt *pvt)
goto err_add_mc;
}
- mci_lookup[node_id] = mci;
- pvt_lookup[node_id] = NULL;
-
/* register stuff with EDAC MCE */
if (report_gart_errors)
amd_report_gart_errors(true);
amd_register_ecc_decoder(amd64_decode_bus_error);
+ mcis[nid] = mci;
+
+ atomic_inc(&drv_instances);
+
return 0;
err_add_mc:
edac_mc_free(mci);
-err_exit:
- debugf0("failure to init 2nd stage: ret=%d\n", ret);
-
- amd64_restore_ecc_error_reporting(pvt);
-
- if (boot_cpu_data.x86 > 0xf)
- amd64_teardown(pvt);
+err_siblings:
+ free_mc_sibling_devs(pvt);
- amd64_free_mc_sibling_devices(pvt);
-
- kfree(pvt_lookup[pvt->mc_node_id]);
- pvt_lookup[node_id] = NULL;
+err_free:
+ kfree(pvt);
+err_ret:
return ret;
}
-
-static int __devinit amd64_init_one_instance(struct pci_dev *pdev,
- const struct pci_device_id *mc_type)
+static int __devinit amd64_probe_one_instance(struct pci_dev *pdev,
+ const struct pci_device_id *mc_type)
{
+ u8 nid = get_node_id(pdev);
+ struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
+ struct ecc_settings *s;
int ret = 0;
- debugf0("(MC node=%d,mc_type='%s')\n", get_node_id(pdev),
- get_amd_family_name(mc_type->driver_data));
-
ret = pci_enable_device(pdev);
- if (ret < 0)
- ret = -EIO;
- else
- ret = amd64_probe_one_instance(pdev, mc_type->driver_data);
-
- if (ret < 0)
+ if (ret < 0) {
debugf0("ret=%d\n", ret);
+ return -EIO;
+ }
+
+ ret = -ENOMEM;
+ s = kzalloc(sizeof(struct ecc_settings), GFP_KERNEL);
+ if (!s)
+ goto err_out;
+
+ ecc_stngs[nid] = s;
+
+ if (!ecc_enabled(F3, nid)) {
+ ret = -ENODEV;
+
+ if (!ecc_enable_override)
+ goto err_enable;
+
+ amd64_warn("Forcing ECC on!\n");
+
+ if (!enable_ecc_error_reporting(s, nid, F3))
+ goto err_enable;
+ }
+ ret = amd64_init_one_instance(pdev);
+ if (ret < 0) {
+ amd64_err("Error probing instance: %d\n", nid);
+ restore_ecc_error_reporting(s, nid, F3);
+ }
+
+ return ret;
+
+err_enable:
+ kfree(s);
+ ecc_stngs[nid] = NULL;
+
+err_out:
return ret;
}
@@ -2811,6 +2674,9 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
{
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
+ u8 nid = get_node_id(pdev);
+ struct pci_dev *F3 = node_to_amd_nb(nid)->misc;
+ struct ecc_settings *s = ecc_stngs[nid];
/* Remove from EDAC CORE tracking list */
mci = edac_mc_del_mc(&pdev->dev);
@@ -2819,20 +2685,20 @@ static void __devexit amd64_remove_one_instance(struct pci_dev *pdev)
pvt = mci->pvt_info;
- amd64_restore_ecc_error_reporting(pvt);
+ restore_ecc_error_reporting(s, nid, F3);
- if (boot_cpu_data.x86 > 0xf)
- amd64_teardown(pvt);
-
- amd64_free_mc_sibling_devices(pvt);
+ free_mc_sibling_devs(pvt);
/* unregister from EDAC MCE */
amd_report_gart_errors(false);
amd_unregister_ecc_decoder(amd64_decode_bus_error);
+ kfree(ecc_stngs[nid]);
+ ecc_stngs[nid] = NULL;
+
/* Free the EDAC CORE resources */
mci->pvt_info = NULL;
- mci_lookup[pvt->mc_node_id] = NULL;
+ mcis[nid] = NULL;
kfree(pvt);
edac_mc_free(mci);
@@ -2851,7 +2717,6 @@ static const struct pci_device_id amd64_pci_table[] __devinitdata = {
.subdevice = PCI_ANY_ID,
.class = 0,
.class_mask = 0,
- .driver_data = K8_CPUS
},
{
.vendor = PCI_VENDOR_ID_AMD,
@@ -2860,16 +2725,6 @@ static const struct pci_device_id amd64_pci_table[] __devinitdata = {
.subdevice = PCI_ANY_ID,
.class = 0,
.class_mask = 0,
- .driver_data = F10_CPUS
- },
- {
- .vendor = PCI_VENDOR_ID_AMD,
- .device = PCI_DEVICE_ID_AMD_11H_NB_DRAM,
- .subvendor = PCI_ANY_ID,
- .subdevice = PCI_ANY_ID,
- .class = 0,
- .class_mask = 0,
- .driver_data = F11_CPUS
},
{0, }
};
@@ -2877,12 +2732,12 @@ MODULE_DEVICE_TABLE(pci, amd64_pci_table);
static struct pci_driver amd64_pci_driver = {
.name = EDAC_MOD_STR,
- .probe = amd64_init_one_instance,
+ .probe = amd64_probe_one_instance,
.remove = __devexit_p(amd64_remove_one_instance),
.id_table = amd64_pci_table,
};
-static void amd64_setup_pci_device(void)
+static void setup_pci_device(void)
{
struct mem_ctl_info *mci;
struct amd64_pvt *pvt;
@@ -2890,13 +2745,12 @@ static void amd64_setup_pci_device(void)
if (amd64_ctl_pci)
return;
- mci = mci_lookup[0];
+ mci = mcis[0];
if (mci) {
pvt = mci->pvt_info;
amd64_ctl_pci =
- edac_pci_create_generic_ctl(&pvt->dram_f2_ctl->dev,
- EDAC_MOD_STR);
+ edac_pci_create_generic_ctl(&pvt->F2->dev, EDAC_MOD_STR);
if (!amd64_ctl_pci) {
pr_warning("%s(): Unable to create PCI control\n",
@@ -2910,8 +2764,7 @@ static void amd64_setup_pci_device(void)
static int __init amd64_edac_init(void)
{
- int nb, err = -ENODEV;
- bool load_ok = false;
+ int err = -ENODEV;
edac_printk(KERN_INFO, EDAC_MOD_STR, EDAC_AMD64_VERSION "\n");
@@ -2920,41 +2773,41 @@ static int __init amd64_edac_init(void)
if (amd_cache_northbridges() < 0)
goto err_ret;
+ err = -ENOMEM;
+ mcis = kzalloc(amd_nb_num() * sizeof(mcis[0]), GFP_KERNEL);
+ ecc_stngs = kzalloc(amd_nb_num() * sizeof(ecc_stngs[0]), GFP_KERNEL);
+ if (!(mcis && ecc_stngs))
+ goto err_ret;
+
msrs = msrs_alloc();
if (!msrs)
- goto err_ret;
+ goto err_free;
err = pci_register_driver(&amd64_pci_driver);
if (err)
goto err_pci;
- /*
- * At this point, the array 'pvt_lookup[]' contains pointers to alloc'd
- * amd64_pvt structs. These will be used in the 2nd stage init function
- * to finish initialization of the MC instances.
- */
err = -ENODEV;
- for (nb = 0; nb < amd_nb_num(); nb++) {
- if (!pvt_lookup[nb])
- continue;
-
- err = amd64_init_2nd_stage(pvt_lookup[nb]);
- if (err)
- goto err_2nd_stage;
+ if (!atomic_read(&drv_instances))
+ goto err_no_instances;
- load_ok = true;
- }
-
- if (load_ok) {
- amd64_setup_pci_device();
- return 0;
- }
+ setup_pci_device();
+ return 0;
-err_2nd_stage:
+err_no_instances:
pci_unregister_driver(&amd64_pci_driver);
+
err_pci:
msrs_free(msrs);
msrs = NULL;
+
+err_free:
+ kfree(mcis);
+ mcis = NULL;
+
+ kfree(ecc_stngs);
+ ecc_stngs = NULL;
+
err_ret:
return err;
}
@@ -2966,6 +2819,12 @@ static void __exit amd64_edac_exit(void)
pci_unregister_driver(&amd64_pci_driver);
+ kfree(ecc_stngs);
+ ecc_stngs = NULL;
+
+ kfree(mcis);
+ mcis = NULL;
+
msrs_free(msrs);
msrs = NULL;
}
diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h
index 044aee4f944d..613ec72b0f65 100644
--- a/drivers/edac/amd64_edac.h
+++ b/drivers/edac/amd64_edac.h
@@ -74,11 +74,26 @@
#include "edac_core.h"
#include "mce_amd.h"
-#define amd64_printk(level, fmt, arg...) \
- edac_printk(level, "amd64", fmt, ##arg)
+#define amd64_debug(fmt, arg...) \
+ edac_printk(KERN_DEBUG, "amd64", fmt, ##arg)
-#define amd64_mc_printk(mci, level, fmt, arg...) \
- edac_mc_chipset_printk(mci, level, "amd64", fmt, ##arg)
+#define amd64_info(fmt, arg...) \
+ edac_printk(KERN_INFO, "amd64", fmt, ##arg)
+
+#define amd64_notice(fmt, arg...) \
+ edac_printk(KERN_NOTICE, "amd64", fmt, ##arg)
+
+#define amd64_warn(fmt, arg...) \
+ edac_printk(KERN_WARNING, "amd64", fmt, ##arg)
+
+#define amd64_err(fmt, arg...) \
+ edac_printk(KERN_ERR, "amd64", fmt, ##arg)
+
+#define amd64_mc_warn(mci, fmt, arg...) \
+ edac_mc_chipset_printk(mci, KERN_WARNING, "amd64", fmt, ##arg)
+
+#define amd64_mc_err(mci, fmt, arg...) \
+ edac_mc_chipset_printk(mci, KERN_ERR, "amd64", fmt, ##arg)
/*
* Throughout the comments in this code, the following terms are used:
@@ -129,11 +144,9 @@
* sections 3.5.4 and 3.5.5 for more information.
*/
-#define EDAC_AMD64_VERSION " Ver: 3.3.0 " __DATE__
+#define EDAC_AMD64_VERSION "v3.3.0"
#define EDAC_MOD_STR "amd64_edac"
-#define EDAC_MAX_NUMNODES 8
-
/* Extended Model from CPUID, for CPU Revision numbers */
#define K8_REV_D 1
#define K8_REV_E 2
@@ -322,9 +335,6 @@
#define K8_SCRCTRL 0x58
#define F10_NB_CFG_LOW 0x88
-#define F10_NB_CFG_LOW_ENABLE_EXT_CFG BIT(14)
-
-#define F10_NB_CFG_HIGH 0x8C
#define F10_ONLINE_SPARE 0xB0
#define F10_ONLINE_SPARE_SWAPDONE0(x) ((x) & BIT(1))
@@ -373,7 +383,6 @@ static inline int get_node_id(struct pci_dev *pdev)
enum amd64_chipset_families {
K8_CPUS = 0,
F10_CPUS,
- F11_CPUS,
};
/* Error injection control structure */
@@ -384,16 +393,13 @@ struct error_injection {
};
struct amd64_pvt {
+ struct low_ops *ops;
+
/* pci_device handles which we utilize */
- struct pci_dev *addr_f1_ctl;
- struct pci_dev *dram_f2_ctl;
- struct pci_dev *misc_f3_ctl;
+ struct pci_dev *F1, *F2, *F3;
int mc_node_id; /* MC index of this MC node */
int ext_model; /* extended model value of this node */
-
- struct low_ops *ops; /* pointer to per PCI Device ID func table */
-
int channel_count;
/* Raw registers */
@@ -455,27 +461,27 @@ struct amd64_pvt {
/* place to store error injection parameters prior to issue */
struct error_injection injection;
- /* Save old hw registers' values before we modified them */
- u32 nbctl_mcgctl_saved; /* When true, following 2 are valid */
- u32 old_nbctl;
+ /* DCT per-family scrubrate setting */
+ u32 min_scrubrate;
- /* MC Type Index value: socket F vs Family 10h */
- u32 mc_type_index;
+ /* family name this instance is running on */
+ const char *ctl_name;
+
+};
+
+/*
+ * per-node ECC settings descriptor
+ */
+struct ecc_settings {
+ u32 old_nbctl;
+ bool nbctl_valid;
- /* misc settings */
struct flags {
- unsigned long cf8_extcfg:1;
unsigned long nb_mce_enable:1;
unsigned long nb_ecc_prev:1;
} flags;
};
-struct scrubrate {
- u32 scrubval; /* bit pattern for scrub rate */
- u32 bandwidth; /* bandwidth consumed (bytes/sec) */
-};
-
-extern struct scrubrate scrubrates[23];
extern const char *tt_msgs[4];
extern const char *ll_msgs[4];
extern const char *rrrr_msgs[16];
@@ -517,23 +523,10 @@ struct low_ops {
struct amd64_family_type {
const char *ctl_name;
- u16 addr_f1_ctl;
- u16 misc_f3_ctl;
+ u16 f1_id, f3_id;
struct low_ops ops;
};
-static struct amd64_family_type amd64_family_types[];
-
-static inline const char *get_amd_family_name(int index)
-{
- return amd64_family_types[index].ctl_name;
-}
-
-static inline struct low_ops *family_ops(int index)
-{
- return &amd64_family_types[index].ops;
-}
-
static inline int amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
u32 *val, const char *func)
{
@@ -541,8 +534,8 @@ static inline int amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
err = pci_read_config_dword(pdev, offset, val);
if (err)
- amd64_printk(KERN_WARNING, "%s: error reading F%dx%x.\n",
- func, PCI_FUNC(pdev->devfn), offset);
+ amd64_warn("%s: error reading F%dx%x.\n",
+ func, PCI_FUNC(pdev->devfn), offset);
return err;
}
@@ -556,7 +549,6 @@ static inline int amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset,
*/
#define K8_MIN_SCRUB_RATE_BITS 0x0
#define F10_MIN_SCRUB_RATE_BITS 0x5
-#define F11_MIN_SCRUB_RATE_BITS 0x6
int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base,
u64 *hole_offset, u64 *hole_size);
diff --git a/drivers/edac/amd64_edac_inj.c b/drivers/edac/amd64_edac_inj.c
index 29f1f7a612d9..688478de1cbd 100644
--- a/drivers/edac/amd64_edac_inj.c
+++ b/drivers/edac/amd64_edac_inj.c
@@ -23,9 +23,7 @@ static ssize_t amd64_inject_section_store(struct mem_ctl_info *mci,
if (ret != -EINVAL) {
if (value > 3) {
- amd64_printk(KERN_WARNING,
- "%s: invalid section 0x%lx\n",
- __func__, value);
+ amd64_warn("%s: invalid section 0x%lx\n", __func__, value);
return -EINVAL;
}
@@ -58,9 +56,7 @@ static ssize_t amd64_inject_word_store(struct mem_ctl_info *mci,
if (ret != -EINVAL) {
if (value > 8) {
- amd64_printk(KERN_WARNING,
- "%s: invalid word 0x%lx\n",
- __func__, value);
+ amd64_warn("%s: invalid word 0x%lx\n", __func__, value);
return -EINVAL;
}
@@ -92,9 +88,8 @@ static ssize_t amd64_inject_ecc_vector_store(struct mem_ctl_info *mci,
if (ret != -EINVAL) {
if (value & 0xFFFF0000) {
- amd64_printk(KERN_WARNING,
- "%s: invalid EccVector: 0x%lx\n",
- __func__, value);
+ amd64_warn("%s: invalid EccVector: 0x%lx\n",
+ __func__, value);
return -EINVAL;
}
@@ -122,15 +117,13 @@ static ssize_t amd64_inject_read_store(struct mem_ctl_info *mci,
/* Form value to choose 16-byte section of cacheline */
section = F10_NB_ARRAY_DRAM_ECC |
SET_NB_ARRAY_ADDRESS(pvt->injection.section);
- pci_write_config_dword(pvt->misc_f3_ctl,
- F10_NB_ARRAY_ADDR, section);
+ pci_write_config_dword(pvt->F3, F10_NB_ARRAY_ADDR, section);
word_bits = SET_NB_DRAM_INJECTION_READ(pvt->injection.word,
pvt->injection.bit_map);
/* Issue 'word' and 'bit' along with the READ request */
- pci_write_config_dword(pvt->misc_f3_ctl,
- F10_NB_ARRAY_DATA, word_bits);
+ pci_write_config_dword(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
debugf0("section=0x%x word_bits=0x%x\n", section, word_bits);
@@ -157,15 +150,13 @@ static ssize_t amd64_inject_write_store(struct mem_ctl_info *mci,
/* Form value to choose 16-byte section of cacheline */
section = F10_NB_ARRAY_DRAM_ECC |
SET_NB_ARRAY_ADDRESS(pvt->injection.section);
- pci_write_config_dword(pvt->misc_f3_ctl,
- F10_NB_ARRAY_ADDR, section);
+ pci_write_config_dword(pvt->F3, F10_NB_ARRAY_ADDR, section);
word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection.word,
pvt->injection.bit_map);
/* Issue 'word' and 'bit' along with the WRITE request */
- pci_write_config_dword(pvt->misc_f3_ctl,
- F10_NB_ARRAY_DATA, word_bits);
+ pci_write_config_dword(pvt->F3, F10_NB_ARRAY_DATA, word_bits);
debugf0("section=0x%x word_bits=0x%x\n", section, word_bits);
diff --git a/drivers/edac/cpc925_edac.c b/drivers/edac/cpc925_edac.c
index 1609a19df495..b9a781c47e3c 100644
--- a/drivers/edac/cpc925_edac.c
+++ b/drivers/edac/cpc925_edac.c
@@ -818,9 +818,10 @@ static void cpc925_del_edac_devices(void)
}
/* Convert current background scrub rate into bytes/sec bandwidth */
-static int cpc925_get_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *bw)
+static int cpc925_get_sdram_scrub_rate(struct mem_ctl_info *mci)
{
struct cpc925_mc_pdata *pdata = mci->pvt_info;
+ int bw;
u32 mscr;
u8 si;
@@ -832,11 +833,11 @@ static int cpc925_get_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *bw)
if (((mscr & MSCR_SCRUB_MOD_MASK) != MSCR_BACKGR_SCRUB) ||
(si == 0)) {
cpc925_mc_printk(mci, KERN_INFO, "Scrub mode not enabled\n");
- *bw = 0;
+ bw = 0;
} else
- *bw = CPC925_SCRUB_BLOCK_SIZE * 0xFA67 / si;
+ bw = CPC925_SCRUB_BLOCK_SIZE * 0xFA67 / si;
- return 0;
+ return bw;
}
/* Return 0 for single channel; 1 for dual channel */
diff --git a/drivers/edac/e752x_edac.c b/drivers/edac/e752x_edac.c
index 073f5a06d238..ec302d426589 100644
--- a/drivers/edac/e752x_edac.c
+++ b/drivers/edac/e752x_edac.c
@@ -983,11 +983,11 @@ static int set_sdram_scrub_rate(struct mem_ctl_info *mci, u32 new_bw)
pci_write_config_word(pdev, E752X_MCHSCRB, scrubrates[i].scrubval);
- return 0;
+ return scrubrates[i].bandwidth;
}
/* Convert current scrub rate value into byte/sec bandwidth */
-static int get_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *bw)
+static int get_sdram_scrub_rate(struct mem_ctl_info *mci)
{
const struct scrubrate *scrubrates;
struct e752x_pvt *pvt = (struct e752x_pvt *) mci->pvt_info;
@@ -1013,10 +1013,8 @@ static int get_sdram_scrub_rate(struct mem_ctl_info *mci, u32 *bw)
"Invalid sdram scrub control value: 0x%x\n", scrubval);
return -1;
}
+ return scrubrates[i].bandwidth;
- *bw = scrubrates[i].bandwidth;
-
- return 0;
}
/* Return 1 if dual channel mode is active. Else return 0. */
diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
index 035eaaf2d906..3d965347a673 100644
--- a/drivers/edac/edac_core.h
+++ b/drivers/edac/edac_core.h
@@ -41,10 +41,10 @@
#define MC_PROC_NAME_MAX_LEN 7
#if PAGE_SHIFT < 20
-#define PAGES_TO_MiB( pages ) ( ( pages ) >> ( 20 - PAGE_SHIFT ) )
-#define MiB_TO_PAGES(mb) ((mb) >> (20 - PAGE_SHIFT))
+#define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT))
+#define MiB_TO_PAGES(mb) ((mb) << (20 - PAGE_SHIFT))
#else /* PAGE_SHIFT > 20 */
-#define PAGES_TO_MiB( pages ) ( ( pages ) << ( PAGE_SHIFT - 20 ) )
+#define PAGES_TO_MiB(pages) ((pages) << (PAGE_SHIFT - 20))
#define MiB_TO_PAGES(mb) ((mb) >> (PAGE_SHIFT - 20))
#endif
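
The MiB_TO_PAGES() change above fixes a real bug: the old macro shifted
right, dividing where it should multiply. A quick stand-alone check of both
macros (user-space sketch; the common 4 KiB page size is assumed):

#include <stdio.h>

#define PAGE_SHIFT 12   /* assumption: 4 KiB pages */

#define PAGES_TO_MiB(pages) ((pages) >> (20 - PAGE_SHIFT))
#define MiB_TO_PAGES(mb)    ((mb) << (20 - PAGE_SHIFT))

int main(void)
{
        unsigned long mb = 512;
        unsigned long pages = MiB_TO_PAGES(mb);  /* 512 << 8 = 131072 */

        /* round-trips back to 512 MiB; the old '>>' would yield 2 pages */
        printf("%lu MiB = %lu pages = %lu MiB\n",
               mb, pages, (unsigned long)PAGES_TO_MiB(pages));
        return 0;
}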
@@ -68,9 +68,10 @@
#define EDAC_PCI "PCI"
#define EDAC_DEBUG "DEBUG"
+extern const char *edac_mem_types[];
+
#ifdef CONFIG_EDAC_DEBUG
extern int edac_debug_level;
-extern const char *edac_mem_types[];
#define edac_debug_printk(level, fmt, arg...) \
do { \
@@ -386,7 +387,7 @@ struct mem_ctl_info {
representation and converts it to the closest matching
bandwidth in bytes/sec.
*/
- int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci, u32 * bw);
+ int (*get_sdram_scrub_rate) (struct mem_ctl_info * mci);
/* pointer to edac checking routine */
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index ba6586a69ccc..a4e9db2d6524 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -76,6 +76,8 @@ static void edac_mc_dump_mci(struct mem_ctl_info *mci)
debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}
+#endif /* CONFIG_EDAC_DEBUG */
+
/*
* keep those in sync with the enum mem_type
*/
@@ -100,8 +102,6 @@ const char *edac_mem_types[] = {
};
EXPORT_SYMBOL_GPL(edac_mem_types);
-#endif /* CONFIG_EDAC_DEBUG */
-
/* 'ptr' points to a possibly unaligned item X such that sizeof(X) is 'size'.
* Adjust 'ptr' so that its alignment is at least as stringent as what the
* compiler would provide for X and return the aligned result.
@@ -586,14 +586,16 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
return NULL;
}
- /* marking MCI offline */
- mci->op_state = OP_OFFLINE;
-
del_mc_from_global_list(mci);
mutex_unlock(&mem_ctls_mutex);
- /* flush workq processes and remove sysfs */
+ /* flush workq processes */
edac_mc_workq_teardown(mci);
+
+ /* marking MCI offline */
+ mci->op_state = OP_OFFLINE;
+
+ /* remove from sysfs */
edac_remove_sysfs_mci_device(mci);
edac_printk(KERN_INFO, EDAC_MC,
diff --git a/drivers/edac/edac_mc_sysfs.c b/drivers/edac/edac_mc_sysfs.c
index dce61f7ba38b..39d97cfdf58c 100644
--- a/drivers/edac/edac_mc_sysfs.c
+++ b/drivers/edac/edac_mc_sysfs.c
@@ -436,56 +436,55 @@ static ssize_t mci_reset_counters_store(struct mem_ctl_info *mci,
return count;
}
-/* memory scrubbing */
+/* Memory scrubbing interface:
+ *
+ * An MC driver can limit the scrubbing bandwidth based on the CPU type.
+ * Therefore, ->set_sdram_scrub_rate should return the bandwidth actually
+ * accepted, or 0 when scrubbing is to be disabled.
+ *
+ * A negative return value still means that an error occurred while setting
+ * the scrub rate.
+ */
static ssize_t mci_sdram_scrub_rate_store(struct mem_ctl_info *mci,
const char *data, size_t count)
{
unsigned long bandwidth = 0;
- int err;
+ int new_bw = 0;
- if (!mci->set_sdram_scrub_rate) {
- edac_printk(KERN_WARNING, EDAC_MC,
- "Memory scrub rate setting not implemented!\n");
+ if (!mci->set_sdram_scrub_rate)
return -EINVAL;
- }
if (strict_strtoul(data, 10, &bandwidth) < 0)
return -EINVAL;
- err = mci->set_sdram_scrub_rate(mci, (u32)bandwidth);
- if (err) {
- edac_printk(KERN_DEBUG, EDAC_MC,
- "Failed setting scrub rate to %lu\n", bandwidth);
- return -EINVAL;
- }
- else {
- edac_printk(KERN_DEBUG, EDAC_MC,
- "Scrub rate set to: %lu\n", bandwidth);
+ new_bw = mci->set_sdram_scrub_rate(mci, bandwidth);
+ if (new_bw >= 0) {
+ edac_printk(KERN_DEBUG, EDAC_MC, "Scrub rate set to %d\n", new_bw);
return count;
}
+
+ edac_printk(KERN_DEBUG, EDAC_MC, "Error setting scrub rate to: %lu\n", bandwidth);
+ return -EINVAL;
}
+/*
+ * The return value semantics of ->get_sdram_scrub_rate() are the same as
+ * above.
+ */
static ssize_t mci_sdram_scrub_rate_show(struct mem_ctl_info *mci, char *data)
{
- u32 bandwidth = 0;
- int err;
+ int bandwidth = 0;
- if (!mci->get_sdram_scrub_rate) {
- edac_printk(KERN_WARNING, EDAC_MC,
- "Memory scrub rate reading not implemented\n");
+ if (!mci->get_sdram_scrub_rate)
return -EINVAL;
- }
- err = mci->get_sdram_scrub_rate(mci, &bandwidth);
- if (err) {
+ bandwidth = mci->get_sdram_scrub_rate(mci);
+ if (bandwidth < 0) {
edac_printk(KERN_DEBUG, EDAC_MC, "Error reading scrub rate\n");
- return err;
- }
- else {
- edac_printk(KERN_DEBUG, EDAC_MC,
- "Read scrub rate: %d\n", bandwidth);
- return sprintf(data, "%d\n", bandwidth);
+ return bandwidth;
}
+
+ edac_printk(KERN_DEBUG, EDAC_MC, "Read scrub rate: %d\n", bandwidth);
+ return sprintf(data, "%d\n", bandwidth);
}
/* default attribute files for the MCI object */
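
Driver-side, the new convention is small: a callback returns the bandwidth
in effect (bytes/sec), 0 when scrubbing is disabled, or a negative value on
error. A minimal user-space sketch with a hypothetical callback:

#include <stdio.h>

struct mem_ctl_info;    /* opaque here */

/* hypothetical driver callback following the new convention */
static int demo_get_scrub_rate(struct mem_ctl_info *mci)
{
        (void)mci;
        return 1600000000;      /* bandwidth in effect, bytes/sec */
}

static void show_scrub_rate(struct mem_ctl_info *mci,
                            int (*get)(struct mem_ctl_info *))
{
        int bw = get(mci);

        if (bw < 0)
                printf("error reading scrub rate\n");
        else
                printf("scrub rate: %d bytes/sec\n", bw);
}

int main(void)
{
        show_scrub_rate(NULL, demo_get_scrub_rate);
        return 0;
}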
diff --git a/drivers/edac/i5100_edac.c b/drivers/edac/i5100_edac.c
index f459a6c0886b..0448da0af75d 100644
--- a/drivers/edac/i5100_edac.c
+++ b/drivers/edac/i5100_edac.c
@@ -611,20 +611,17 @@ static int i5100_set_scrub_rate(struct mem_ctl_info *mci, u32 bandwidth)
bandwidth = 5900000 * i5100_mc_scrben(dw);
- return 0;
+ return bandwidth;
}
-static int i5100_get_scrub_rate(struct mem_ctl_info *mci,
- u32 *bandwidth)
+static int i5100_get_scrub_rate(struct mem_ctl_info *mci)
{
struct i5100_priv *priv = mci->pvt_info;
u32 dw;
pci_read_config_dword(priv->mc, I5100_MC, &dw);
- *bandwidth = 5900000 * i5100_mc_scrben(dw);
-
- return 0;
+ return 5900000 * i5100_mc_scrben(dw);
}
static struct pci_dev *pci_get_device_func(unsigned vendor,
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index c0181093b490..f6cf73d93359 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -5,6 +5,7 @@
static struct amd_decoder_ops *fam_ops;
+static u8 xec_mask = 0xf;
static u8 nb_err_cpumask = 0xf;
static bool report_gart_errors;
@@ -74,57 +75,104 @@ static const char *f10h_nb_mce_desc[] = {
"ECC Error in the Probe Filter directory"
};
-static bool f12h_dc_mce(u16 ec)
+static const char * const f15h_ic_mce_desc[] = {
+ "UC during a demand linefill from L2",
+ "Parity error during data load from IC",
+ "Parity error for IC valid bit",
+ "Main tag parity error",
+ "Parity error in prediction queue",
+ "PFB data/address parity error",
+ "Parity error in the branch status reg",
+ "PFB promotion address error",
+ "Tag error during probe/victimization",
+ "Parity error for IC probe tag valid bit",
+ "PFB non-cacheable bit parity error",
+ "PFB valid bit parity error", /* xec = 0xd */
+ "patch RAM", /* xec = 010 */
+ "uop queue",
+ "insn buffer",
+ "predecode buffer",
+ "fetch address FIFO"
+};
+
+static const char * const f15h_cu_mce_desc[] = {
+ "Fill ECC error on data fills", /* xec = 0x4 */
+ "Fill parity error on insn fills",
+ "Prefetcher request FIFO parity error",
+ "PRQ address parity error",
+ "PRQ data parity error",
+ "WCC Tag ECC error",
+ "WCC Data ECC error",
+ "WCB Data parity error",
+ "VB Data/ECC error",
+ "L2 Tag ECC error", /* xec = 0x10 */
+ "Hard L2 Tag ECC error",
+ "Multiple hits on L2 tag",
+ "XAB parity error",
+ "PRB address parity error"
+};
+
+static const char * const fr_ex_mce_desc[] = {
+ "CPU Watchdog timer expire",
+ "Wakeup array dest tag",
+ "AG payload array",
+ "EX payload array",
+ "IDRF array",
+ "Retire dispatch queue",
+ "Mapper checkpoint array",
+ "Physical register file EX0 port",
+ "Physical register file EX1 port",
+ "Physical register file AG0 port",
+ "Physical register file AG1 port",
+ "Flag register file",
+ "DE correctable error could not be corrected"
+};
+
+static bool f12h_dc_mce(u16 ec, u8 xec)
{
bool ret = false;
if (MEM_ERROR(ec)) {
- u8 ll = ec & 0x3;
+ u8 ll = LL(ec);
ret = true;
if (ll == LL_L2)
pr_cont("during L1 linefill from L2.\n");
else if (ll == LL_L1)
- pr_cont("Data/Tag %s error.\n", RRRR_MSG(ec));
+ pr_cont("Data/Tag %s error.\n", R4_MSG(ec));
else
ret = false;
}
return ret;
}
-static bool f10h_dc_mce(u16 ec)
+static bool f10h_dc_mce(u16 ec, u8 xec)
{
- u8 r4 = (ec >> 4) & 0xf;
- u8 ll = ec & 0x3;
-
- if (r4 == R4_GEN && ll == LL_L1) {
+ if (R4(ec) == R4_GEN && LL(ec) == LL_L1) {
pr_cont("during data scrub.\n");
return true;
}
- return f12h_dc_mce(ec);
+ return f12h_dc_mce(ec, xec);
}
-static bool k8_dc_mce(u16 ec)
+static bool k8_dc_mce(u16 ec, u8 xec)
{
if (BUS_ERROR(ec)) {
pr_cont("during system linefill.\n");
return true;
}
- return f10h_dc_mce(ec);
+ return f10h_dc_mce(ec, xec);
}
-static bool f14h_dc_mce(u16 ec)
+static bool f14h_dc_mce(u16 ec, u8 xec)
{
- u8 r4 = (ec >> 4) & 0xf;
- u8 ll = ec & 0x3;
- u8 tt = (ec >> 2) & 0x3;
- u8 ii = tt;
+ u8 r4 = R4(ec);
bool ret = true;
if (MEM_ERROR(ec)) {
- if (tt != TT_DATA || ll != LL_L1)
+ if (TT(ec) != TT_DATA || LL(ec) != LL_L1)
return false;
switch (r4) {
@@ -144,7 +192,7 @@ static bool f14h_dc_mce(u16 ec)
}
} else if (BUS_ERROR(ec)) {
- if ((ii != II_MEM && ii != II_IO) || ll != LL_LG)
+ if ((II(ec) != II_MEM && II(ec) != II_IO) || LL(ec) != LL_LG)
return false;
pr_cont("System read data error on a ");
@@ -169,39 +217,78 @@ static bool f14h_dc_mce(u16 ec)
return ret;
}
+static bool f15h_dc_mce(u16 ec, u8 xec)
+{
+ bool ret = true;
+
+ if (MEM_ERROR(ec)) {
+
+ switch (xec) {
+ case 0x0:
+ pr_cont("Data Array access error.\n");
+ break;
+
+ case 0x1:
+ pr_cont("UC error during a linefill from L2/NB.\n");
+ break;
+
+ case 0x2:
+ case 0x11:
+ pr_cont("STQ access error.\n");
+ break;
+
+ case 0x3:
+ pr_cont("SCB access error.\n");
+ break;
+
+ case 0x10:
+ pr_cont("Tag error.\n");
+ break;
+
+ case 0x12:
+ pr_cont("LDQ access error.\n");
+ break;
+
+ default:
+ ret = false;
+ }
+ } else if (BUS_ERROR(ec)) {
+
+ if (!xec)
+ pr_cont("during system linefill.\n");
+ else
+ pr_cont(" Internal %s condition.\n",
+ ((xec == 1) ? "livelock" : "deadlock"));
+ } else
+ ret = false;
+
+ return ret;
+}
+
static void amd_decode_dc_mce(struct mce *m)
{
- u16 ec = m->status & 0xffff;
- u8 xec = (m->status >> 16) & 0xf;
+ u16 ec = EC(m->status);
+ u8 xec = XEC(m->status, xec_mask);
pr_emerg(HW_ERR "Data Cache Error: ");
/* TLB error signatures are the same across families */
if (TLB_ERROR(ec)) {
- u8 tt = (ec >> 2) & 0x3;
-
- if (tt == TT_DATA) {
+ if (TT(ec) == TT_DATA) {
pr_cont("%s TLB %s.\n", LL_MSG(ec),
- (xec ? "multimatch" : "parity error"));
+ ((xec == 2) ? "locked miss"
+ : (xec ? "multimatch" : "parity")));
return;
}
- else
- goto wrong_dc_mce;
- }
-
- if (!fam_ops->dc_mce(ec))
- goto wrong_dc_mce;
-
- return;
-
-wrong_dc_mce:
- pr_emerg(HW_ERR "Corrupted DC MCE info?\n");
+ } else if (fam_ops->dc_mce(ec, xec))
+ ;
+ else
+ pr_emerg(HW_ERR "Corrupted DC MCE info?\n");
}
-static bool k8_ic_mce(u16 ec)
+static bool k8_ic_mce(u16 ec, u8 xec)
{
- u8 ll = ec & 0x3;
- u8 r4 = (ec >> 4) & 0xf;
+ u8 ll = LL(ec);
bool ret = true;
if (!MEM_ERROR(ec))
@@ -210,7 +297,7 @@ static bool k8_ic_mce(u16 ec)
if (ll == 0x2)
pr_cont("during a linefill from L2.\n");
else if (ll == 0x1) {
- switch (r4) {
+ switch (R4(ec)) {
case R4_IRD:
pr_cont("Parity error during data load.\n");
break;
@@ -233,15 +320,13 @@ static bool k8_ic_mce(u16 ec)
return ret;
}
-static bool f14h_ic_mce(u16 ec)
+static bool f14h_ic_mce(u16 ec, u8 xec)
{
- u8 ll = ec & 0x3;
- u8 tt = (ec >> 2) & 0x3;
- u8 r4 = (ec >> 4) & 0xf;
+ u8 r4 = R4(ec);
bool ret = true;
if (MEM_ERROR(ec)) {
- if (tt != 0 || ll != 1)
+ if (TT(ec) != 0 || LL(ec) != 1)
ret = false;
if (r4 == R4_IRD)
@@ -254,10 +339,36 @@ static bool f14h_ic_mce(u16 ec)
return ret;
}
+static bool f15h_ic_mce(u16 ec, u8 xec)
+{
+ bool ret = true;
+
+ if (!MEM_ERROR(ec))
+ return false;
+
+ switch (xec) {
+ case 0x0 ... 0xa:
+ pr_cont("%s.\n", f15h_ic_mce_desc[xec]);
+ break;
+
+ case 0xd:
+ pr_cont("%s.\n", f15h_ic_mce_desc[xec-2]);
+ break;
+
+ case 0x10 ... 0x14:
+ pr_cont("Decoder %s parity error.\n", f15h_ic_mce_desc[xec-4]);
+ break;
+
+ default:
+ ret = false;
+ }
+ return ret;
+}
+
static void amd_decode_ic_mce(struct mce *m)
{
- u16 ec = m->status & 0xffff;
- u8 xec = (m->status >> 16) & 0xf;
+ u16 ec = EC(m->status);
+ u8 xec = XEC(m->status, xec_mask);
pr_emerg(HW_ERR "Instruction Cache Error: ");
@@ -268,7 +379,7 @@ static void amd_decode_ic_mce(struct mce *m)
bool k8 = (boot_cpu_data.x86 == 0xf && (m->status & BIT_64(58)));
pr_cont("during %s.\n", (k8 ? "system linefill" : "NB data read"));
- } else if (fam_ops->ic_mce(ec))
+ } else if (fam_ops->ic_mce(ec, xec))
;
else
pr_emerg(HW_ERR "Corrupted IC MCE info?\n");
@@ -276,8 +387,8 @@ static void amd_decode_ic_mce(struct mce *m)
static void amd_decode_bu_mce(struct mce *m)
{
- u32 ec = m->status & 0xffff;
- u32 xec = (m->status >> 16) & 0xf;
+ u16 ec = EC(m->status);
+ u8 xec = XEC(m->status, xec_mask);
pr_emerg(HW_ERR "Bus Unit Error");
@@ -286,23 +397,23 @@ static void amd_decode_bu_mce(struct mce *m)
else if (xec == 0x3)
pr_cont(" in the victim data buffers.\n");
else if (xec == 0x2 && MEM_ERROR(ec))
- pr_cont(": %s error in the L2 cache tags.\n", RRRR_MSG(ec));
+ pr_cont(": %s error in the L2 cache tags.\n", R4_MSG(ec));
else if (xec == 0x0) {
if (TLB_ERROR(ec))
pr_cont(": %s error in a Page Descriptor Cache or "
"Guest TLB.\n", TT_MSG(ec));
else if (BUS_ERROR(ec))
pr_cont(": %s/ECC error in data read from NB: %s.\n",
- RRRR_MSG(ec), PP_MSG(ec));
+ R4_MSG(ec), PP_MSG(ec));
else if (MEM_ERROR(ec)) {
- u8 rrrr = (ec >> 4) & 0xf;
+ u8 r4 = R4(ec);
- if (rrrr >= 0x7)
+ if (r4 >= 0x7)
pr_cont(": %s error during data copyback.\n",
- RRRR_MSG(ec));
- else if (rrrr <= 0x1)
+ R4_MSG(ec));
+ else if (r4 <= 0x1)
pr_cont(": %s parity/ECC error during data "
- "access from L2.\n", RRRR_MSG(ec));
+ "access from L2.\n", R4_MSG(ec));
else
goto wrong_bu_mce;
} else
@@ -316,12 +427,52 @@ wrong_bu_mce:
pr_emerg(HW_ERR "Corrupted BU MCE info?\n");
}
+static void amd_decode_cu_mce(struct mce *m)
+{
+ u16 ec = EC(m->status);
+ u8 xec = XEC(m->status, xec_mask);
+
+ pr_emerg(HW_ERR "Combined Unit Error: ");
+
+ if (TLB_ERROR(ec)) {
+ if (xec == 0x0)
+ pr_cont("Data parity TLB read error.\n");
+ else if (xec == 0x1)
+ pr_cont("Poison data provided for TLB fill.\n");
+ else
+ goto wrong_cu_mce;
+ } else if (BUS_ERROR(ec)) {
+ if (xec > 2)
+ goto wrong_cu_mce;
+
+ pr_cont("Error during attempted NB data read.\n");
+ } else if (MEM_ERROR(ec)) {
+ switch (xec) {
+ case 0x4 ... 0xc:
+ pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x4]);
+ break;
+
+ case 0x10 ... 0x14:
+ pr_cont("%s.\n", f15h_cu_mce_desc[xec - 0x7]);
+ break;
+
+ default:
+ goto wrong_cu_mce;
+ }
+ }
+
+ return;
+
+wrong_cu_mce:
+ pr_emerg(HW_ERR "Corrupted CU MCE info?\n");
+}
+
static void amd_decode_ls_mce(struct mce *m)
{
- u16 ec = m->status & 0xffff;
- u8 xec = (m->status >> 16) & 0xf;
+ u16 ec = EC(m->status);
+ u8 xec = XEC(m->status, xec_mask);
- if (boot_cpu_data.x86 == 0x14) {
+ if (boot_cpu_data.x86 >= 0x14) {
pr_emerg("You shouldn't be seeing an LS MCE on this cpu family,"
" please report on LKML.\n");
return;
@@ -330,12 +481,12 @@ static void amd_decode_ls_mce(struct mce *m)
pr_emerg(HW_ERR "Load Store Error");
if (xec == 0x0) {
- u8 r4 = (ec >> 4) & 0xf;
+ u8 r4 = R4(ec);
if (!BUS_ERROR(ec) || (r4 != R4_DRD && r4 != R4_DWR))
goto wrong_ls_mce;
- pr_cont(" during %s.\n", RRRR_MSG(ec));
+ pr_cont(" during %s.\n", R4_MSG(ec));
} else
goto wrong_ls_mce;
@@ -410,6 +561,15 @@ static bool f10h_nb_mce(u16 ec, u8 xec)
goto out;
break;
+ case 0x19:
+ if (boot_cpu_data.x86 == 0x15)
+ pr_cont("Compute Unit Data Error.\n");
+ else
+ ret = false;
+
+ goto out;
+
case 0x1c ... 0x1f:
offset = 24;
break;
@@ -434,27 +594,30 @@ static bool nb_noop_mce(u16 ec, u8 xec)
void amd_decode_nb_mce(int node_id, struct mce *m, u32 nbcfg)
{
- u8 xec = (m->status >> 16) & 0x1f;
- u16 ec = m->status & 0xffff;
+ u16 ec = EC(m->status);
+ u8 xec = XEC(m->status, 0x1f);
u32 nbsh = (u32)(m->status >> 32);
+ int core = -1;
- pr_emerg(HW_ERR "Northbridge Error, node %d: ", node_id);
+ pr_emerg(HW_ERR "Northbridge Error (node %d", node_id);
- /*
- * F10h, revD can disable ErrCpu[3:0] so check that first and also the
- * value encoding has changed so interpret those differently
- */
+ /* F10h, revD can disable ErrCpu[3:0] through ErrCpuVal */
if ((boot_cpu_data.x86 == 0x10) &&
(boot_cpu_data.x86_model > 7)) {
if (nbsh & K8_NBSH_ERR_CPU_VAL)
- pr_cont(", core: %u", (u8)(nbsh & nb_err_cpumask));
+ core = nbsh & nb_err_cpumask;
} else {
u8 assoc_cpus = nbsh & nb_err_cpumask;
if (assoc_cpus > 0)
- pr_cont(", core: %d", fls(assoc_cpus) - 1);
+ core = fls(assoc_cpus) - 1;
}
+ if (core >= 0)
+ pr_cont(", core %d): ", core);
+ else
+ pr_cont("): ");
+
switch (xec) {
case 0x2:
pr_cont("Sync error (sync packets on HT link detected).\n");
@@ -496,35 +659,89 @@ EXPORT_SYMBOL_GPL(amd_decode_nb_mce);
static void amd_decode_fr_mce(struct mce *m)
{
- if (boot_cpu_data.x86 == 0xf ||
- boot_cpu_data.x86 == 0x11)
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+ u8 xec = XEC(m->status, xec_mask);
+
+ if (c->x86 == 0xf || c->x86 == 0x11)
goto wrong_fr_mce;
- /* we have only one error signature so match all fields at once. */
- if ((m->status & 0xffff) == 0x0f0f) {
- pr_emerg(HW_ERR "FR Error: CPU Watchdog timer expire.\n");
- return;
- }
+ if (c->x86 != 0x15 && xec != 0x0)
+ goto wrong_fr_mce;
+
+ pr_emerg(HW_ERR "%s Error: ",
+ (c->x86 == 0x15 ? "Execution Unit" : "FIROB"));
+
+ if (xec == 0x0 || xec == 0xc)
+ pr_cont("%s.\n", fr_ex_mce_desc[xec]);
+ else if (xec < 0xd)
+ pr_cont("%s parity error.\n", fr_ex_mce_desc[xec]);
+ else
+ goto wrong_fr_mce;
+
+ return;
wrong_fr_mce:
pr_emerg(HW_ERR "Corrupted FR MCE info?\n");
}
+static void amd_decode_fp_mce(struct mce *m)
+{
+ u8 xec = XEC(m->status, xec_mask);
+
+ pr_emerg(HW_ERR "Floating Point Unit Error: ");
+
+ switch (xec) {
+ case 0x1:
+ pr_cont("Free List");
+ break;
+
+ case 0x2:
+ pr_cont("Physical Register File");
+ break;
+
+ case 0x3:
+ pr_cont("Retire Queue");
+ break;
+
+ case 0x4:
+ pr_cont("Scheduler table");
+ break;
+
+ case 0x5:
+ pr_cont("Status Register File");
+ break;
+
+ default:
+ goto wrong_fp_mce;
+ }
+
+ pr_cont(" parity error.\n");
+
+ return;
+
+wrong_fp_mce:
+ pr_emerg(HW_ERR "Corrupted FP MCE info?\n");
+}
+
static inline void amd_decode_err_code(u16 ec)
{
- if (TLB_ERROR(ec)) {
- pr_emerg(HW_ERR "Transaction: %s, Cache Level: %s\n",
- TT_MSG(ec), LL_MSG(ec));
- } else if (MEM_ERROR(ec)) {
- pr_emerg(HW_ERR "Transaction: %s, Type: %s, Cache Level: %s\n",
- RRRR_MSG(ec), TT_MSG(ec), LL_MSG(ec));
- } else if (BUS_ERROR(ec)) {
- pr_emerg(HW_ERR "Transaction: %s (%s), %s, Cache Level: %s, "
- "Participating Processor: %s\n",
- RRRR_MSG(ec), II_MSG(ec), TO_MSG(ec), LL_MSG(ec),
- PP_MSG(ec));
- } else
- pr_emerg(HW_ERR "Huh? Unknown MCE error 0x%x\n", ec);
+
+ pr_emerg(HW_ERR "cache level: %s", LL_MSG(ec));
+
+ if (BUS_ERROR(ec))
+ pr_cont(", mem/io: %s", II_MSG(ec));
+ else
+ pr_cont(", tx: %s", TT_MSG(ec));
+
+ if (MEM_ERROR(ec) || BUS_ERROR(ec)) {
+ pr_cont(", mem-tx: %s", R4_MSG(ec));
+
+ if (BUS_ERROR(ec))
+ pr_cont(", part-proc: %s (%s)", PP_MSG(ec), TO_MSG(ec));
+ }
+
+ pr_cont("\n");
}
/*
@@ -546,25 +763,32 @@ static bool amd_filter_mce(struct mce *m)
int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
{
struct mce *m = (struct mce *)data;
+ struct cpuinfo_x86 *c = &boot_cpu_data;
int node, ecc;
if (amd_filter_mce(m))
return NOTIFY_STOP;
- pr_emerg(HW_ERR "MC%d_STATUS: ", m->bank);
+ pr_emerg(HW_ERR "MC%d_STATUS[%s|%s|%s|%s|%s",
+ m->bank,
+ ((m->status & MCI_STATUS_OVER) ? "Over" : "-"),
+ ((m->status & MCI_STATUS_UC) ? "UE" : "CE"),
+ ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
+ ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
+ ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
- pr_cont("%sorrected error, other errors lost: %s, "
- "CPU context corrupt: %s",
- ((m->status & MCI_STATUS_UC) ? "Unc" : "C"),
- ((m->status & MCI_STATUS_OVER) ? "yes" : "no"),
- ((m->status & MCI_STATUS_PCC) ? "yes" : "no"));
+ if (c->x86 == 0x15)
+ pr_cont("|%s|%s",
+ ((m->status & BIT_64(44)) ? "Deferred" : "-"),
+ ((m->status & BIT_64(43)) ? "Poison" : "-"));
/* do the two bits[14:13] together */
ecc = (m->status >> 45) & 0x3;
if (ecc)
- pr_cont(", %sECC Error", ((ecc == 2) ? "C" : "U"));
+ pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
+
+ pr_cont("]: 0x%016llx\n", m->status);
- pr_cont("\n");
switch (m->bank) {
case 0:
@@ -576,7 +800,10 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
break;
case 2:
- amd_decode_bu_mce(m);
+ if (c->x86 == 0x15)
+ amd_decode_cu_mce(m);
+ else
+ amd_decode_bu_mce(m);
break;
case 3:
@@ -592,6 +819,10 @@ int amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
amd_decode_fr_mce(m);
break;
+ case 6:
+ amd_decode_fp_mce(m);
+ break;
+
default:
break;
}
@@ -608,18 +839,21 @@ static struct notifier_block amd_mce_dec_nb = {
static int __init mce_amd_init(void)
{
- if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD)
+ struct cpuinfo_x86 *c = &boot_cpu_data;
+
+ if (c->x86_vendor != X86_VENDOR_AMD)
return 0;
- if ((boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x12) &&
- (boot_cpu_data.x86 != 0x14 || boot_cpu_data.x86_model > 0xf))
+ if ((c->x86 < 0xf || c->x86 > 0x12) &&
+ (c->x86 != 0x14 || c->x86_model > 0xf) &&
+ (c->x86 != 0x15 || c->x86_model > 0xf))
return 0;
fam_ops = kzalloc(sizeof(struct amd_decoder_ops), GFP_KERNEL);
if (!fam_ops)
return -ENOMEM;
- switch (boot_cpu_data.x86) {
+ switch (c->x86) {
case 0xf:
fam_ops->dc_mce = k8_dc_mce;
fam_ops->ic_mce = k8_ic_mce;
@@ -651,9 +885,15 @@ static int __init mce_amd_init(void)
fam_ops->nb_mce = nb_noop_mce;
break;
+ case 0x15:
+ xec_mask = 0x1f;
+ fam_ops->dc_mce = f15h_dc_mce;
+ fam_ops->ic_mce = f15h_ic_mce;
+ fam_ops->nb_mce = f10h_nb_mce;
+ break;
+
default:
- printk(KERN_WARNING "Huh? What family is that: %d?!\n",
- boot_cpu_data.x86);
+ printk(KERN_WARNING "Huh? What family is that: %d?!\n", c->x86);
kfree(fam_ops);
return -EINVAL;
}
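
The family switch above just fills a small ops table; the shape of the new
two-argument decoder callbacks can be sketched stand-alone (the decoders
below are trivial stand-ins, not the real per-family ones):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct decoder_ops {
        bool (*dc_mce)(uint16_t ec, uint8_t xec);
        bool (*ic_mce)(uint16_t ec, uint8_t xec);
};

static bool demo_dc_mce(uint16_t ec, uint8_t xec)
{
        printf("DC: ec=0x%04x xec=0x%x\n", ec, xec);
        return true;
}

static bool demo_ic_mce(uint16_t ec, uint8_t xec)
{
        printf("IC: ec=0x%04x xec=0x%x\n", ec, xec);
        return true;
}

int main(void)
{
        struct decoder_ops ops;
        unsigned int family = 0x15;     /* pretend boot_cpu_data.x86 */

        switch (family) {
        case 0x15:
                ops.dc_mce = demo_dc_mce;
                ops.ic_mce = demo_ic_mce;
                break;
        default:
                return 1;
        }

        return ops.dc_mce(0x0151, 0x1) && ops.ic_mce(0x0151, 0x0) ? 0 : 1;
}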
diff --git a/drivers/edac/mce_amd.h b/drivers/edac/mce_amd.h
index 35f6e0e3b297..45dda47173f2 100644
--- a/drivers/edac/mce_amd.h
+++ b/drivers/edac/mce_amd.h
@@ -7,8 +7,8 @@
#define BIT_64(n) (U64_C(1) << (n))
-#define ERROR_CODE(x) ((x) & 0xffff)
-#define EXT_ERROR_CODE(x) (((x) >> 16) & 0x1f)
+#define EC(x) ((x) & 0xffff)
+#define XEC(x, mask) (((x) >> 16) & mask)
#define LOW_SYNDROME(x) (((x) >> 15) & 0xff)
#define HIGH_SYNDROME(x) (((x) >> 24) & 0xff)
@@ -21,15 +21,15 @@
#define TT_MSG(x) tt_msgs[TT(x)]
#define II(x) (((x) >> 2) & 0x3)
#define II_MSG(x) ii_msgs[II(x)]
-#define LL(x) (((x) >> 0) & 0x3)
+#define LL(x) ((x) & 0x3)
#define LL_MSG(x) ll_msgs[LL(x)]
#define TO(x) (((x) >> 8) & 0x1)
#define TO_MSG(x) to_msgs[TO(x)]
#define PP(x) (((x) >> 9) & 0x3)
#define PP_MSG(x) pp_msgs[PP(x)]
-#define RRRR(x) (((x) >> 4) & 0xf)
-#define RRRR_MSG(x) ((RRRR(x) < 9) ? rrrr_msgs[RRRR(x)] : "Wrong R4!")
+#define R4(x) (((x) >> 4) & 0xf)
+#define R4_MSG(x) ((R4(x) < 9) ? rrrr_msgs[R4(x)] : "Wrong R4!")
#define K8_NBSH 0x4C
@@ -100,8 +100,8 @@ struct err_regs {
* per-family decoder ops
*/
struct amd_decoder_ops {
- bool (*dc_mce)(u16);
- bool (*ic_mce)(u16);
+ bool (*dc_mce)(u16, u8);
+ bool (*ic_mce)(u16, u8);
bool (*nb_mce)(u16, u8);
};
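
The renamed macros extract fixed bit fields from the low half of MCi_STATUS.
A worked decode of a made-up status value (the TT() definition below is
assumed from TT_MSG()'s usage; it is not shown in this hunk):

#include <stdint.h>
#include <stdio.h>

#define EC(x)        ((x) & 0xffff)
#define XEC(x, mask) (((x) >> 16) & mask)
#define LL(x)        ((x) & 0x3)
#define TT(x)        (((x) >> 2) & 0x3)  /* assumed definition */
#define R4(x)        (((x) >> 4) & 0xf)

int main(void)
{
        uint64_t status = 0x001f0151ULL;        /* made-up MCi_STATUS */
        uint16_t ec = EC(status);

        printf("ec=0x%04x xec=0x%x ll=%u tt=%u r4=0x%x\n",
               ec, (unsigned)XEC(status, 0x1f), (unsigned)LL(ec),
               (unsigned)TT(ec), (unsigned)R4(ec));
        return 0;
}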
diff --git a/drivers/edac/mce_amd_inj.c b/drivers/edac/mce_amd_inj.c
index 39faded3cadd..733a7e7a8d6f 100644
--- a/drivers/edac/mce_amd_inj.c
+++ b/drivers/edac/mce_amd_inj.c
@@ -88,10 +88,11 @@ static ssize_t edac_inject_bank_store(struct kobject *kobj,
return -EINVAL;
}
- if (value > 5) {
- printk(KERN_ERR "Non-existant MCE bank: %lu\n", value);
- return -EINVAL;
- }
+ if (value > 5 && (boot_cpu_data.x86 != 0x15 || value > 6)) {
+ printk(KERN_ERR "Non-existent MCE bank: %lu\n", value);
+ return -EINVAL;
+ }
i_mce.bank = value;
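
The check above admits bank 6 only on family 0x15, which adds the FP bank
handled by case 6 in amd_decode_mce(). Written as a small stand-alone
predicate:

#include <stdbool.h>
#include <stdio.h>

/* banks 0-5 exist everywhere; family 0x15 adds bank 6 (FP) */
static bool mce_bank_valid(unsigned int family, unsigned long value)
{
        if (value > 5 && (family != 0x15 || value > 6))
                return false;
        return true;
}

int main(void)
{
        printf("F10h bank 6: %s\n",
               mce_bank_valid(0x10, 6) ? "ok" : "non-existent");
        printf("F15h bank 6: %s\n",
               mce_bank_valid(0x15, 6) ? "ok" : "non-existent");
        return 0;
}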