summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-03-08 09:11:39 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2019-03-08 09:11:39 -0800
commite13284da944df29ab08e8a9d2a50fc0ad1d858ab (patch)
tree8e6e2580d27cf4fe5f712e0857dc495aa52fd27e
parent1b37b8c48d2c2d8553f116ec2a75d21056f1fb35 (diff)
parent41f035a86b5b72a4f947c38e94239d20d595352a (diff)
downloadlinux-0-day-e13284da944df29ab08e8a9d2a50fc0ad1d858ab.tar.gz
linux-0-day-e13284da944df29ab08e8a9d2a50fc0ad1d858ab.tar.xz
Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull RAS updates from Borislav Petkov: "This time around we have in store: - Disable MC4_MISC thresholding banks on all AMD family 0x15 models (Shirish S) - AMD MCE error descriptions update and error decode improvements (Yazen Ghannam) - The usual smaller conversions and fixes" * 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/mce: Improve error message when kernel cannot recover, p2 EDAC/mce_amd: Decode MCA_STATUS in bit definition order EDAC/mce_amd: Decode MCA_STATUS[Scrub] bit EDAC, mce_amd: Print ExtErrorCode and description on a single line EDAC, mce_amd: Match error descriptions to latest documentation x86/MCE/AMD, EDAC/mce_amd: Add new error descriptions for some SMCA bank types x86/MCE/AMD, EDAC/mce_amd: Add new McaTypes for CS, PSP, and SMU units x86/MCE/AMD, EDAC/mce_amd: Add new MP5, NBIO, and PCIE SMCA bank types RAS: Add a MAINTAINERS entry RAS: Use consistent types for UUIDs x86/MCE/AMD: Carve out the MC4_MISC thresholding quirk x86/MCE/AMD: Turn off MC4_MISC thresholding on all family 0x15 models x86/MCE: Switch to use the new generic UUID API
-rw-r--r--MAINTAINERS10
-rw-r--r--arch/x86/include/asm/mce.h7
-rw-r--r--arch/x86/kernel/cpu/mce/amd.c62
-rw-r--r--arch/x86/kernel/cpu/mce/apei.c10
-rw-r--r--arch/x86/kernel/cpu/mce/core.c30
-rw-r--r--arch/x86/kernel/cpu/mce/severity.c5
-rw-r--r--drivers/edac/mce_amd.c291
-rw-r--r--drivers/ras/ras.c2
-rw-r--r--include/ras/ras_event.h8
9 files changed, 282 insertions, 143 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index f758445317c5..58de951ee4ba 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -12960,6 +12960,16 @@ M: Alexandre Bounine <alex.bou9@gmail.com>
S: Maintained
F: drivers/rapidio/
+RAS INFRASTRUCTURE
+M: Tony Luck <tony.luck@intel.com>
+M: Borislav Petkov <bp@alien8.de>
+L: linux-edac@vger.kernel.org
+S: Maintained
+F: drivers/ras/
+F: include/linux/ras.h
+F: include/ras/ras_event.h
+F: Documentation/admin-guide/ras.rst
+
RAYLINK/WEBGEAR 802.11 WIRELESS LAN DRIVER
L: linux-wireless@vger.kernel.org
S: Orphan
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index c1a812bd5a27..22d05e3835f0 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -48,6 +48,7 @@
#define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. valid */
#define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */
#define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */
+#define MCI_STATUS_SCRUB BIT_ULL(40) /* Error detected during scrub operation */
/*
* McaX field if set indicates a given bank supports MCA extensions:
@@ -307,11 +308,17 @@ enum smca_bank_types {
SMCA_FP, /* Floating Point */
SMCA_L3_CACHE, /* L3 Cache */
SMCA_CS, /* Coherent Slave */
+ SMCA_CS_V2, /* Coherent Slave */
SMCA_PIE, /* Power, Interrupts, etc. */
SMCA_UMC, /* Unified Memory Controller */
SMCA_PB, /* Parameter Block */
SMCA_PSP, /* Platform Security Processor */
+ SMCA_PSP_V2, /* Platform Security Processor */
SMCA_SMU, /* System Management Unit */
+ SMCA_SMU_V2, /* System Management Unit */
+ SMCA_MP5, /* Microprocessor 5 Unit */
+ SMCA_NBIO, /* Northbridge IO Unit */
+ SMCA_PCIE, /* PCI Express Unit */
N_SMCA_BANK_TYPES
};
diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c
index 89298c83de53..e64de5149e50 100644
--- a/arch/x86/kernel/cpu/mce/amd.c
+++ b/arch/x86/kernel/cpu/mce/amd.c
@@ -88,11 +88,17 @@ static struct smca_bank_name smca_names[] = {
[SMCA_FP] = { "floating_point", "Floating Point Unit" },
[SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
[SMCA_CS] = { "coherent_slave", "Coherent Slave" },
+ [SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" },
[SMCA_PIE] = { "pie", "Power, Interrupts, etc." },
[SMCA_UMC] = { "umc", "Unified Memory Controller" },
[SMCA_PB] = { "param_block", "Parameter Block" },
[SMCA_PSP] = { "psp", "Platform Security Processor" },
+ [SMCA_PSP_V2] = { "psp", "Platform Security Processor" },
[SMCA_SMU] = { "smu", "System Management Unit" },
+ [SMCA_SMU_V2] = { "smu", "System Management Unit" },
+ [SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" },
+ [SMCA_NBIO] = { "nbio", "Northbridge IO Unit" },
+ [SMCA_PCIE] = { "pcie", "PCI Express Unit" },
};
static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
@@ -138,30 +144,42 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
/* ZN Core (HWID=0xB0) MCA types */
- { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF },
+ { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
/* HWID 0xB0 MCATYPE 0x4 is Reserved */
- { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0x7FF },
+ { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0xFFF },
{ SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F },
{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
/* Data Fabric MCA types */
{ SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF },
- { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0xF },
+ { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0x1F },
+ { SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2), 0x3FFF },
/* Unified Memory Controller MCA type */
- { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0x3F },
+ { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0xFF },
/* Parameter Block MCA type */
{ SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 },
/* Platform Security Processor MCA type */
{ SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 },
+ { SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1), 0x3FFFF },
/* System Management Unit MCA type */
{ SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
+ { SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1), 0x7FF },
+
+ /* Microprocessor 5 Unit MCA type */
+ { SMCA_MP5, HWID_MCATYPE(0x01, 0x2), 0x3FF },
+
+ /* Northbridge IO Unit MCA type */
+ { SMCA_NBIO, HWID_MCATYPE(0x18, 0x0), 0x1F },
+
+ /* PCI Express Unit MCA type */
+ { SMCA_PCIE, HWID_MCATYPE(0x46, 0x0), 0x1F },
};
struct smca_bank smca_banks[MAX_NR_BANKS];
@@ -545,6 +563,40 @@ out:
return offset;
}
+/*
+ * Turn off MC4_MISC thresholding banks on all family 0x15 models since
+ * they're not supported there.
+ */
+void disable_err_thresholding(struct cpuinfo_x86 *c)
+{
+ int i;
+ u64 hwcr;
+ bool need_toggle;
+ u32 msrs[] = {
+ 0x00000413, /* MC4_MISC0 */
+ 0xc0000408, /* MC4_MISC1 */
+ };
+
+ if (c->x86 != 0x15)
+ return;
+
+ rdmsrl(MSR_K7_HWCR, hwcr);
+
+ /* McStatusWrEn has to be set */
+ need_toggle = !(hwcr & BIT(18));
+
+ if (need_toggle)
+ wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
+
+ /* Clear CntP bit safely */
+ for (i = 0; i < ARRAY_SIZE(msrs); i++)
+ msr_clear_bit(msrs[i], 62);
+
+ /* restore old settings */
+ if (need_toggle)
+ wrmsrl(MSR_K7_HWCR, hwcr);
+}
+
/* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c)
{
@@ -552,6 +604,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
unsigned int bank, block, cpu = smp_processor_id();
int offset = -1;
+ disable_err_thresholding(c);
+
for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (mce_flags.smca)
smca_configure(bank, cpu);
diff --git a/arch/x86/kernel/cpu/mce/apei.c b/arch/x86/kernel/cpu/mce/apei.c
index 1d9b3ce662a0..c038e5c00a59 100644
--- a/arch/x86/kernel/cpu/mce/apei.c
+++ b/arch/x86/kernel/cpu/mce/apei.c
@@ -64,11 +64,11 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
#define CPER_CREATOR_MCE \
- UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
- 0x64, 0x90, 0xb8, 0x9d)
+ GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
+ 0x64, 0x90, 0xb8, 0x9d)
#define CPER_SECTION_TYPE_MCE \
- UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
- 0x04, 0x4a, 0x38, 0xfc)
+ GUID_INIT(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
+ 0x04, 0x4a, 0x38, 0xfc)
/*
* CPER specification (in UEFI specification 2.3 appendix N) requires
@@ -135,7 +135,7 @@ retry:
goto out;
/* try to skip other type records in storage */
else if (rc != sizeof(rcd) ||
- uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE))
+ !guid_equal(&rcd.hdr.creator_id, &CPER_CREATOR_MCE))
goto retry;
memcpy(m, &rcd.mce, sizeof(*m));
rc = sizeof(*m);
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 6ce290c506d9..b7fb541a4873 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -1612,36 +1612,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
if (c->x86 == 0x15 && c->x86_model <= 0xf)
mce_flags.overflow_recov = 1;
- /*
- * Turn off MC4_MISC thresholding banks on those models since
- * they're not supported there.
- */
- if (c->x86 == 0x15 &&
- (c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
- int i;
- u64 hwcr;
- bool need_toggle;
- u32 msrs[] = {
- 0x00000413, /* MC4_MISC0 */
- 0xc0000408, /* MC4_MISC1 */
- };
-
- rdmsrl(MSR_K7_HWCR, hwcr);
-
- /* McStatusWrEn has to be set */
- need_toggle = !(hwcr & BIT(18));
-
- if (need_toggle)
- wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
-
- /* Clear CntP bit safely */
- for (i = 0; i < ARRAY_SIZE(msrs); i++)
- msr_clear_bit(msrs[i], 62);
-
- /* restore old settings */
- if (need_toggle)
- wrmsrl(MSR_K7_HWCR, hwcr);
- }
}
if (c->x86_vendor == X86_VENDOR_INTEL) {
diff --git a/arch/x86/kernel/cpu/mce/severity.c b/arch/x86/kernel/cpu/mce/severity.c
index dc3e26e905a3..65201e180fe0 100644
--- a/arch/x86/kernel/cpu/mce/severity.c
+++ b/arch/x86/kernel/cpu/mce/severity.c
@@ -165,6 +165,11 @@ static struct severity {
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
KERNEL
),
+ MCESEV(
+ PANIC, "Instruction fetch error in kernel",
+ SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
+ KERNEL
+ ),
#endif
MCESEV(
PANIC, "Action required: unknown MCACOD",
diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c
index c605089d899f..0a1814dad6cf 100644
--- a/drivers/edac/mce_amd.c
+++ b/drivers/edac/mce_amd.c
@@ -151,138 +151,223 @@ static const char * const mc6_mce_desc[] = {
/* Scalable MCA error strings */
static const char * const smca_ls_mce_desc[] = {
- "Load queue parity",
- "Store queue parity",
- "Miss address buffer payload parity",
- "L1 TLB parity",
- "Reserved",
- "DC tag error type 6",
- "DC tag error type 1",
+ "Load queue parity error",
+ "Store queue parity error",
+ "Miss address buffer payload parity error",
+ "Level 1 TLB parity error",
+ "DC Tag error type 5",
+ "DC Tag error type 6",
+ "DC Tag error type 1",
"Internal error type 1",
"Internal error type 2",
- "Sys Read data error thread 0",
- "Sys read data error thread 1",
- "DC tag error type 2",
- "DC data error type 1 (poison consumption)",
- "DC data error type 2",
- "DC data error type 3",
- "DC tag error type 4",
- "L2 TLB parity",
+ "System Read Data Error Thread 0",
+ "System Read Data Error Thread 1",
+ "DC Tag error type 2",
+ "DC Data error type 1 and poison consumption",
+ "DC Data error type 2",
+ "DC Data error type 3",
+ "DC Tag error type 4",
+ "Level 2 TLB parity error",
"PDC parity error",
- "DC tag error type 3",
- "DC tag error type 5",
- "L2 fill data error",
+ "DC Tag error type 3",
+ "DC Tag error type 5",
+ "L2 Fill Data error",
};
static const char * const smca_if_mce_desc[] = {
- "microtag probe port parity error",
- "IC microtag or full tag multi-hit error",
- "IC full tag parity",
- "IC data array parity",
- "Decoupling queue phys addr parity error",
- "L0 ITLB parity error",
- "L1 ITLB parity error",
- "L2 ITLB parity error",
- "BPQ snoop parity on Thread 0",
- "BPQ snoop parity on Thread 1",
- "L1 BTB multi-match error",
- "L2 BTB multi-match error",
- "L2 Cache Response Poison error",
- "System Read Data error",
+ "Op Cache Microtag Probe Port Parity Error",
+ "IC Microtag or Full Tag Multi-hit Error",
+ "IC Full Tag Parity Error",
+ "IC Data Array Parity Error",
+ "Decoupling Queue PhysAddr Parity Error",
+ "L0 ITLB Parity Error",
+ "L1 ITLB Parity Error",
+ "L2 ITLB Parity Error",
+ "BPQ Thread 0 Snoop Parity Error",
+ "BPQ Thread 1 Snoop Parity Error",
+ "L1 BTB Multi-Match Error",
+ "L2 BTB Multi-Match Error",
+ "L2 Cache Response Poison Error",
+ "System Read Data Error",
};
static const char * const smca_l2_mce_desc[] = {
- "L2M tag multi-way-hit error",
- "L2M tag ECC error",
- "L2M data ECC error",
- "HW assert",
+ "L2M Tag Multiple-Way-Hit error",
+ "L2M Tag or State Array ECC Error",
+ "L2M Data Array ECC Error",
+ "Hardware Assert Error",
};
static const char * const smca_de_mce_desc[] = {
- "uop cache tag parity error",
- "uop cache data parity error",
- "Insn buffer parity error",
- "uop queue parity error",
- "Insn dispatch queue parity error",
- "Fetch address FIFO parity",
- "Patch RAM data parity",
- "Patch RAM sequencer parity",
- "uop buffer parity"
+ "Micro-op cache tag parity error",
+ "Micro-op cache data parity error",
+ "Instruction buffer parity error",
+ "Micro-op queue parity error",
+ "Instruction dispatch queue parity error",
+ "Fetch address FIFO parity error",
+ "Patch RAM data parity error",
+ "Patch RAM sequencer parity error",
+ "Micro-op buffer parity error"
};
static const char * const smca_ex_mce_desc[] = {
- "Watchdog timeout error",
- "Phy register file parity",
- "Flag register file parity",
- "Immediate displacement register file parity",
- "Address generator payload parity",
- "EX payload parity",
- "Checkpoint queue parity",
- "Retire dispatch queue parity",
+ "Watchdog Timeout error",
+ "Physical register file parity error",
+ "Flag register file parity error",
+ "Immediate displacement register file parity error",
+ "Address generator payload parity error",
+ "EX payload parity error",
+ "Checkpoint queue parity error",
+ "Retire dispatch queue parity error",
"Retire status queue parity error",
"Scheduling queue parity error",
"Branch buffer queue parity error",
+ "Hardware Assertion error",
};
static const char * const smca_fp_mce_desc[] = {
- "Physical register file parity",
- "Freelist parity error",
- "Schedule queue parity",
+ "Physical register file (PRF) parity error",
+ "Freelist (FL) parity error",
+ "Schedule queue parity error",
"NSQ parity error",
- "Retire queue parity",
- "Status register file parity",
+ "Retire queue (RQ) parity error",
+ "Status register file (SRF) parity error",
"Hardware assertion",
};
static const char * const smca_l3_mce_desc[] = {
- "Shadow tag macro ECC error",
- "Shadow tag macro multi-way-hit error",
- "L3M tag ECC error",
- "L3M tag multi-way-hit error",
- "L3M data ECC error",
- "XI parity, L3 fill done channel error",
- "L3 victim queue parity",
- "L3 HW assert",
+ "Shadow Tag Macro ECC Error",
+ "Shadow Tag Macro Multi-way-hit Error",
+ "L3M Tag ECC Error",
+ "L3M Tag Multi-way-hit Error",
+ "L3M Data ECC Error",
+ "SDP Parity Error or SystemReadDataError from XI",
+ "L3 Victim Queue Parity Error",
+ "L3 Hardware Assertion",
};
static const char * const smca_cs_mce_desc[] = {
- "Illegal request from transport layer",
- "Address violation",
- "Security violation",
- "Illegal response from transport layer",
- "Unexpected response",
- "Parity error on incoming request or probe response data",
- "Parity error on incoming read response data",
- "Atomic request parity",
- "ECC error on probe filter access",
+ "Illegal Request",
+ "Address Violation",
+ "Security Violation",
+ "Illegal Response",
+ "Unexpected Response",
+ "Request or Probe Parity Error",
+ "Read Response Parity Error",
+ "Atomic Request Parity Error",
+ "Probe Filter ECC Error",
+};
+
+static const char * const smca_cs2_mce_desc[] = {
+ "Illegal Request",
+ "Address Violation",
+ "Security Violation",
+ "Illegal Response",
+ "Unexpected Response",
+ "Request or Probe Parity Error",
+ "Read Response Parity Error",
+ "Atomic Request Parity Error",
+ "SDP read response had no match in the CS queue",
+ "Probe Filter Protocol Error",
+ "Probe Filter ECC Error",
+ "SDP read response had an unexpected RETRY error",
+ "Counter overflow error",
+ "Counter underflow error",
};
static const char * const smca_pie_mce_desc[] = {
- "HW assert",
- "Internal PIE register security violation",
- "Error on GMI link",
- "Poison data written to internal PIE register",
+ "Hardware Assert",
+ "Register security violation",
+ "Link Error",
+ "Poison data consumption",
+ "A deferred error was detected in the DF"
};
static const char * const smca_umc_mce_desc[] = {
"DRAM ECC error",
- "Data poison error on DRAM",
+ "Data poison error",
"SDP parity error",
"Advanced peripheral bus error",
- "Command/address parity error",
+ "Address/Command parity error",
"Write data CRC error",
+ "DCQ SRAM ECC error",
+ "AES SRAM ECC error",
};
static const char * const smca_pb_mce_desc[] = {
- "Parameter Block RAM ECC error",
+ "An ECC error in the Parameter Block RAM array",
};
static const char * const smca_psp_mce_desc[] = {
- "PSP RAM ECC or parity error",
+ "An ECC or parity error in a PSP RAM instance",
+};
+
+static const char * const smca_psp2_mce_desc[] = {
+ "High SRAM ECC or parity error",
+ "Low SRAM ECC or parity error",
+ "Instruction Cache Bank 0 ECC or parity error",
+ "Instruction Cache Bank 1 ECC or parity error",
+ "Instruction Tag Ram 0 parity error",
+ "Instruction Tag Ram 1 parity error",
+ "Data Cache Bank 0 ECC or parity error",
+ "Data Cache Bank 1 ECC or parity error",
+ "Data Cache Bank 2 ECC or parity error",
+ "Data Cache Bank 3 ECC or parity error",
+ "Data Tag Bank 0 parity error",
+ "Data Tag Bank 1 parity error",
+ "Data Tag Bank 2 parity error",
+ "Data Tag Bank 3 parity error",
+ "Dirty Data Ram parity error",
+ "TLB Bank 0 parity error",
+ "TLB Bank 1 parity error",
+ "System Hub Read Buffer ECC or parity error",
};
static const char * const smca_smu_mce_desc[] = {
- "SMU RAM ECC or parity error",
+ "An ECC or parity error in an SMU RAM instance",
+};
+
+static const char * const smca_smu2_mce_desc[] = {
+ "High SRAM ECC or parity error",
+ "Low SRAM ECC or parity error",
+ "Data Cache Bank A ECC or parity error",
+ "Data Cache Bank B ECC or parity error",
+ "Data Tag Cache Bank A ECC or parity error",
+ "Data Tag Cache Bank B ECC or parity error",
+ "Instruction Cache Bank A ECC or parity error",
+ "Instruction Cache Bank B ECC or parity error",
+ "Instruction Tag Cache Bank A ECC or parity error",
+ "Instruction Tag Cache Bank B ECC or parity error",
+ "System Hub Read Buffer ECC or parity error",
+};
+
+static const char * const smca_mp5_mce_desc[] = {
+ "High SRAM ECC or parity error",
+ "Low SRAM ECC or parity error",
+ "Data Cache Bank A ECC or parity error",
+ "Data Cache Bank B ECC or parity error",
+ "Data Tag Cache Bank A ECC or parity error",
+ "Data Tag Cache Bank B ECC or parity error",
+ "Instruction Cache Bank A ECC or parity error",
+ "Instruction Cache Bank B ECC or parity error",
+ "Instruction Tag Cache Bank A ECC or parity error",
+ "Instruction Tag Cache Bank B ECC or parity error",
+};
+
+static const char * const smca_nbio_mce_desc[] = {
+ "ECC or Parity error",
+ "PCIE error",
+ "SDP ErrEvent error",
+ "SDP Egress Poison Error",
+ "IOHC Internal Poison Error",
+};
+
+static const char * const smca_pcie_mce_desc[] = {
+ "CCIX PER Message logging",
+ "CCIX Read Response with Status: Non-Data Error",
+ "CCIX Write Response with Status: Non-Data Error",
+ "CCIX Read Response with Status: Data Error",
+ "CCIX Non-okay write response with data error",
};
struct smca_mce_desc {
@@ -299,11 +384,17 @@ static struct smca_mce_desc smca_mce_descs[] = {
[SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) },
[SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) },
[SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) },
+ [SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) },
[SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
[SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
[SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
[SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
+ [SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc) },
[SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) },
+ [SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc) },
+ [SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) },
+ [SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc) },
+ [SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) },
};
static bool f12h_mc0_mce(u16 ec, u8 xec)
@@ -874,13 +965,12 @@ static void decode_smca_error(struct mce *m)
ip_name = smca_get_long_name(bank_type);
- pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec);
+ pr_emerg(HW_ERR "%s Ext. Error Code: %d", ip_name, xec);
/* Only print the decode of valid error codes */
if (xec < smca_mce_descs[bank_type].num_descs &&
(hwid->xec_bitmap & BIT_ULL(xec))) {
- pr_emerg(HW_ERR "%s Error: ", ip_name);
- pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
+ pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);
}
if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
@@ -961,26 +1051,18 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
((m->status & MCI_STATUS_UC) ? "UE" :
(m->status & MCI_STATUS_DEFERRED) ? "-" : "CE"),
((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
- ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"),
- ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"));
-
- if (fam >= 0x15) {
- pr_cont("|%s", (m->status & MCI_STATUS_DEFERRED ? "Deferred" : "-"));
-
- /* F15h, bank4, bit 43 is part of McaStatSubCache. */
- if (fam != 0x15 || m->bank != 4)
- pr_cont("|%s", (m->status & MCI_STATUS_POISON ? "Poison" : "-"));
- }
+ ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"),
+ ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"));
if (boot_cpu_has(X86_FEATURE_SMCA)) {
u32 low, high;
u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
- pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
-
if (!rdmsr_safe(addr, &low, &high) &&
(low & MCI_CONFIG_MCAX))
pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
+
+ pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
}
/* do the two bits[14:13] together */
@@ -988,6 +1070,17 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
if (ecc)
pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
+ if (fam >= 0x15) {
+ pr_cont("|%s", (m->status & MCI_STATUS_DEFERRED ? "Deferred" : "-"));
+
+ /* F15h, bank4, bit 43 is part of McaStatSubCache. */
+ if (fam != 0x15 || m->bank != 4)
+ pr_cont("|%s", (m->status & MCI_STATUS_POISON ? "Poison" : "-"));
+ }
+
+ if (fam >= 0x17)
+ pr_cont("|%s", (m->status & MCI_STATUS_SCRUB ? "Scrub" : "-"));
+
pr_cont("]: 0x%016llx\n", m->status);
if (m->status & MCI_STATUS_ADDRV)
diff --git a/drivers/ras/ras.c b/drivers/ras/ras.c
index 3f38907320dc..95540ea8dd9d 100644
--- a/drivers/ras/ras.c
+++ b/drivers/ras/ras.c
@@ -14,7 +14,7 @@
#define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h>
-void log_non_standard_event(const uuid_le *sec_type, const uuid_le *fru_id,
+void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id,
const char *fru_text, const u8 sev, const u8 *err,
const u32 len)
{
diff --git a/include/ras/ras_event.h b/include/ras/ras_event.h
index a0794632fd01..36c5c5e38c1d 100644
--- a/include/ras/ras_event.h
+++ b/include/ras/ras_event.h
@@ -27,7 +27,7 @@
TRACE_EVENT(extlog_mem_event,
TP_PROTO(struct cper_sec_mem_err *mem,
u32 err_seq,
- const uuid_le *fru_id,
+ const guid_t *fru_id,
const char *fru_text,
u8 sev),
@@ -39,7 +39,7 @@ TRACE_EVENT(extlog_mem_event,
__field(u8, sev)
__field(u64, pa)
__field(u8, pa_mask_lsb)
- __field_struct(uuid_le, fru_id)
+ __field_struct(guid_t, fru_id)
__string(fru_text, fru_text)
__field_struct(struct cper_mem_err_compact, data)
),
@@ -218,8 +218,8 @@ TRACE_EVENT(arm_event,
*/
TRACE_EVENT(non_standard_event,
- TP_PROTO(const uuid_le *sec_type,
- const uuid_le *fru_id,
+ TP_PROTO(const guid_t *sec_type,
+ const guid_t *fru_id,
const char *fru_text,
const u8 sev,
const u8 *err,