summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: jhb <jhb@FreeBSD.org> 2010-03-16 16:01:19 +0000
committer: jhb <jhb@FreeBSD.org> 2010-03-16 16:01:19 +0000
commit: 9654d2534603fd39e69cb4f621463cb0888e3bce (patch)
tree: 87e0e20099a2ed886881d2acca222b1904852533
parent: a4d89c6f75eb549e90ca2a848b79d036379b9e6b (diff)
download: FreeBSD-src-9654d2534603fd39e69cb4f621463cb0888e3bce.zip
download: FreeBSD-src-9654d2534603fd39e69cb4f621463cb0888e3bce.tar.gz
- Extend the machine check record structure to include several fields useful
  for parsing model-specific and other fields in machine check events,
  including the global machine check capabilities and status registers,
  CPU identification, and the FreeBSD CPU ID.
- Report these added fields in the console log of a machine check so that a
  record structure can be reconstituted from the console messages.
- Parse new architectural errors, including memory controller errors.

MFC after: 1 week
-rw-r--r--sys/amd64/amd64/mca.c52
-rw-r--r--sys/amd64/include/mca.h5
-rw-r--r--sys/amd64/include/specialreg.h12
-rw-r--r--sys/i386/i386/mca.c52
-rw-r--r--sys/i386/include/mca.h5
-rw-r--r--sys/i386/include/specialreg.h12
6 files changed, 132 insertions, 6 deletions
diff --git a/sys/amd64/amd64/mca.c b/sys/amd64/amd64/mca.c
index 344acaf..ed55c2c 100644
--- a/sys/amd64/amd64/mca.c
+++ b/sys/amd64/amd64/mca.c
@@ -186,19 +186,46 @@ mca_error_request(uint16_t mca_error)
return ("???");
}
+static const char *
+mca_error_mmtype(uint16_t mca_error)
+{
+ /* Map bits 6:4 of mca_error to a short mnemonic string; mca_log() prints it for memory controller errors. */
+ switch ((mca_error & 0x70) >> 4) {
+ case 0x0:
+ return ("GEN");
+ case 0x1:
+ return ("RD");
+ case 0x2:
+ return ("WR");
+ case 0x3:
+ return ("AC");
+ case 0x4:
+ return ("MS");
+ }
+ return ("???"); /* field values 0x5-0x7 have no mnemonic here */
+}
+
/* Dump details about a single machine check. */
static void __nonnull(1)
mca_log(const struct mca_record *rec)
{
uint16_t mca_error;
- printf("MCA: bank %d, status 0x%016llx\n", rec->mr_bank,
+ printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
(long long)rec->mr_status);
- printf("MCA: CPU %d ", rec->mr_apic_id);
+ printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
+ (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status);
+ printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor,
+ rec->mr_cpu_id, rec->mr_apic_id);
+ printf("MCA: CPU %d ", rec->mr_cpu);
if (rec->mr_status & MC_STATUS_UC)
printf("UNCOR ");
- else
+ else {
printf("COR ");
+ if (rec->mr_mcg_cap & MCG_CAP_TES_P)
+ printf("(%lld) ", ((long long)rec->mr_status &
+ MC_STATUS_COR_COUNT) >> 38);
+ }
if (rec->mr_status & MC_STATUS_PCC)
printf("PCC ");
if (rec->mr_status & MC_STATUS_OVER)
@@ -221,6 +248,9 @@ mca_log(const struct mca_record *rec)
case 0x0004:
printf("FRC error");
break;
+ case 0x0005:
+ printf("internal parity error");
+ break;
case 0x0400:
printf("internal timer error");
break;
@@ -245,6 +275,17 @@ mca_log(const struct mca_record *rec)
break;
}
+ /* Memory controller error. */
+ if ((mca_error & 0xef80) == 0x0080) {
+ printf("%s channel ", mca_error_mmtype(mca_error));
+ if ((mca_error & 0x000f) != 0x000f)
+ printf("%d", mca_error & 0x000f);
+ else
+ printf("??");
+ printf(" memory error");
+ break;
+ }
+
/* Cache error. */
if ((mca_error & 0xef00) == 0x0100) {
printf("%sCACHE %s %s error",
@@ -322,6 +363,11 @@ mca_check_status(int bank, struct mca_record *rec)
rec->mr_misc = rdmsr(MSR_MC_MISC(bank));
rec->mr_tsc = rdtsc();
rec->mr_apic_id = PCPU_GET(apic_id);
+ rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP);
+ rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS);
+ rec->mr_cpu_id = cpu_id;
+ rec->mr_cpu_vendor_id = cpu_vendor_id;
+ rec->mr_cpu = PCPU_GET(cpuid);
/*
* Clear machine check. Don't do this for uncorrectable
diff --git a/sys/amd64/include/mca.h b/sys/amd64/include/mca.h
index ddc3aeb..bc09480 100644
--- a/sys/amd64/include/mca.h
+++ b/sys/amd64/include/mca.h
@@ -37,6 +37,11 @@ struct mca_record {
uint64_t mr_tsc;
int mr_apic_id;
int mr_bank;
+ uint64_t mr_mcg_cap;
+ uint64_t mr_mcg_status;
+ int mr_cpu_id;
+ int mr_cpu_vendor_id;
+ int mr_cpu;
};
#ifdef _KERNEL
diff --git a/sys/amd64/include/specialreg.h b/sys/amd64/include/specialreg.h
index b325ed4..baf2466 100644
--- a/sys/amd64/include/specialreg.h
+++ b/sys/amd64/include/specialreg.h
@@ -267,6 +267,7 @@
#define MSR_MTRR16kBase 0x258
#define MSR_MTRR4kBase 0x268
#define MSR_PAT 0x277
+#define MSR_MC0_CTL2 0x280
#define MSR_MTRRdefType 0x2ff
#define MSR_MC0_CTL 0x400
#define MSR_MC0_STATUS 0x401
@@ -352,8 +353,10 @@
#define MCG_CAP_COUNT 0x000000ff
#define MCG_CAP_CTL_P 0x00000100
#define MCG_CAP_EXT_P 0x00000200
+#define MCG_CAP_CMCI_P 0x00000400
#define MCG_CAP_TES_P 0x00000800
#define MCG_CAP_EXT_CNT 0x00ff0000
+#define MCG_CAP_SER_P 0x01000000
#define MCG_STATUS_RIPV 0x00000001
#define MCG_STATUS_EIPV 0x00000002
#define MCG_STATUS_MCIP 0x00000004
@@ -363,9 +366,14 @@
#define MSR_MC_STATUS(x) (MSR_MC0_STATUS + (x) * 4)
#define MSR_MC_ADDR(x) (MSR_MC0_ADDR + (x) * 4)
#define MSR_MC_MISC(x) (MSR_MC0_MISC + (x) * 4)
+#define MSR_MC_CTL2(x) (MSR_MC0_CTL2 + (x)) /* If MCG_CAP_CMCI_P */
#define MC_STATUS_MCA_ERROR 0x000000000000ffffUL
#define MC_STATUS_MODEL_ERROR 0x00000000ffff0000UL
#define MC_STATUS_OTHER_INFO 0x01ffffff00000000UL
+#define MC_STATUS_COR_COUNT 0x001fffc000000000UL /* If MCG_CAP_TES_P */
+#define MC_STATUS_TES_STATUS 0x0060000000000000UL /* If MCG_CAP_TES_P */
+#define MC_STATUS_AR 0x0080000000000000UL /* If MCG_CAP_CMCI_P */
+#define MC_STATUS_S 0x0100000000000000UL /* If MCG_CAP_CMCI_P */
#define MC_STATUS_PCC 0x0200000000000000UL
#define MC_STATUS_ADDRV 0x0400000000000000UL
#define MC_STATUS_MISCV 0x0800000000000000UL
@@ -373,6 +381,10 @@
#define MC_STATUS_UC 0x2000000000000000UL
#define MC_STATUS_OVER 0x4000000000000000UL
#define MC_STATUS_VAL 0x8000000000000000UL
+#define MC_MISC_RA_LSB 0x000000000000003fUL /* If MCG_CAP_SER_P */
+#define MC_MISC_ADDRESS_MODE 0x00000000000001c0UL /* If MCG_CAP_SER_P */
+#define MC_CTL2_THRESHOLD 0x0000000000003fffUL
+#define MC_CTL2_CMCI_EN 0x0000000040000000UL
/*
* The following four 3-byte registers control the non-cacheable regions.
diff --git a/sys/i386/i386/mca.c b/sys/i386/i386/mca.c
index 6148af7..9b9f945 100644
--- a/sys/i386/i386/mca.c
+++ b/sys/i386/i386/mca.c
@@ -177,19 +177,46 @@ mca_error_request(uint16_t mca_error)
return ("???");
}
+static const char *
+mca_error_mmtype(uint16_t mca_error)
+{
+ /* Map bits 6:4 of mca_error to a short mnemonic string; mca_log() prints it for memory controller errors. */
+ switch ((mca_error & 0x70) >> 4) {
+ case 0x0:
+ return ("GEN");
+ case 0x1:
+ return ("RD");
+ case 0x2:
+ return ("WR");
+ case 0x3:
+ return ("AC");
+ case 0x4:
+ return ("MS");
+ }
+ return ("???"); /* field values 0x5-0x7 have no mnemonic here */
+}
+
/* Dump details about a single machine check. */
static void __nonnull(1)
mca_log(const struct mca_record *rec)
{
uint16_t mca_error;
- printf("MCA: bank %d, status 0x%016llx\n", rec->mr_bank,
+ printf("MCA: Bank %d, Status 0x%016llx\n", rec->mr_bank,
(long long)rec->mr_status);
- printf("MCA: CPU %d ", rec->mr_apic_id);
+ printf("MCA: Global Cap 0x%016llx, Status 0x%016llx\n",
+ (long long)rec->mr_mcg_cap, (long long)rec->mr_mcg_status);
+ printf("MCA: Vendor \"%s\", ID 0x%x, APIC ID %d\n", cpu_vendor,
+ rec->mr_cpu_id, rec->mr_apic_id);
+ printf("MCA: CPU %d ", rec->mr_cpu);
if (rec->mr_status & MC_STATUS_UC)
printf("UNCOR ");
- else
+ else {
printf("COR ");
+ if (rec->mr_mcg_cap & MCG_CAP_TES_P)
+ printf("(%lld) ", ((long long)rec->mr_status &
+ MC_STATUS_COR_COUNT) >> 38);
+ }
if (rec->mr_status & MC_STATUS_PCC)
printf("PCC ");
if (rec->mr_status & MC_STATUS_OVER)
@@ -212,6 +239,9 @@ mca_log(const struct mca_record *rec)
case 0x0004:
printf("FRC error");
break;
+ case 0x0005:
+ printf("internal parity error");
+ break;
case 0x0400:
printf("internal timer error");
break;
@@ -236,6 +266,17 @@ mca_log(const struct mca_record *rec)
break;
}
+ /* Memory controller error. */
+ if ((mca_error & 0xef80) == 0x0080) {
+ printf("%s channel ", mca_error_mmtype(mca_error));
+ if ((mca_error & 0x000f) != 0x000f)
+ printf("%d", mca_error & 0x000f);
+ else
+ printf("??");
+ printf(" memory error");
+ break;
+ }
+
/* Cache error. */
if ((mca_error & 0xef00) == 0x0100) {
printf("%sCACHE %s %s error",
@@ -313,6 +354,11 @@ mca_check_status(int bank, struct mca_record *rec)
rec->mr_misc = rdmsr(MSR_MC_MISC(bank));
rec->mr_tsc = rdtsc();
rec->mr_apic_id = PCPU_GET(apic_id);
+ rec->mr_mcg_cap = rdmsr(MSR_MCG_CAP);
+ rec->mr_mcg_status = rdmsr(MSR_MCG_STATUS);
+ rec->mr_cpu_id = cpu_id;
+ rec->mr_cpu_vendor_id = cpu_vendor_id;
+ rec->mr_cpu = PCPU_GET(cpuid);
/*
* Clear machine check. Don't do this for uncorrectable
diff --git a/sys/i386/include/mca.h b/sys/i386/include/mca.h
index ddc3aeb..bc09480 100644
--- a/sys/i386/include/mca.h
+++ b/sys/i386/include/mca.h
@@ -37,6 +37,11 @@ struct mca_record {
uint64_t mr_tsc;
int mr_apic_id;
int mr_bank;
+ uint64_t mr_mcg_cap;
+ uint64_t mr_mcg_status;
+ int mr_cpu_id;
+ int mr_cpu_vendor_id;
+ int mr_cpu;
};
#ifdef _KERNEL
diff --git a/sys/i386/include/specialreg.h b/sys/i386/include/specialreg.h
index e791a70..cbcc0fa 100644
--- a/sys/i386/include/specialreg.h
+++ b/sys/i386/include/specialreg.h
@@ -273,6 +273,7 @@
#define MSR_MTRR16kBase 0x258
#define MSR_MTRR4kBase 0x268
#define MSR_PAT 0x277
+#define MSR_MC0_CTL2 0x280
#define MSR_MTRRdefType 0x2ff
#define MSR_MC0_CTL 0x400
#define MSR_MC0_STATUS 0x401
@@ -421,8 +422,10 @@
#define MCG_CAP_COUNT 0x000000ff
#define MCG_CAP_CTL_P 0x00000100
#define MCG_CAP_EXT_P 0x00000200
+#define MCG_CAP_CMCI_P 0x00000400
#define MCG_CAP_TES_P 0x00000800
#define MCG_CAP_EXT_CNT 0x00ff0000
+#define MCG_CAP_SER_P 0x01000000
#define MCG_STATUS_RIPV 0x00000001
#define MCG_STATUS_EIPV 0x00000002
#define MCG_STATUS_MCIP 0x00000004
@@ -432,9 +435,14 @@
#define MSR_MC_STATUS(x) (MSR_MC0_STATUS + (x) * 4)
#define MSR_MC_ADDR(x) (MSR_MC0_ADDR + (x) * 4)
#define MSR_MC_MISC(x) (MSR_MC0_MISC + (x) * 4)
+#define MSR_MC_CTL2(x) (MSR_MC0_CTL2 + (x)) /* If MCG_CAP_CMCI_P */
#define MC_STATUS_MCA_ERROR 0x000000000000ffffULL
#define MC_STATUS_MODEL_ERROR 0x00000000ffff0000ULL
#define MC_STATUS_OTHER_INFO 0x01ffffff00000000ULL
+#define MC_STATUS_COR_COUNT 0x001fffc000000000ULL /* If MCG_CAP_TES_P */
+#define MC_STATUS_TES_STATUS 0x0060000000000000ULL /* If MCG_CAP_TES_P */
+#define MC_STATUS_AR 0x0080000000000000ULL /* If MCG_CAP_CMCI_P */
+#define MC_STATUS_S 0x0100000000000000ULL /* If MCG_CAP_CMCI_P */
#define MC_STATUS_PCC 0x0200000000000000ULL
#define MC_STATUS_ADDRV 0x0400000000000000ULL
#define MC_STATUS_MISCV 0x0800000000000000ULL
@@ -442,6 +450,10 @@
#define MC_STATUS_UC 0x2000000000000000ULL
#define MC_STATUS_OVER 0x4000000000000000ULL
#define MC_STATUS_VAL 0x8000000000000000ULL
+#define MC_MISC_RA_LSB 0x000000000000003fULL /* If MCG_CAP_SER_P */
+#define MC_MISC_ADDRESS_MODE 0x00000000000001c0ULL /* If MCG_CAP_SER_P */
+#define MC_CTL2_THRESHOLD 0x0000000000003fffULL
+#define MC_CTL2_CMCI_EN 0x0000000040000000ULL
/*
* The following four 3-byte registers control the non-cacheable regions.
OpenPOWER on IntegriCloud