summary | refs | log | tree | commit | diff | stats
path: root/sys/i386
diff options
context:
space:
mode:
author: avg <avg@FreeBSD.org> 2009-12-02 15:45:55 +0000
committer: avg <avg@FreeBSD.org> 2009-12-02 15:45:55 +0000
commit: 934dd3fad58ce122202c6c08afa0c117be8e7759 (patch)
tree: fb58d2484ab3428e5e2b59894073ad73191f6768 /sys/i386
parent: c1f03ab1df09bf1ca97e666c62dd66eaae160b40 (diff)
download: FreeBSD-src-934dd3fad58ce122202c6c08afa0c117be8e7759.zip
download: FreeBSD-src-934dd3fad58ce122202c6c08afa0c117be8e7759.tar.gz
mca: improve status checking, recording and reporting
- directly print mca information in case we fail to allocate memory for
  a record
- include bank number into mca record
- print raw mca status value for extended information

Reviewed by: jhb
MFC after: 10 days
Diffstat (limited to 'sys/i386')
-rw-r--r-- sys/i386/i386/mca.c 111
-rw-r--r-- sys/i386/include/mca.h 1
2 files changed, 63 insertions, 49 deletions
diff --git a/sys/i386/i386/mca.c b/sys/i386/i386/mca.c
index 5e7e4ff..8af2091 100644
--- a/sys/i386/i386/mca.c
+++ b/sys/i386/i386/mca.c
@@ -117,48 +117,6 @@ sysctl_mca_records(SYSCTL_HANDLER_ARGS)
return (SYSCTL_OUT(req, &record, sizeof(record)));
}
-static struct mca_record *
-mca_record_entry(int bank)
-{
- struct mca_internal *rec;
- uint64_t status;
- u_int p[4];
-
- status = rdmsr(MSR_MC_STATUS(bank));
- if (!(status & MC_STATUS_VAL))
- return (NULL);
-
- rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT | M_ZERO);
- if (rec == NULL) {
- printf("MCA: Unable to allocate space for an event.\n");
- return (NULL);
- }
-
- /* Save exception information. */
- rec->rec.mr_status = status;
- if (status & MC_STATUS_ADDRV)
- rec->rec.mr_addr = rdmsr(MSR_MC_ADDR(bank));
- if (status & MC_STATUS_MISCV)
- rec->rec.mr_misc = rdmsr(MSR_MC_MISC(bank));
- rec->rec.mr_tsc = rdtsc();
- rec->rec.mr_apic_id = PCPU_GET(apic_id);
-
- /*
- * Clear machine check. Don't do this for uncorrectable
- * errors so that the BIOS can see them.
- */
- if (!(rec->rec.mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) {
- wrmsr(MSR_MC_STATUS(bank), 0);
- do_cpuid(0, p);
- }
-
- mtx_lock_spin(&mca_lock);
- STAILQ_INSERT_TAIL(&mca_records, rec, link);
- mca_count++;
- mtx_unlock_spin(&mca_lock);
- return (&rec->rec);
-}
-
static const char *
mca_error_ttype(uint16_t mca_error)
{
@@ -219,11 +177,13 @@ mca_error_request(uint16_t mca_error)
}
/* Dump details about a single machine check. */
-static void
-mca_log(struct mca_record *rec)
+static void __nonnull(1)
+mca_log(const struct mca_record *rec)
{
uint16_t mca_error;
+ printf("MCA: bank %d, status 0x%016llx\n", rec->mr_bank,
+ (long long)rec->mr_status);
printf("MCA: CPU %d ", rec->mr_apic_id);
if (rec->mr_status & MC_STATUS_UC)
printf("UNCOR ");
@@ -329,6 +289,59 @@ mca_log(struct mca_record *rec)
printf("MCA: Address 0x%llx\n", (long long)rec->mr_addr);
}
+static int __nonnull(2)
+mca_check_status(int bank, struct mca_record *rec)
+{
+ uint64_t status;
+ u_int p[4];
+
+ status = rdmsr(MSR_MC_STATUS(bank));
+ if (!(status & MC_STATUS_VAL))
+ return (0);
+
+ /* Save exception information. */
+ rec->mr_status = status;
+ rec->mr_bank = bank;
+ rec->mr_addr = 0;
+ if (status & MC_STATUS_ADDRV)
+ rec->mr_addr = rdmsr(MSR_MC_ADDR(bank));
+ rec->mr_misc = 0;
+ if (status & MC_STATUS_MISCV)
+ rec->mr_misc = rdmsr(MSR_MC_MISC(bank));
+ rec->mr_tsc = rdtsc();
+ rec->mr_apic_id = PCPU_GET(apic_id);
+
+ /*
+ * Clear machine check. Don't do this for uncorrectable
+ * errors so that the BIOS can see them.
+ */
+ if (!(rec->mr_status & (MC_STATUS_PCC | MC_STATUS_UC))) {
+ wrmsr(MSR_MC_STATUS(bank), 0);
+ do_cpuid(0, p);
+ }
+ return (1);
+}
+
+static void __nonnull(1)
+mca_record_entry(const struct mca_record *record)
+{
+ struct mca_internal *rec;
+
+ rec = malloc(sizeof(*rec), M_MCA, M_NOWAIT);
+ if (rec == NULL) {
+ printf("MCA: Unable to allocate space for an event.\n");
+ mca_log(record);
+ return;
+ }
+
+ rec->rec = *record;
+ rec->logged = 0;
+ mtx_lock_spin(&mca_lock);
+ STAILQ_INSERT_TAIL(&mca_records, rec, link);
+ mca_count++;
+ mtx_unlock_spin(&mca_lock);
+}
+
/*
* This scans all the machine check banks of the current CPU to see if
* there are any machine checks. Any non-recoverable errors are
@@ -341,7 +354,7 @@ mca_log(struct mca_record *rec)
static int
mca_scan(int mcip)
{
- struct mca_record *rec;
+ struct mca_record rec;
uint64_t mcg_cap, ucmask;
int count, i, recoverable;
@@ -354,13 +367,13 @@ mca_scan(int mcip)
ucmask |= MC_STATUS_OVER;
mcg_cap = rdmsr(MSR_MCG_CAP);
for (i = 0; i < (mcg_cap & MCG_CAP_COUNT); i++) {
- rec = mca_record_entry(i);
- if (rec != NULL) {
+ if (mca_check_status(i, &rec)) {
count++;
- if (rec->mr_status & ucmask) {
+ if (rec.mr_status & ucmask) {
recoverable = 0;
- mca_log(rec);
+ mca_log(&rec);
}
+ mca_record_entry(&rec);
}
}
return (mcip ? recoverable : count);
diff --git a/sys/i386/include/mca.h b/sys/i386/include/mca.h
index c43d989..ddc3aeb 100644
--- a/sys/i386/include/mca.h
+++ b/sys/i386/include/mca.h
@@ -36,6 +36,7 @@ struct mca_record {
uint64_t mr_misc;
uint64_t mr_tsc;
int mr_apic_id;
+ int mr_bank;
};
#ifdef _KERNEL
OpenPOWER on IntegriCloud