diff options
author | Borislav Petkov <borislav.petkov@amd.com> | 2012-08-09 18:41:07 +0200 |
---|---|---|
committer | Borislav Petkov <bp@alien8.de> | 2012-11-28 11:45:01 +0100 |
commit | 66fed2d464157eb20c37738d75b281458dfc2cab (patch) | |
tree | 2ef45590cacc3c69d9901153f4dc1199e20b7fa1 | |
parent | 6e71a870b8ff2c1e2d89e5ea27a38cea39cefa3d (diff) | |
download | op-kernel-dev-66fed2d464157eb20c37738d75b281458dfc2cab.zip op-kernel-dev-66fed2d464157eb20c37738d75b281458dfc2cab.tar.gz |
amd64_edac: Improve error injection
When injecting DRAM ECC errors over the F3xB[8,C] interface, the machine
does this by injecting the error in the next non-cached access. This
takes a relatively long time on a normal system, so in order to
expedite it, we disable the caches around the injection.
Signed-off-by: Borislav Petkov <borislav.petkov@amd.com>
-rw-r--r-- | drivers/edac/amd64_edac.c | 10 | ||||
-rw-r--r-- | drivers/edac/amd64_edac.h | 23 | ||||
-rw-r--r-- | drivers/edac/amd64_edac_inj.c | 18 |
3 files changed, 41 insertions, 10 deletions
diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 5960a8a..351496a 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -60,8 +60,8 @@ struct scrubrate { { 0x00, 0UL}, /* scrubbing off */ }; -static int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, - u32 *val, const char *func) +int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, + u32 *val, const char *func) { int err = 0; @@ -1980,11 +1980,11 @@ static void amd64_handle_ue(struct mem_ctl_info *mci, struct mce *m) static inline void __amd64_decode_bus_error(struct mem_ctl_info *mci, struct mce *m) { - u16 ec = EC(m->status); - u8 xec = XEC(m->status, 0x1f); u8 ecc_type = (m->status >> 45) & 0x3; + u8 xec = XEC(m->status, 0x1f); + u16 ec = EC(m->status); - /* Bail early out if this was an 'observed' error */ + /* Bail out early if this was an 'observed' error */ if (PP(ec) == NBSL_PP_OBS) return; diff --git a/drivers/edac/amd64_edac.h b/drivers/edac/amd64_edac.h index 19a12a4..cf7981e 100644 --- a/drivers/edac/amd64_edac.h +++ b/drivers/edac/amd64_edac.h @@ -273,9 +273,10 @@ #define SET_NB_ARRAY_ADDR(section) (((section) & 0x3) << 1) #define F10_NB_ARRAY_DATA 0xBC +#define F10_NB_ARR_ECC_WR_REQ BIT(17) #define SET_NB_DRAM_INJECTION_WRITE(inj) \ (BIT(((inj.word) & 0xF) + 20) | \ - BIT(17) | inj.bit_map) + F10_NB_ARR_ECC_WR_REQ | inj.bit_map) #define SET_NB_DRAM_INJECTION_READ(inj) \ (BIT(((inj.word) & 0xF) + 20) | \ BIT(16) | inj.bit_map) @@ -306,9 +307,9 @@ enum amd_families { /* Error injection control structure */ struct error_injection { - u32 section; - u32 word; - u32 bit_map; + u32 section; + u32 word; + u32 bit_map; }; /* low and high part of PCI config space regs */ @@ -460,6 +461,8 @@ struct amd64_family_type { struct low_ops ops; }; +int __amd64_read_pci_cfg_dword(struct pci_dev *pdev, int offset, + u32 *val, const char *func); int __amd64_write_pci_cfg_dword(struct pci_dev *pdev, int offset, u32 val, const char *func); @@ -476,3 
+479,15 @@ int amd64_get_dram_hole_info(struct mem_ctl_info *mci, u64 *hole_base, u64 *hole_offset, u64 *hole_size); #define to_mci(k) container_of(k, struct mem_ctl_info, dev) + +/* Injection helpers */ +static inline void disable_caches(void *dummy) +{ + write_cr0(read_cr0() | X86_CR0_CD); + wbinvd(); +} + +static inline void enable_caches(void *dummy) +{ + write_cr0(read_cr0() & ~X86_CR0_CD); +} diff --git a/drivers/edac/amd64_edac_inj.c b/drivers/edac/amd64_edac_inj.c index 8977e2fa..8c171fa 100644 --- a/drivers/edac/amd64_edac_inj.c +++ b/drivers/edac/amd64_edac_inj.c @@ -153,8 +153,8 @@ static ssize_t amd64_inject_write_store(struct device *dev, { struct mem_ctl_info *mci = to_mci(dev); struct amd64_pvt *pvt = mci->pvt_info; + u32 section, word_bits, tmp; unsigned long value; - u32 section, word_bits; int ret; ret = strict_strtoul(data, 10, &value); @@ -168,9 +168,25 @@ static ssize_t amd64_inject_write_store(struct device *dev, word_bits = SET_NB_DRAM_INJECTION_WRITE(pvt->injection); + pr_notice_once("Don't forget to decrease MCE polling interval in\n" + "/sys/bus/machinecheck/devices/machinecheck<CPUNUM>/check_interval\n" + "so that you can get the error report faster.\n"); + + on_each_cpu(disable_caches, NULL, 1); + /* Issue 'word' and 'bit' along with the READ request */ amd64_write_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, word_bits); + retry: + /* wait until injection happens */ + amd64_read_pci_cfg(pvt->F3, F10_NB_ARRAY_DATA, &tmp); + if (tmp & F10_NB_ARR_ECC_WR_REQ) { + cpu_relax(); + goto retry; + } + + on_each_cpu(enable_caches, NULL, 1); + edac_dbg(0, "section=0x%x word_bits=0x%x\n", section, word_bits); return count; |