From a7d7d2e1a07e3811dc49af2962c940fd8bbb6c8f Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Jan 2012 14:12:32 -0300 Subject: edac: Create a dimm struct and move the labels into it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The way a DIMM is currently represented implies that it is linked into a per-csrow struct. However, some drivers don't see csrows, as they're hidden behind some chip like the AMBs on FBDIMMs, for example. This forced drivers to fake^Wvirtualize a csrow struct, and to make a mess of the original csrow/channel concept. Move the DIMM labels into a per-DIMM struct, and add the real location of the socket there, in terms of csrow/channel. Later patches will modify the location to properly represent the memory architecture. All other drivers will use a per-csrow type of location. Some of those drivers will require a later conversion, as they also fake the csrows internally. TODO: While this patch doesn't change the existing behavior, on csrows-based memory controllers, a csrow/channel pair points to a memory rank. There's a known bug in the EDAC core that allows having different labels for the same DIMM, if it has more than one rank. A later patch is needed to merge the several ranks of a DIMM into the same dimm_info struct, in order to avoid having different labels for the same DIMM. The edac_mc_alloc() will now contain a per-dimm initialization loop that will be changed by later patches in order to match other types of memory architectures. Reviewed-by: Aristeu Rozanski Reviewed-by: Borislav Petkov Cc: Doug Thompson Cc: Ranganathan Desikan Cc: "Arvind R." 
Cc: "Niklas Söderlund" Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/sb_edac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/edac/sb_edac.c') diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index a203536..95901c2 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -651,8 +651,8 @@ static int get_dimm_config(const struct mem_ctl_info *mci) csr->channels[0].chan_idx = i; csr->channels[0].ce_count = 0; pvt->csrow_map[i][j] = csrow; - snprintf(csr->channels[0].label, - sizeof(csr->channels[0].label), + snprintf(csr->channels[0].dimm->label, + sizeof(csr->channels[0].dimm->label), "CPU_SrcID#%u_Channel#%u_DIMM#%u", pvt->sbridge_dev->source_id, i, j); last_page += npages; -- cgit v1.1 From 084a4fccef39ac7abb039511f32380f28d0b67e6 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Jan 2012 18:38:08 -0300 Subject: edac: move dimm properties to struct dimm_info MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On systems based on chip select rows, all channels need to use memories with the same properties, otherwise the memories on channels A and B won't be recognized. However, such an assumption is not true for all types of memory controllers. Controllers for FB-DIMMs don't have such requirements. Also, modern Intel controllers seem to be capable of handling such differences. So, we need to stop storing the DIMM information in per-csrow data, and store it in the right place instead. The first step is to move grain, mtype, dtype and edac_mode to the per-dimm struct. Reviewed-by: Aristeu Rozanski Reviewed-by: Borislav Petkov Acked-by: Chris Metcalf Cc: Doug Thompson Cc: Borislav Petkov Cc: Mark Gross Cc: Jason Uhlenkott Cc: Tim Small Cc: Ranganathan Desikan Cc: "Arvind R." 
Cc: Olof Johansson Cc: Egor Martovetsky Cc: Michal Marek Cc: Jiri Kosina Cc: Joe Perches Cc: Dmitry Eremin-Solenikov Cc: Benjamin Herrenschmidt Cc: Hitoshi Mitake Cc: Andrew Morton Cc: James Bottomley Cc: "Niklas Söderlund" Cc: Shaohui Xie Cc: Josh Boyer Cc: Mike Williams Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/sb_edac.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) (limited to 'drivers/edac/sb_edac.c') diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 95901c2..21147ac 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -551,7 +551,7 @@ static int sbridge_get_active_channels(const u8 bus, unsigned *channels, return 0; } -static int get_dimm_config(const struct mem_ctl_info *mci) +static int get_dimm_config(struct mem_ctl_info *mci) { struct sbridge_pvt *pvt = mci->pvt_info; struct csrow_info *csr; @@ -561,6 +561,7 @@ static int get_dimm_config(const struct mem_ctl_info *mci) u32 reg; enum edac_type mode; enum mem_type mtype; + struct dimm_info *dimm; pci_read_config_dword(pvt->pci_br, SAD_TARGET, &reg); pvt->sbridge_dev->source_id = SOURCE_ID(reg); @@ -612,6 +613,7 @@ static int get_dimm_config(const struct mem_ctl_info *mci) /* On all supported DDR3 DIMM types, there are 8 banks available */ banks = 8; + dimm = mci->dimms; for (i = 0; i < NUM_CHANNELS; i++) { u32 mtr; @@ -634,29 +636,30 @@ static int get_dimm_config(const struct mem_ctl_info *mci) pvt->sbridge_dev->mc, i, j, size, npages, banks, ranks, rows, cols); - csr = &mci->csrows[csrow]; + /* + * Fake stuff. This controller doesn't see + * csrows. + */ + csr = &mci->csrows[csrow]; csr->first_page = last_page; csr->last_page = last_page + npages - 1; - csr->page_mask = 0UL; /* Unused */ csr->nr_pages = npages; - csr->grain = 32; csr->csrow_idx = csrow; - csr->dtype = (banks == 8) ? 
DEV_X8 : DEV_X4; - csr->ce_count = 0; - csr->ue_count = 0; - csr->mtype = mtype; - csr->edac_mode = mode; csr->nr_channels = 1; csr->channels[0].chan_idx = i; - csr->channels[0].ce_count = 0; pvt->csrow_map[i][j] = csrow; - snprintf(csr->channels[0].dimm->label, - sizeof(csr->channels[0].dimm->label), - "CPU_SrcID#%u_Channel#%u_DIMM#%u", - pvt->sbridge_dev->source_id, i, j); last_page += npages; csrow++; + + csr->channels[0].dimm = dimm; + dimm->grain = 32; + dimm->dtype = (banks == 8) ? DEV_X8 : DEV_X4; + dimm->mtype = mtype; + dimm->edac_mode = mode; + snprintf(dimm->label, sizeof(dimm->label), + "CPU_SrcID#%u_Channel#%u_DIMM#%u", + pvt->sbridge_dev->source_id, i, j); } } } -- cgit v1.1 From 5e2af0c09e60d11dd8297e259a9ca2b3d92d2cf4 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 27 Jan 2012 21:20:32 -0300 Subject: edac: Don't initialize csrow's first_page & friends when not needed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Almost all edac drivers initialize csrow_info->first_page, csrow_info->last_page and csrow_info->page_mask. Those vars are used inside the EDAC core, in order to calculate the csrow affected by an error, by using the routine edac_mc_find_csrow_by_page(). However, very few drivers actually use it: e752x_edac.c e7xxx_edac.c i3000_edac.c i82443bxgx_edac.c i82860_edac.c i82875p_edac.c i82975x_edac.c r82600_edac.c There are also a few other drivers that have their own internal calculation formulas using those vars. All the others are just wasting time by initializing that data. While initializing data without using it won't cause any trouble, as that information is stored in the wrong place (in the csrows structure), it is better to remove what is unused, in order to simplify the next patch. 
Reviewed-by: Aristeu Rozanski Acked-by: Borislav Petkov Acked-by: Chris Metcalf Cc: Doug Thompson Cc: Hitoshi Mitake Cc: Andrew Morton Cc: "Niklas Söderlund" Cc: Josh Boyer Cc: Jiri Kosina Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/sb_edac.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/edac/sb_edac.c') diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 21147ac..d5892c0 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -642,8 +642,6 @@ static int get_dimm_config(struct mem_ctl_info *mci) * csrows. */ csr = &mci->csrows[csrow]; - csr->first_page = last_page; - csr->last_page = last_page + npages - 1; csr->nr_pages = npages; csr->csrow_idx = csrow; csr->nr_channels = 1; -- cgit v1.1 From a895bf8b1e1ea4c032a8fa8a09475a2ce09fe77a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sat, 28 Jan 2012 09:09:38 -0300 Subject: edac: move nr_pages to dimm struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The number of pages is a dimm property. Move it to the dimm struct. After this change, it is possible to add sysfs nodes for the DIMM's that will properly represent the DIMM stick properties, including its size. A TODO fix here is to properly represent dual-rank/quad-rank DIMMs when the memory controller represents the memory via chip select rows. Reviewed-by: Aristeu Rozanski Acked-by: Borislav Petkov Acked-by: Chris Metcalf Cc: Doug Thompson Cc: Mark Gross Cc: Jason Uhlenkott Cc: Tim Small Cc: Ranganathan Desikan Cc: "Arvind R." 
Cc: Olof Johansson Cc: Egor Martovetsky Cc: Michal Marek Cc: Jiri Kosina Cc: Joe Perches Cc: Dmitry Eremin-Solenikov Cc: Benjamin Herrenschmidt Cc: Hitoshi Mitake Cc: Andrew Morton Cc: "Niklas Söderlund" Cc: Shaohui Xie Cc: Josh Boyer Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/sb_edac.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/edac/sb_edac.c') diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index d5892c0..2ce9bf5 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -561,7 +561,6 @@ static int get_dimm_config(struct mem_ctl_info *mci) u32 reg; enum edac_type mode; enum mem_type mtype; - struct dimm_info *dimm; pci_read_config_dword(pvt->pci_br, SAD_TARGET, &reg); pvt->sbridge_dev->source_id = SOURCE_ID(reg); @@ -613,11 +612,11 @@ static int get_dimm_config(struct mem_ctl_info *mci) /* On all supported DDR3 DIMM types, there are 8 banks available */ banks = 8; - dimm = mci->dimms; for (i = 0; i < NUM_CHANNELS; i++) { u32 mtr; for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) { + struct dimm_info *dimm = &mci->dimms[j]; pci_read_config_dword(pvt->pci_tad[i], mtr_regs[j], &mtr); debugf4("Channel #%d MTR%d = %x\n", i, j, mtr); @@ -642,15 +641,12 @@ static int get_dimm_config(struct mem_ctl_info *mci) * csrows. */ csr = &mci->csrows[csrow]; - csr->nr_pages = npages; - csr->csrow_idx = csrow; - csr->nr_channels = 1; - csr->channels[0].chan_idx = i; pvt->csrow_map[i][j] = csrow; last_page += npages; csrow++; csr->channels[0].dimm = dimm; + dimm->nr_pages = npages; dimm->grain = 32; dimm->dtype = (banks == 8) ? DEV_X8 : DEV_X4; dimm->mtype = mtype; -- cgit v1.1 From c36e3e77687b39073903f55461fc9417514e831e Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Mon, 16 Apr 2012 15:12:22 -0300 Subject: sb_edac: convert driver to use the new edac ABI The legacy edac ABI is going to be removed. Port the driver to use and benefit from the new API functionality. 
Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/sb_edac.c | 159 +++++++++++++++++-------------------------------- 1 file changed, 56 insertions(+), 103 deletions(-) (limited to 'drivers/edac/sb_edac.c') diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 2ce9bf5..8286479 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -314,8 +314,6 @@ struct sbridge_pvt { struct sbridge_info info; struct sbridge_channel channel[NUM_CHANNELS]; - int csrow_map[NUM_CHANNELS][MAX_DIMMS]; - /* Memory type detection */ bool is_mirrored, is_lockstep, is_close_pg; @@ -487,29 +485,14 @@ static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot, } /** - * sbridge_get_active_channels() - gets the number of channels and csrows + * check_if_ecc_is_active() - Checks if ECC is active * bus: Device bus - * @channels: Number of channels that will be returned - * @csrows: Number of csrows found - * - * Since EDAC core needs to know in advance the number of available channels - * and csrows, in order to allocate memory for csrows/channels, it is needed - * to run two similar steps. At the first step, implemented on this function, - * it checks the number of csrows/channels present at one socket, identified - * by the associated PCI bus. - * this is used in order to properly allocate the size of mci components. - * Note: one csrow is one dimm. */ -static int sbridge_get_active_channels(const u8 bus, unsigned *channels, - unsigned *csrows) +static int check_if_ecc_is_active(const u8 bus) { struct pci_dev *pdev = NULL; - int i, j; u32 mcmtr; - *channels = 0; - *csrows = 0; - pdev = get_pdev_slot_func(bus, 15, 0); if (!pdev) { sbridge_printk(KERN_ERR, "Couldn't find PCI device " @@ -523,41 +506,14 @@ static int sbridge_get_active_channels(const u8 bus, unsigned *channels, sbridge_printk(KERN_ERR, "ECC is disabled. 
Aborting\n"); return -ENODEV; } - - for (i = 0; i < NUM_CHANNELS; i++) { - u32 mtr; - - /* Device 15 functions 2 - 5 */ - pdev = get_pdev_slot_func(bus, 15, 2 + i); - if (!pdev) { - sbridge_printk(KERN_ERR, "Couldn't find PCI device " - "%2x.%02d.%d!!!\n", - bus, 15, 2 + i); - return -ENODEV; - } - (*channels)++; - - for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) { - pci_read_config_dword(pdev, mtr_regs[j], &mtr); - debugf1("Bus#%02x channel #%d MTR%d = %x\n", bus, i, j, mtr); - if (IS_DIMM_PRESENT(mtr)) - (*csrows)++; - } - } - - debugf0("Number of active channels: %d, number of active dimms: %d\n", - *channels, *csrows); - return 0; } static int get_dimm_config(struct mem_ctl_info *mci) { struct sbridge_pvt *pvt = mci->pvt_info; - struct csrow_info *csr; + struct dimm_info *dimm; int i, j, banks, ranks, rows, cols, size, npages; - int csrow = 0; - unsigned long last_page = 0; u32 reg; enum edac_type mode; enum mem_type mtype; @@ -616,7 +572,8 @@ static int get_dimm_config(struct mem_ctl_info *mci) u32 mtr; for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) { - struct dimm_info *dimm = &mci->dimms[j]; + dimm = EDAC_DIMM_PTR(mci->layers, mci->dimms, mci->n_layers, + i, j, 0); pci_read_config_dword(pvt->pci_tad[i], mtr_regs[j], &mtr); debugf4("Channel #%d MTR%d = %x\n", i, j, mtr); @@ -636,16 +593,6 @@ static int get_dimm_config(struct mem_ctl_info *mci) size, npages, banks, ranks, rows, cols); - /* - * Fake stuff. This controller doesn't see - * csrows. - */ - csr = &mci->csrows[csrow]; - pvt->csrow_map[i][j] = csrow; - last_page += npages; - csrow++; - - csr->channels[0].dimm = dimm; dimm->nr_pages = npages; dimm->grain = 32; dimm->dtype = (banks == 8) ? 
DEV_X8 : DEV_X4; @@ -841,11 +788,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci, u8 *socket, long *channel_mask, u8 *rank, - char *area_type) + char *area_type, char *msg) { struct mem_ctl_info *new_mci; struct sbridge_pvt *pvt = mci->pvt_info; - char msg[256]; int n_rir, n_sads, n_tads, sad_way, sck_xch; int sad_interl, idx, base_ch; int interleave_mode; @@ -867,12 +813,10 @@ static int get_memory_error_data(struct mem_ctl_info *mci, */ if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) { sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } if (addr >= (u64)pvt->tohm) { sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } @@ -889,7 +833,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci, limit = SAD_LIMIT(reg); if (limit <= prv) { sprintf(msg, "Can't discover the memory socket"); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } if (addr <= limit) @@ -898,7 +841,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci, } if (n_sads == MAX_SAD) { sprintf(msg, "Can't discover the memory socket"); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } area_type = get_dram_attr(reg); @@ -939,7 +881,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci, break; default: sprintf(msg, "Can't discover socket interleave"); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } *socket = sad_interleave[idx]; @@ -954,7 +895,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci, if (!new_mci) { sprintf(msg, "Struct for socket #%u wasn't initialized", *socket); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } mci = new_mci; @@ -970,7 +910,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci, limit = TAD_LIMIT(reg); if (limit <= prv) { sprintf(msg, "Can't discover the memory channel"); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } if (addr <= limit) @@ 
-1010,7 +949,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci, break; default: sprintf(msg, "Can't discover the TAD target"); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } *channel_mask = 1 << base_ch; @@ -1024,7 +962,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci, break; default: sprintf(msg, "Invalid mirror set. Can't decode addr"); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } } else @@ -1052,7 +989,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci, if (offset > addr) { sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!", offset, addr); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } addr -= offset; @@ -1092,7 +1028,6 @@ static int get_memory_error_data(struct mem_ctl_info *mci, if (n_rir == MAX_RIR_RANGES) { sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx", ch_addr); - edac_mc_handle_ce_no_info(mci, msg); return -EINVAL; } rir_way = RIR_WAY(reg); @@ -1406,7 +1341,8 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, { struct mem_ctl_info *new_mci; struct sbridge_pvt *pvt = mci->pvt_info; - char *type, *optype, *msg, *recoverable_msg; + enum hw_event_mc_err_type tp_event; + char *type, *optype, msg[256], *recoverable_msg; bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); bool overflow = GET_BITFIELD(m->status, 62, 62); bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); @@ -1418,13 +1354,21 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, u32 optypenum = GET_BITFIELD(m->status, 4, 6); long channel_mask, first_channel; u8 rank, socket; - int csrow, rc, dimm; + int rc, dimm; char *area_type = "Unknown"; - if (ripv) - type = "NON_FATAL"; - else - type = "FATAL"; + if (uncorrected_error) { + if (ripv) { + type = "FATAL"; + tp_event = HW_EVENT_ERR_FATAL; + } else { + type = "NON_FATAL"; + tp_event = HW_EVENT_ERR_UNCORRECTED; + } + } else { + type = "CORRECTED"; + tp_event = HW_EVENT_ERR_CORRECTED; + } /* * 
According with Table 15-9 of the Intel Archictecture spec vol 3A, @@ -1442,19 +1386,19 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, } else { switch (optypenum) { case 0: - optype = "generic undef request"; + optype = "generic undef request error"; break; case 1: - optype = "memory read"; + optype = "memory read error"; break; case 2: - optype = "memory write"; + optype = "memory write error"; break; case 3: - optype = "addr/cmd"; + optype = "addr/cmd error"; break; case 4: - optype = "memory scrubbing"; + optype = "memory scrubbing error"; break; default: optype = "reserved"; @@ -1463,13 +1407,13 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, } rc = get_memory_error_data(mci, m->addr, &socket, - &channel_mask, &rank, area_type); + &channel_mask, &rank, area_type, msg); if (rc < 0) - return; + goto err_parsing; new_mci = get_mci_for_node_id(socket); if (!new_mci) { - edac_mc_handle_ce_no_info(mci, "Error: socket got corrupted!"); - return; + strcpy(msg, "Error: socket got corrupted!"); + goto err_parsing; } mci = new_mci; pvt = mci->pvt_info; @@ -1483,8 +1427,6 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, else dimm = 2; - csrow = pvt->csrow_map[first_channel][dimm]; - if (uncorrected_error && recoverable) recoverable_msg = " recoverable"; else @@ -1495,18 +1437,14 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, * Probably, we can just discard it, as the channel information * comes from the get_memory_error_data() address decoding */ - msg = kasprintf(GFP_ATOMIC, - "%d %s error(s): %s on %s area %s%s: cpu=%d Err=%04x:%04x (ch=%d), " - "addr = 0x%08llx => socket=%d, Channel=%ld(mask=%ld), rank=%d\n", + snprintf(msg, sizeof(msg), + "%d error(s)%s: %s%s: cpu=%d Err=%04x:%04x addr = 0x%08llx socket=%d Channel=%ld(mask=%ld), rank=%d\n", core_err_cnt, + overflow ? " OVERFLOW" : "", area_type, - optype, - type, recoverable_msg, - overflow ? 
"OVERFLOW" : "", m->cpu, mscod, errcode, - channel, /* 1111b means not specified */ (long long) m->addr, socket, first_channel, /* This is the real channel on SB */ @@ -1515,13 +1453,19 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, debugf0("%s", msg); + /* FIXME: need support for channel mask */ + /* Call the helper to output message */ - if (uncorrected_error) - edac_mc_handle_fbd_ue(mci, csrow, 0, 0, msg); - else - edac_mc_handle_fbd_ce(mci, csrow, 0, msg); + edac_mc_handle_error(tp_event, mci, + m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0, + channel, dimm, -1, + optype, msg, m); + return; +err_parsing: + edac_mc_handle_error(tp_event, mci, 0, 0, 0, + -1, -1, -1, + msg, "", m); - kfree(msg); } /* @@ -1680,16 +1624,25 @@ static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev) static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) { struct mem_ctl_info *mci; + struct edac_mc_layer layers[2]; struct sbridge_pvt *pvt; - int rc, channels, csrows; + int rc; /* Check the number of active and not disabled channels */ - rc = sbridge_get_active_channels(sbridge_dev->bus, &channels, &csrows); + rc = check_if_ecc_is_active(sbridge_dev->bus); if (unlikely(rc < 0)) return rc; /* allocate a new MC control structure */ - mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, sbridge_dev->mc); + layers[0].type = EDAC_MC_LAYER_CHANNEL; + layers[0].size = NUM_CHANNELS; + layers[0].is_virt_csrow = false; + layers[1].type = EDAC_MC_LAYER_SLOT; + layers[1].size = MAX_DIMMS; + layers[1].is_virt_csrow = true; + mci = new_edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers, + sizeof(*pvt)); + if (unlikely(!mci)) return -ENOMEM; -- cgit v1.1 From ca0907b9e413bb1d1f3ea123b663535b74928846 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 2 May 2012 14:37:00 -0300 Subject: edac: Remove the legacy EDAC ABI Now that all drivers got converted to use the new ABI, we can drop the old one. 
Acked-by: Chris Metcalf Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/sb_edac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/edac/sb_edac.c') diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 8286479..2f95a1b 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -1640,7 +1640,7 @@ static int sbridge_register_mci(struct sbridge_dev *sbridge_dev) layers[1].type = EDAC_MC_LAYER_SLOT; layers[1].size = MAX_DIMMS; layers[1].is_virt_csrow = true; - mci = new_edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers, + mci = edac_mc_alloc(sbridge_dev->mc, ARRAY_SIZE(layers), layers, sizeof(*pvt)); if (unlikely(!mci)) -- cgit v1.1 From e17a2f42a484562be48128c5b8dc9f7291e8c902 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 11 May 2012 11:41:45 -0300 Subject: edac: Cleanup the logs for i7core and sb edac drivers Remove some information that is duplicated in the MCE log and is of little use for the error report. That data will be added again when creating a trace function that outputs both memory errors and MCE fields. 
Cc: Aristeu Rozanski Signed-off-by: Mauro Carvalho Chehab --- drivers/edac/sb_edac.c | 42 ++++++++++++++++++------------------------ 1 file changed, 18 insertions(+), 24 deletions(-) (limited to 'drivers/edac/sb_edac.c') diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 2f95a1b..e834dfd 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -788,7 +788,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, u8 *socket, long *channel_mask, u8 *rank, - char *area_type, char *msg) + char **area_type, char *msg) { struct mem_ctl_info *new_mci; struct sbridge_pvt *pvt = mci->pvt_info; @@ -843,7 +843,7 @@ static int get_memory_error_data(struct mem_ctl_info *mci, sprintf(msg, "Can't discover the memory socket"); return -EINVAL; } - area_type = get_dram_attr(reg); + *area_type = get_dram_attr(reg); interleave_mode = INTERLEAVE_MODE(reg); pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads], @@ -1342,7 +1342,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, struct mem_ctl_info *new_mci; struct sbridge_pvt *pvt = mci->pvt_info; enum hw_event_mc_err_type tp_event; - char *type, *optype, msg[256], *recoverable_msg; + char *type, *optype, msg[256]; bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0); bool overflow = GET_BITFIELD(m->status, 62, 62); bool uncorrected_error = GET_BITFIELD(m->status, 61, 61); @@ -1355,7 +1355,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, long channel_mask, first_channel; u8 rank, socket; int rc, dimm; - char *area_type = "Unknown"; + char *area_type = NULL; if (uncorrected_error) { if (ripv) { @@ -1407,7 +1407,7 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, } rc = get_memory_error_data(mci, m->addr, &socket, - &channel_mask, &rank, area_type, msg); + &channel_mask, &rank, &area_type, msg); if (rc < 0) goto err_parsing; new_mci = get_mci_for_node_id(socket); @@ -1427,29 +1427,23 @@ static void sbridge_mce_output_error(struct mem_ctl_info *mci, else 
dimm = 2; - if (uncorrected_error && recoverable) - recoverable_msg = " recoverable"; - else - recoverable_msg = ""; /* - * FIXME: What should we do with "channel" information on mcelog? - * Probably, we can just discard it, as the channel information - * comes from the get_memory_error_data() address decoding + * FIXME: On some memory configurations (mirror, lockstep), the + * Memory Controller can't point the error to a single DIMM. The + * EDAC core should be handling the channel mask, in order to point + * to the group of dimm's where the error may be happening. */ snprintf(msg, sizeof(msg), - "%d error(s)%s: %s%s: cpu=%d Err=%04x:%04x addr = 0x%08llx socket=%d Channel=%ld(mask=%ld), rank=%d\n", - core_err_cnt, - overflow ? " OVERFLOW" : "", - area_type, - recoverable_msg, - m->cpu, - mscod, errcode, - (long long) m->addr, - socket, - first_channel, /* This is the real channel on SB */ - channel_mask, - rank); + "count:%d%s%s area:%s err_code:%04x:%04x socket:%d channel_mask:%ld rank:%d", + core_err_cnt, + overflow ? " OVERFLOW" : "", + (uncorrected_error && recoverable) ? " recoverable" : "", + area_type, + mscod, errcode, + socket, + channel_mask, + rank); debugf0("%s", msg); -- cgit v1.1