summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorTony Luck <tony.luck@intel.com>2016-04-28 07:52:11 -0700
committerBorislav Petkov <bp@suse.de>2016-04-29 16:41:24 +0200
commit5359534505c74841dbb2c6baf41db1a395acd34d (patch)
treea2a9bdd3aa0f4b71e53d1d43aa9a5d34948bba31
parentde0336b30de1b3e3ad6b76b5ccf0b4920d328313 (diff)
downloadlinux-0-day-5359534505c74841dbb2c6baf41db1a395acd34d.tar.gz
linux-0-day-5359534505c74841dbb2c6baf41db1a395acd34d.tar.xz
EDAC, i7core: Remove double buffering of error records
In the bad old days the functions from x86_mce_decoder_chain could be called in machine check context. So we used to carefully copy them and defer processing until later. But in f29a7aff4bd60 ("x86/mce: Avoid potential deadlock due to printk() in MCE context") we switched the logging code to save the record in a genpool, and call the functions that registered to be notified later from a work queue. So drop all the double buffering and do all the work we want to do as soon as i7core_mce_check_error() is called. Signed-off-by: Tony Luck <tony.luck@intel.com> Acked-by: Mauro Carvalho Chehab <mchehab@osg.samsung.com> Cc: linux-edac <linux-edac@vger.kernel.org> Link: http://lkml.kernel.org/r/29ab2c370915c6e132fc5d88e7b72cb834bedbfe.1461855008.git.tony.luck@intel.com Signed-off-by: Borislav Petkov <bp@suse.de>
-rw-r--r--drivers/edac/i7core_edac.c81
1 files changed, 5 insertions, 76 deletions
diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c
index 01087a38da226..60e0bb53e9c9e 100644
--- a/drivers/edac/i7core_edac.c
+++ b/drivers/edac/i7core_edac.c
@@ -271,16 +271,6 @@ struct i7core_pvt {
bool is_registered, enable_scrub;
- /* Fifo double buffers */
- struct mce mce_entry[MCE_LOG_LEN];
- struct mce mce_outentry[MCE_LOG_LEN];
-
- /* Fifo in/out counters */
- unsigned mce_in, mce_out;
-
- /* Count indicator to show errors not got */
- unsigned mce_overrun;
-
/* DCLK Frequency used for computing scrub rate */
int dclk_freq;
@@ -1792,56 +1782,15 @@ static void i7core_mce_output_error(struct mem_ctl_info *mci,
* i7core_check_error Retrieve and process errors reported by the
* hardware. Called by the Core module.
*/
-static void i7core_check_error(struct mem_ctl_info *mci)
+static void i7core_check_error(struct mem_ctl_info *mci, struct mce *m)
{
struct i7core_pvt *pvt = mci->pvt_info;
- int i;
- unsigned count = 0;
- struct mce *m;
- /*
- * MCE first step: Copy all mce errors into a temporary buffer
- * We use a double buffering here, to reduce the risk of
- * losing an error.
- */
- smp_rmb();
- count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
- % MCE_LOG_LEN;
- if (!count)
- goto check_ce_error;
-
- m = pvt->mce_outentry;
- if (pvt->mce_in + count > MCE_LOG_LEN) {
- unsigned l = MCE_LOG_LEN - pvt->mce_in;
-
- memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
- smp_wmb();
- pvt->mce_in = 0;
- count -= l;
- m += l;
- }
- memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
- smp_wmb();
- pvt->mce_in += count;
-
- smp_rmb();
- if (pvt->mce_overrun) {
- i7core_printk(KERN_ERR, "Lost %d memory errors\n",
- pvt->mce_overrun);
- smp_wmb();
- pvt->mce_overrun = 0;
- }
-
- /*
- * MCE second step: parse errors and display
- */
- for (i = 0; i < count; i++)
- i7core_mce_output_error(mci, &pvt->mce_outentry[i]);
+ i7core_mce_output_error(mci, m);
/*
* Now, let's increment CE error counts
*/
-check_ce_error:
if (!pvt->is_registered)
i7core_udimm_check_mc_ecc_err(mci);
else
@@ -1849,12 +1798,8 @@ check_ce_error:
}
/*
- * i7core_mce_check_error Replicates mcelog routine to get errors
- * This routine simply queues mcelog errors, and
- * return. The error itself should be handled later
- * by i7core_check_error.
- * WARNING: As this routine should be called at NMI time, extra care should
- * be taken to avoid deadlocks, and to be as fast as possible.
+ * Check that logging is enabled and that this is the right type
+ * of error for us to handle.
*/
static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
void *data)
@@ -1882,21 +1827,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val,
if (mce->bank != 8)
return NOTIFY_DONE;
- smp_rmb();
- if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
- smp_wmb();
- pvt->mce_overrun++;
- return NOTIFY_DONE;
- }
-
- /* Copy memory error at the ringbuffer */
- memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
- smp_wmb();
- pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
-
- /* Handle fatal errors immediately */
- if (mce->mcgstatus & 1)
- i7core_check_error(mci);
+ i7core_check_error(mci, mce);
/* Advise mcelog that the errors were handled */
return NOTIFY_STOP;
@@ -2243,8 +2174,6 @@ static int i7core_register_mci(struct i7core_dev *i7core_dev)
get_dimm_config(mci);
/* record ptr to the generic device */
mci->pdev = &i7core_dev->pdev[0]->dev;
- /* Set the function pointer to an actual operation function */
- mci->edac_check = i7core_check_error;
/* Enable scrubrate setting */
if (pvt->enable_scrub)