From: Maciej W. Rozycki Date: Sat, 28 Mar 2026 15:49:57 +0000 (+0000) Subject: MIPS: DEC: Rate-limit memory errors for ECC systems X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=56236b7f6f4461e2aa1d1210de14e91c61601e53;p=thirdparty%2Fkernel%2Flinux.git MIPS: DEC: Rate-limit memory errors for ECC systems Prevent the system from becoming unusable due to a flood of memory error messages with DECstation and DECsystem models using ECC, that is KN02, KN03 and KN05 systems. It seems common for gradual oxidation of memory module contacts to cause memory errors to eventually develop and while ECC takes care of correcting them and the system affected can continue operating normally until the contacts have been cleaned, the unlimited messages make the system spend all its time on producing them, therefore preventing it from being used. Rate-limiting removes the load from the system and enables its normal operation, e.g.: Bus error interrupt: CPU memory read ECC error at 0x139cfb04 ECC syndrome 0x54 -- corrected single bit error at data bit D3 Bus error interrupt: CPU partial memory write ECC error at 0x138c1f5c ECC syndrome 0x54 -- corrected single bit error at data bit D3 Bus error interrupt: CPU partial memory write ECC error at 0x138c1f6c ECC syndrome 0x54 -- corrected single bit error at data bit D3 Bus error interrupt: CPU memory read ECC error at 0x139cff64 ECC syndrome 0x54 -- corrected single bit error at data bit D3 Bus error interrupt: CPU memory read ECC error at 0x136af00c ECC syndrome 0x54 -- corrected single bit error at data bit D3 Bus error interrupt: CPU memory read ECC error at 0x136af044 ECC syndrome 0x54 -- corrected single bit error at data bit D3 Bus error interrupt: CPU memory read ECC error at 0x136af0cc ECC syndrome 0x54 -- corrected single bit error at data bit D3 Bus error interrupt: CPU memory read ECC error at 0x136af0cc ECC syndrome 0x54 -- corrected single bit error at data bit D3 Bus error interrupt: CPU memory read ECC error at 0x136af0e4 ECC syndrome 0x54 -- corrected single bit error at data bit D3 Bus error interrupt: CPU memory read ECC error at 0x136af104 ECC syndrome 0x54 -- corrected single bit error at data bit D3 dec_ecc_be_backend: 34455 callbacks suppressed Signed-off-by: Maciej W. Rozycki Signed-off-by: Thomas Bogendoerfer --- diff --git a/arch/mips/dec/ecc-berr.c b/arch/mips/dec/ecc-berr.c index 1eb356fdd8323..3d41ff07d8178 100644 --- a/arch/mips/dec/ecc-berr.c +++ b/arch/mips/dec/ecc-berr.c @@ -5,12 +5,13 @@ * 5000/240 (KN03), 5000/260 (KN05) and DECsystem 5900 (KN03), * 5900/260 (KN05) systems. * - * Copyright (c) 2003, 2005 Maciej W. Rozycki + * Copyright (c) 2003, 2005, 2026 Maciej W. Rozycki */ #include #include #include +#include #include #include @@ -51,6 +52,10 @@ static int dec_ecc_be_backend(struct pt_regs *regs, int is_fixup, int invoker) static const char overstr[] = "overrun"; static const char eccstr[] = "ECC error"; + static DEFINE_RATELIMIT_STATE(rs, + DEFAULT_RATELIMIT_INTERVAL, + DEFAULT_RATELIMIT_BURST); + const char *kind, *agent, *cycle, *event; const char *status = "", *xbit = "", *fmt = ""; unsigned long address; @@ -70,7 +75,7 @@ static int dec_ecc_be_backend(struct pt_regs *regs, int is_fixup, int invoker) if (!(erraddr & KN0X_EAR_VALID)) { /* No idea what happened. */ - printk(KERN_ALERT "Unidentified bus error %s\n", kind); + pr_alert_ratelimited("Unidentified bus error %s\n", kind); return action; } @@ -180,12 +185,13 @@ static int dec_ecc_be_backend(struct pt_regs *regs, int is_fixup, int invoker) } } - if (action != MIPS_BE_FIXUP) + if (action != MIPS_BE_FIXUP && __ratelimit(&rs)) { printk(KERN_ALERT "Bus error %s: %s %s %s at %#010lx\n", kind, agent, cycle, event, address); - if (action != MIPS_BE_FIXUP && erraddr & KN0X_EAR_ECCERR) - printk(fmt, " ECC syndrome ", syn, status, xbit, i); + if (erraddr & KN0X_EAR_ECCERR) + printk(fmt, " ECC syndrome ", syn, status, xbit, i); + } return action; }