From: Qiuxu Zhuo
Date: Thu, 21 May 2026 07:31:12 +0000 (+0800)
Subject: EDAC/imh: Add RRL support for Intel Diamond Rapids server
X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=bdfc4367e3f516479e0a68c731bea5c6638a6c7e;p=thirdparty%2Fkernel%2Flinux.git

EDAC/imh: Add RRL support for Intel Diamond Rapids server

Compared to previous generations, Diamond Rapids RRL (Retry Read error Log)
operates at DDR sub-channel granularity and adds an extra register per set.
It also increases the CORRERRCNT register width from 4 to 8 bytes while
reducing the number of registers from 8 to 4.

Add the Diamond Rapids RRL register configuration table and enable support.

Signed-off-by: Qiuxu Zhuo
Signed-off-by: Tony Luck
Tested-by: Yi Lai
Link: https://patch.msgid.link/20260521073112.3881223-9-qiuxu.zhuo@intel.com
---

diff --git a/drivers/edac/imh_base.c b/drivers/edac/imh_base.c
index dfdcfa127ce74..6ca0df031bf51 100644
--- a/drivers/edac/imh_base.c
+++ b/drivers/edac/imh_base.c
@@ -71,6 +71,39 @@ struct local_reg {
 	.width = (cfg)->ip_name##_reg_##reg_name##_width,	\
 }
 
+static struct res_config *res_cfg;
+static int retry_rd_err_log;
+
+#define REG_RRL_DEFINE(a0, a1, a2, a3, a4, a5, a6, b0, b1, b2, b3)	\
+	{	\
+		.set_num = 4,	\
+		.reg_num = 7,	\
+		.sources = {RRL_SRC_FRE_SCRUB, RRL_SRC_FRE_DEMAND, RRL_SRC_LRE_SCRUB, RRL_SRC_LRE_DEMAND},	\
+		.offsets = {	\
+			{a0, a1, a2, a3, a4, a5, a6},	\
+			{a0 + 4, a1 + 4, a2 + 8, a3 + 4, a4 + 4, a5 + 8, a6 + 8},	\
+			{a0 + 8, a1 + 8, a2 + 16, a3 + 8, a4 + 8, a5 + 16, a6 + 16},	\
+			{a0 + 12, a1 + 12, a2 + 24, a3 + 12, a4 + 12, a5 + 24, a6 + 24},	\
+		},	\
+		.widths = {4, 4, 8, 4, 4, 8, 8},	\
+		.v_mask = BIT(0),	\
+		.uc_mask = BIT(1),	\
+		.over_mask = BIT(2),	\
+		.en_mask = BIT(12),	\
+		.en_patspr_mask = BIT(14),	\
+		.noover_mask = BIT(15),	\
+		.cecnt_num = 4,	\
+		.cecnt_offsets = {b0, b1, b2, b3},	\
+		.cecnt_widths = {8, 8, 8, 8},	\
+}
+
+static struct reg_rrl dmr_reg_rrl_ddr_subch0 = REG_RRL_DEFINE(
+	0x2dc0, 0x2dd0, 0x2de0, 0x2e00, 0x2e10, 0x2f70, 0x0200,
+	0x2c10, 0x2c18, 0x2c20, 0x2c28);
+static struct reg_rrl dmr_reg_rrl_ddr_subch1 = REG_RRL_DEFINE(
+	0x6dc0, 0x6dd0, 0x6de0, 0x6e00, 0x6e10, 0x6f70, 0x4200,
+	0x6c10, 0x6c18, 0x6c20, 0x6c28);
+
 static void __read_local_reg(void *reg)
 {
 	struct local_reg *r = (struct local_reg *)reg;
@@ -480,6 +513,8 @@ static struct res_config dmr_cfg = {
 	.ha_size = 0x1000,
 	.ha_reg_mode_offset = 0x4a0,
 	.ha_reg_mode_width = 4,
+	.reg_rrl_ddr[0] = &dmr_reg_rrl_ddr_subch0,
+	.reg_rrl_ddr[1] = &dmr_reg_rrl_ddr_subch1,
 };
 
 static const struct x86_cpu_id imh_cpuids[] = {
@@ -519,6 +554,7 @@ static int __init imh_init(void)
 		return -ENODEV;
 	cfg = (struct res_config *)id->driver_data;
 	skx_set_res_cfg(cfg);
+	res_cfg = cfg;
 
 	if (!imh_get_tolm_tohm(cfg, &tolm, &tohm))
 		return -ENODEV;
@@ -553,6 +589,13 @@ static int __init imh_init(void)
 	mce_register_decode_chain(&imh_mce_dec);
 	skx_setup_debug("imh_test");
 
+	cfg->rrl_ctrl_mode = retry_rd_err_log;
+	if (retry_rd_err_log && cfg->reg_rrl_ddr[0]) {
+		skx_set_show_rrl(skx_show_rrl);
+		if (retry_rd_err_log == RRL_CTRL_LINUX)
+			skx_enable_rrl(true);
+	}
+
 	imh_printk(KERN_INFO, "%s\n", IMH_REVISION);
 
 	return 0;
@@ -565,6 +608,12 @@ static void __exit imh_exit(void)
 {
 	edac_dbg(2, "\n");
 
+	if (retry_rd_err_log && res_cfg->reg_rrl_ddr[0]) {
+		if (retry_rd_err_log == RRL_CTRL_LINUX)
+			skx_enable_rrl(false);
+		skx_set_show_rrl(NULL);
+	}
+
 	skx_teardown_debug();
 	mce_unregister_decode_chain(&imh_mce_dec);
 	skx_adxl_put();
@@ -574,6 +623,9 @@ static void __exit imh_exit(void)
 module_init(imh_init);
 module_exit(imh_exit);
 
+module_param(retry_rd_err_log, int, 0444);
+MODULE_PARM_DESC(retry_rd_err_log, "retry_rd_err_log: 0=off(default), 1=bios(Linux doesn't reset any control bits, but just reports values.), 2=linux(Linux tries to take control and resets mode bits, clear valid/UC bits after reading.)");
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Qiuxu Zhuo");
 MODULE_DESCRIPTION("MC Driver for Intel servers using IMH-based memory controller");
diff --git a/drivers/edac/skx_common.h b/drivers/edac/skx_common.h
index 6d4cf0dd412ae..777252cca809f 100644
--- a/drivers/edac/skx_common.h
+++ b/drivers/edac/skx_common.h
@@ -77,7 +77,7 @@
 /* Max RRL register sets per {,sub-,pseudo-}channel. */
 #define NUM_RRL_SET		4
 /* Max RRL registers per set. */
-#define NUM_RRL_REG		6
+#define NUM_RRL_REG		7
 /* Max correctable error count registers. */
 #define NUM_CECNT_REG		8
 