vmcoreinfo: track and log recoverable hardware errors

author Breno Leitao <leitao@debian.org>

Fri, 10 Oct 2025 10:36:50 +0000 (03:36 -0700)

committer Andrew Morton <akpm@linux-foundation.org>

Thu, 27 Nov 2025 22:24:44 +0000 (14:24 -0800)
author Breno Leitao <leitao@debian.org>
Fri, 10 Oct 2025 10:36:50 +0000 (03:36 -0700)
committer Andrew Morton <akpm@linux-foundation.org>
Thu, 27 Nov 2025 22:24:44 +0000 (14:24 -0800)
diff --git a/Documentation/driver-api/hw-recoverable-errors.rst b/Documentation/driver-api/hw-recoverable-errors.rst

new file mode 100644 (file)

index 0000000..fc526c3
--- /dev/null
+++ b/Documentation/driver-api/hw-recoverable-errors.rst
@@ -0,0 +1,60 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================================================
+Recoverable Hardware Error Tracking in vmcoreinfo
+=================================================
+
+Overview
+--------
+
+This feature provides a generic infrastructure within the Linux kernel to track
+and log recoverable hardware errors. These are hardware errors visible to the
+kernel that might not cause immediate panics but may influence system health,
+mainly because new code paths will be executed in the kernel.
+
+By recording counts and timestamps of recoverable errors into the vmcoreinfo
+crash dump notes, this infrastructure aids post-mortem crash analysis tools in
+correlating hardware events with kernel failures. This enables faster triage
+and better understanding of root causes, especially in large-scale cloud
+environments where hardware issues are common.
+
+Benefits
+--------
+
+- Facilitates correlation of hardware recoverable errors with kernel panics or
+  unusual code paths that lead to system crashes.
+- Provides operators and cloud providers quick insights, improving reliability
+  and reducing troubleshooting time.
+- Complements existing full hardware diagnostics without replacing them.
+
+Data Exposure and Consumption
+-----------------------------
+
+- The tracked error data consists of per-error-type counts and timestamps of
+  last occurrence.
+- This data is stored in the `hwerr_data` array, categorized by error source
+  types like CPU, memory, PCI, CXL, and others.
+- It is exposed via vmcoreinfo crash dump notes and can be read using tools
+  like `crash`, `drgn`, or other kernel crash analysis utilities.
+- This data can only be read from crash dumps; there is no other way to read it.
+- These errors are divided by area, which includes CPU, Memory, PCI, CXL and
+  others.
+
+Typical usage example (in drgn REPL):
+
+.. code-block:: python
+
+    >>> prog['hwerr_data']
+    (struct hwerr_info[HWERR_RECOV_MAX]){
+        {
+            .count = (int)844,
+            .timestamp = (time64_t)1752852018,
+        },
+        ...
+    }
+
+Enabling
+--------
+
+- This feature is enabled when CONFIG_VMCORE_INFO is set.
+
diff --git a/Documentation/driver-api/index.rst b/Documentation/driver-api/index.rst

index 3e2a270bd82826cd78ffc6f18214fdbde151a36a..a35705b44799624cc0c3399d350091824340879b 100644 (file)
--- a/Documentation/driver-api/index.rst
+++ b/Documentation/driver-api/index.rst
@@ -96,6 +96,7 @@ Subsystem-specific APIs
     gpio/index
     hsi
     hte/index
+   hw-recoverable-errors
     i2c
     iio/index
     infiniband
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c

index 460e90a1a0b172d2b8b88e3635922c637b6e684a..08adbf4cd6edc7a1c619df42134e78662999a2a0 100644 (file)
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -45,6 +45,7 @@
  #include <linux/task_work.h>
  #include <linux/hardirq.h>
  #include <linux/kexec.h>
+#include <linux/vmcore_info.h>
  
  #include <asm/fred.h>
  #include <asm/cpu_device_id.h>
@@ -1700,6 +1701,9 @@ noinstr void do_machine_check(struct pt_regs *regs)
         }
  
  out:
+       /* Given it didn't panic, mark it as recoverable */
+       hwerr_log_error_type(HWERR_RECOV_OTHERS);
+
         instrumentation_end();
  
  clear:
diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c

index 97ee19f2cae0607be65aacce4cd21be99ae0a7a3..92b0e3c391b2d199d599700e2c6bf80c48f15153 100644 (file)
--- a/drivers/acpi/apei/ghes.c
+++ b/drivers/acpi/apei/ghes.c
@@ -43,6 +43,7 @@
  #include <linux/uuid.h>
  #include <linux/ras.h>
  #include <linux/task_work.h>
+#include <linux/vmcore_info.h>
  
  #include <acpi/actbl1.h>
  #include <acpi/ghes.h>
@@ -867,6 +868,40 @@ int cxl_cper_kfifo_get(struct cxl_cper_work_data *wd)
  }
  EXPORT_SYMBOL_NS_GPL(cxl_cper_kfifo_get, "CXL");
  
+/*
+ * Account a GHES error section in the recoverable hardware error counters.
+ *
+ * Only sections whose severity is CPER_SEV_RECOVERABLE are counted. The
+ * section type GUID selects the counter: processor sections are logged as
+ * HWERR_RECOV_CPU, CXL sections as HWERR_RECOV_CXL, PCIe/PCI-X sections as
+ * HWERR_RECOV_PCI, platform memory sections as HWERR_RECOV_MEMORY, and any
+ * other section type as HWERR_RECOV_OTHERS.
+ */
+static void ghes_log_hwerr(int sev, const guid_t *sec_type)
+{
+       if (sev != CPER_SEV_RECOVERABLE)
+               return;
+
+       if (guid_equal(sec_type, &CPER_SEC_PROC_ARM) ||
+           guid_equal(sec_type, &CPER_SEC_PROC_GENERIC) ||
+           guid_equal(sec_type, &CPER_SEC_PROC_IA)) {
+               hwerr_log_error_type(HWERR_RECOV_CPU);
+               return;
+       }
+
+       if (guid_equal(sec_type, &CPER_SEC_CXL_PROT_ERR) ||
+           guid_equal(sec_type, &CPER_SEC_CXL_GEN_MEDIA_GUID) ||
+           guid_equal(sec_type, &CPER_SEC_CXL_DRAM_GUID) ||
+           guid_equal(sec_type, &CPER_SEC_CXL_MEM_MODULE_GUID)) {
+               hwerr_log_error_type(HWERR_RECOV_CXL);
+               return;
+       }
+
+       if (guid_equal(sec_type, &CPER_SEC_PCIE) ||
+           guid_equal(sec_type, &CPER_SEC_PCI_X_BUS)) {
+               hwerr_log_error_type(HWERR_RECOV_PCI);
+               return;
+       }
+
+       if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
+               hwerr_log_error_type(HWERR_RECOV_MEMORY);
+               return;
+       }
+
+       /* Recoverable, but not one of the section types classified above. */
+       hwerr_log_error_type(HWERR_RECOV_OTHERS);
+}
+
  static void ghes_do_proc(struct ghes *ghes,
                          const struct acpi_hest_generic_status *estatus)
  {
@@ -888,6 +923,7 @@ static void ghes_do_proc(struct ghes *ghes,
                 if (gdata->validation_bits & CPER_SEC_VALID_FRU_TEXT)
                         fru_text = gdata->fru_text;
  
+               ghes_log_hwerr(sev, sec_type);
                 if (guid_equal(sec_type, &CPER_SEC_PLATFORM_MEM)) {
                         struct cper_sec_mem_err *mem_err = acpi_hest_get_payload(gdata);
  
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c

index 0b5ed4722ac3238362c98812be7179bf665d3ce6..e0bcaa896803c91e7451e57fed06a725c90c6114 100644 (file)
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -30,6 +30,7 @@
  #include <linux/kfifo.h>
  #include <linux/ratelimit.h>
  #include <linux/slab.h>
+#include <linux/vmcore_info.h>
  #include <acpi/apei.h>
  #include <acpi/ghes.h>
  #include <ras/ras_event.h>
@@ -765,6 +766,7 @@ static void pci_dev_aer_stats_incr(struct pci_dev *pdev,
                 break;
         case AER_NONFATAL:
                 aer_info->dev_total_nonfatal_errs++;
+               hwerr_log_error_type(HWERR_RECOV_PCI);
                 counter = &aer_info->dev_nonfatal_errs[0];
                 max = AER_MAX_TYPEOF_UNCOR_ERRS;
                 break;
diff --git a/include/linux/vmcore_info.h b/include/linux/vmcore_info.h

index 37e003ae52626ad83b89475c45f634c5ac6c61a1..e71518caacdfcf1a85e9ffaeb7926b55972c07dd 100644 (file)
--- a/include/linux/vmcore_info.h
+++ b/include/linux/vmcore_info.h
@@ -5,6 +5,7 @@
  #include <linux/linkage.h>
  #include <linux/elfcore.h>
  #include <linux/elf.h>
+#include <uapi/linux/vmcore.h>
  
  #define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4)
  #define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(NN_PRSTATUS), 4)
@@ -77,4 +78,11 @@ extern u32 *vmcoreinfo_note;
  Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
                           void *data, size_t data_len);
  void final_note(Elf_Word *buf);
+
+#ifdef CONFIG_VMCORE_INFO
+/* Record one occurrence of a recoverable hardware error of type @src. */
+void hwerr_log_error_type(enum hwerr_error_type src);
+#else
+/* No-op stub used when CONFIG_VMCORE_INFO is not set. */
+static inline void hwerr_log_error_type(enum hwerr_error_type src) { }
+#endif
+
  #endif /* LINUX_VMCORE_INFO_H */
diff --git a/include/uapi/linux/vmcore.h b/include/uapi/linux/vmcore.h

index 3e9da91866ffd38d94c658918193d9c0cd3cae2e..2ba89fafa518ae3ef158dd3384faef565b7db24c 100644 (file)
--- a/include/uapi/linux/vmcore.h
+++ b/include/uapi/linux/vmcore.h
@@ -15,4 +15,13 @@ struct vmcoredd_header {
         __u8 dump_name[VMCOREDD_MAX_NAME_BYTES]; /* Device dump's name */
  };
  
+enum hwerr_error_type {        /* category of a recoverable hardware error */
+       HWERR_RECOV_CPU,        /* processor error sections reported via GHES */
+       HWERR_RECOV_MEMORY,     /* platform memory error sections reported via GHES */
+       HWERR_RECOV_PCI,        /* PCIe/PCI-X GHES sections and AER non-fatal errors */
+       HWERR_RECOV_CXL,        /* CXL error sections reported via GHES */
+       HWERR_RECOV_OTHERS,     /* anything else, e.g. machine checks that did not panic */
+       HWERR_RECOV_MAX,        /* number of categories; not a valid category itself */
+};
+
  #endif /* _UAPI_VMCORE_H */
diff --git a/kernel/vmcore_info.c b/kernel/vmcore_info.c

index e066d31d08f89542b78fabf48efd2685aba2293c..fe9bf8db1922e6a269e3d4034b238266751dcbcb 100644 (file)
--- a/kernel/vmcore_info.c
+++ b/kernel/vmcore_info.c
@@ -31,6 +31,13 @@ u32 *vmcoreinfo_note;
  /* trusted vmcoreinfo, e.g. we can make a copy in the crash memory */
  static unsigned char *vmcoreinfo_data_safecopy;
  
+struct hwerr_info {
+       atomic_t count;         /* incremented by each hwerr_log_error_type() call */
+       time64_t timestamp;     /* ktime_get_real_seconds() at the most recent call */
+};
+
+static struct hwerr_info hwerr_data[HWERR_RECOV_MAX];  /* indexed by enum hwerr_error_type */
+
  Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type,
                           void *data, size_t data_len)
  {
@@ -118,6 +125,16 @@ phys_addr_t __weak paddr_vmcoreinfo_note(void)
  }
  EXPORT_SYMBOL(paddr_vmcoreinfo_note);
  
+/**
+ * hwerr_log_error_type - record one recoverable hardware error
+ * @src: category of the error; values outside [0, HWERR_RECOV_MAX) are ignored
+ *
+ * Increments the per-category counter in hwerr_data[] and stores the current
+ * wall-clock time, in seconds, as the time of the latest occurrence. The
+ * counter and the timestamp are updated by two separate stores, not as a
+ * single atomic unit.
+ */
+void hwerr_log_error_type(enum hwerr_error_type src)
+{
+       /*
+        * A single unsigned comparison rejects both negative and too-large
+        * values, whatever underlying type the compiler picked for the enum.
+        */
+       if ((unsigned int)src >= HWERR_RECOV_MAX)
+               return;
+
+       atomic_inc(&hwerr_data[src].count);
+       WRITE_ONCE(hwerr_data[src].timestamp, ktime_get_real_seconds());
+}
+EXPORT_SYMBOL_GPL(hwerr_log_error_type);
+
  static int __init crash_save_vmcoreinfo_init(void)
  {
         vmcoreinfo_data = (unsigned char *)get_zeroed_page(GFP_KERNEL);
author	Breno Leitao <leitao@debian.org>
	Fri, 10 Oct 2025 10:36:50 +0000 (03:36 -0700)
committer	Andrew Morton <akpm@linux-foundation.org>
	Thu, 27 Nov 2025 22:24:44 +0000 (14:24 -0800)
Documentation/driver-api/hw-recoverable-errors.rst	[new file with mode: 0644]	patch \| blob
Documentation/driver-api/index.rst		patch \| blob \| blame \| history
arch/x86/kernel/cpu/mce/core.c		patch \| blob \| blame \| history
drivers/acpi/apei/ghes.c		patch \| blob \| blame \| history
drivers/pci/pcie/aer.c		patch \| blob \| blame \| history
include/linux/vmcore_info.h		patch \| blob \| blame \| history
include/uapi/linux/vmcore.h		patch \| blob \| blame \| history
kernel/vmcore_info.c		patch \| blob \| blame \| history