git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
drm/xe/hwmon: Expose GPU PCIe temperature
author: Karthik Poosa <karthik.poosa@intel.com>
Mon, 12 Jan 2026 20:35:20 +0000 (02:05 +0530)
committer: Rodrigo Vivi <rodrigo.vivi@intel.com>
Mon, 12 Jan 2026 22:00:29 +0000 (17:00 -0500)
Expose the GPU PCIe average temperature and its limits via the hwmon sysfs
entries temp5_xxx.
Update the Xe hwmon sysfs documentation accordingly.

v2: Update kernel version in Xe hwmon documentation. (Raag)

v3:
 - Address review comments from Raag.
 - Remove redundant debug log.
 - Update kernel version in Xe hwmon documentation. (Raag)

v4:
 - Address review comments from Raag.
 - Group new temperature attributes with existing temperature attributes
   as per channel index in Xe hwmon documentation.
 - Use TEMP_MASK instead of TEMP_MASK_MAILBOX.
 - Add PCIE_SENSOR_MASK which uses REG_FIELD_GET as replacement of
   PCIE_SENSOR_SHIFT.

v5:
 - Address review comments from Raag.
 - Use REG_FIELD_GET to get PCIe temperature.
 - Move PCIE_SENSOR_GROUP_ID and PCIE_SENSOR_MASK to xe_pcode_api.h
 - Cosmetic change.

Signed-off-by: Karthik Poosa <karthik.poosa@intel.com>
Reviewed-by: Raag Jadav <raag.jadav@intel.com>
Link: https://patch.msgid.link/20260112203521.1014388-4-karthik.poosa@intel.com
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Documentation/ABI/testing/sysfs-driver-intel-xe-hwmon
drivers/gpu/drm/xe/xe_hwmon.c
drivers/gpu/drm/xe/xe_pcode_api.h

index 550206885624b27d457df10eec8514f86a722505..6e21bebf0e0daa800a4ea5d6e29768c7821e92c4 100644 (file)
@@ -189,6 +189,30 @@ Description:       RO. Memory controller average temperature in millidegree Celsius.
 
                Only supported for particular Intel Xe graphics platforms.
 
+What:          /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp5_crit
+Date:          January 2026
+KernelVersion: 7.0
+Contact:       intel-xe@lists.freedesktop.org
+Description:   RO. GPU PCIe critical temperature in millidegree Celsius.
+
+               Only supported for particular Intel Xe graphics platforms.
+
+What:          /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp5_emergency
+Date:          January 2026
+KernelVersion: 7.0
+Contact:       intel-xe@lists.freedesktop.org
+Description:   RO. GPU PCIe shutdown temperature in millidegree Celsius.
+
+               Only supported for particular Intel Xe graphics platforms.
+
+What:          /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/temp5_input
+Date:          January 2026
+KernelVersion: 7.0
+Contact:       intel-xe@lists.freedesktop.org
+Description:   RO. GPU PCIe temperature in millidegree Celsius.
+
+               Only supported for particular Intel Xe graphics platforms.
+
 What:          /sys/bus/pci/drivers/xe/.../hwmon/hwmon<i>/fan1_input
 Date:          March 2025
 KernelVersion: 6.16
index 51a2c23be99ec84beafc566191cc17f8d8cbd0df..e8604e6300acc2b75555c6a75bfeac79f6a2cf1c 100644 (file)
@@ -44,6 +44,7 @@ enum xe_hwmon_channel {
        CHANNEL_PKG,
        CHANNEL_VRAM,
        CHANNEL_MCTRL,
+       CHANNEL_PCIE,
        CHANNEL_MAX,
 };
 
@@ -712,6 +713,7 @@ static const struct hwmon_channel_info * const hwmon_info[] = {
                           HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL |
                           HWMON_T_MAX,
                           HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
+                          HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL,
                           HWMON_T_CRIT | HWMON_T_EMERGENCY | HWMON_T_INPUT | HWMON_T_LABEL),
        HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CRIT |
                           HWMON_P_CAP,
@@ -771,6 +773,27 @@ static int get_mc_temp(struct xe_hwmon *hwmon, long *val)
        return 0;
 }
 
+static int get_pcie_temp(struct xe_hwmon *hwmon, long *val)
+{
+       struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe);
+       u32 data = 0;
+       int ret;
+
+       ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_THERMAL_INFO, READ_THERMAL_DATA,
+                                                 PCIE_SENSOR_GROUP_ID), &data, NULL);
+       if (ret)
+               return ret;
+
+       /* Sensor offset is different for G21 */
+       if (hwmon->xe->info.subplatform != XE_SUBPLATFORM_BATTLEMAGE_G21)
+               data = REG_FIELD_GET(PCIE_SENSOR_MASK, data);
+
+       data = REG_FIELD_GET(TEMP_MASK, data);
+       *val = (s8)data * MILLIDEGREE_PER_DEGREE;
+
+       return 0;
+}
+
 /* I1 is exposed as power_crit or as curr_crit depending on bit 31 */
 static int xe_hwmon_pcode_read_i1(const struct xe_hwmon *hwmon, u32 *uval)
 {
@@ -876,6 +899,7 @@ xe_hwmon_temp_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
                case CHANNEL_VRAM:
                        return hwmon->temp.limit[TEMP_LIMIT_MEM_SHUTDOWN] ? 0444 : 0;
                case CHANNEL_MCTRL:
+               case CHANNEL_PCIE:
                        return hwmon->temp.count ? 0444 : 0;
                default:
                        return 0;
@@ -887,6 +911,7 @@ xe_hwmon_temp_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
                case CHANNEL_VRAM:
                        return hwmon->temp.limit[TEMP_LIMIT_MEM_CRIT] ? 0444 : 0;
                case CHANNEL_MCTRL:
+               case CHANNEL_PCIE:
                        return hwmon->temp.count ? 0444 : 0;
                default:
                        return 0;
@@ -906,6 +931,7 @@ xe_hwmon_temp_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
                        return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_TEMP,
                                                                channel)) ? 0444 : 0;
                case CHANNEL_MCTRL:
+               case CHANNEL_PCIE:
                        return hwmon->temp.count ? 0444 : 0;
                default:
                        return 0;
@@ -933,6 +959,8 @@ xe_hwmon_temp_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
                        return 0;
                case CHANNEL_MCTRL:
                        return get_mc_temp(hwmon, val);
+               case CHANNEL_PCIE:
+                       return get_pcie_temp(hwmon, val);
                default:
                        return -EOPNOTSUPP;
                }
@@ -940,6 +968,7 @@ xe_hwmon_temp_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
                switch (channel) {
                case CHANNEL_PKG:
                case CHANNEL_MCTRL:
+               case CHANNEL_PCIE:
                        *val = hwmon->temp.limit[TEMP_LIMIT_PKG_SHUTDOWN] * MILLIDEGREE_PER_DEGREE;
                        return 0;
                case CHANNEL_VRAM:
@@ -952,6 +981,7 @@ xe_hwmon_temp_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
                switch (channel) {
                case CHANNEL_PKG:
                case CHANNEL_MCTRL:
+               case CHANNEL_PCIE:
                        *val = hwmon->temp.limit[TEMP_LIMIT_PKG_CRIT] * MILLIDEGREE_PER_DEGREE;
                        return 0;
                case CHANNEL_VRAM:
@@ -1331,6 +1361,8 @@ static int xe_hwmon_read_label(struct device *dev,
                        *str = "vram";
                else if (channel == CHANNEL_MCTRL)
                        *str = "mctrl";
+               else if (channel == CHANNEL_PCIE)
+                       *str = "pcie";
                return 0;
        case hwmon_power:
        case hwmon_energy:
index ad713a3e34e5daf6eba26ed3b9dc44d88a5dd387..85cc7478b787ae427c9bccda33a33824b67bc631 100644 (file)
@@ -54,6 +54,8 @@
 #define     READ_THERMAL_LIMITS                        0x0
 #define     READ_THERMAL_CONFIG                        0x1
 #define     READ_THERMAL_DATA                  0x2
+#define       PCIE_SENSOR_GROUP_ID             0x2
+#define       PCIE_SENSOR_MASK                 REG_GENMASK(31, 16)
 
 #define   PCODE_LATE_BINDING                   0x5C
 #define     GET_CAPABILITY_STATUS              0x0