git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
net/mlx5: Add sensor name to temperature event message
author Shahar Shitrit <shshitrit@nvidia.com>
Thu, 13 Feb 2025 09:46:41 +0000 (11:46 +0200)
committer Jakub Kicinski <kuba@kernel.org>
Tue, 18 Feb 2025 00:27:38 +0000 (16:27 -0800)
Previously, a temperature event message included a bitmap indicating
which sensors detect high temperatures.

To enhance clarity, we modify the message format to explicitly list
the names of the overheating sensors, alongside the sensors bitmap.
If HWMON is not configured, the event message remains unchanged.

Signed-off-by: Shahar Shitrit <shshitrit@nvidia.com>
Reviewed-by: Carolina Jubran <cjubran@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250213094641.226501-5-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
drivers/net/ethernet/mellanox/mlx5/core/events.c
drivers/net/ethernet/mellanox/mlx5/core/hwmon.c
drivers/net/ethernet/mellanox/mlx5/core/hwmon.h

index e85a9042e3c24e66f254539409beb6c14dfda8d2..01c5f5990f9aea6106363fad5c997441a89acbbe 100644 (file)
@@ -6,6 +6,7 @@
 #include "mlx5_core.h"
 #include "lib/eq.h"
 #include "lib/events.h"
+#include "hwmon.h"
 
 struct mlx5_event_nb {
        struct mlx5_nb  nb;
@@ -153,11 +154,28 @@ static int any_notifier(struct notifier_block *nb,
        return NOTIFY_OK;
 }
 
+#if IS_ENABLED(CONFIG_HWMON)
+/*
+ * Log one warning line per set bit in @bit_set, naming the matching sensor.
+ *
+ * @dev: device the warnings are logged against.
+ * @hwmon: hwmon context handed to hwmon_get_sensor_name().
+ * @bit_set: 64-bit chunk of the sensor warning bitmap.
+ * @bit_set_offset: sensor index that corresponds to bit 0 of @bit_set.
+ *
+ * Bits are tested directly on the u64 value. Reinterpreting a u64 as an
+ * array of unsigned long is not a valid bitmap on 32-bit big-endian
+ * kernels (the two 32-bit halves end up swapped), which would make the
+ * reported sensor indices wrong there.
+ */
+static void print_sensor_names_in_bit_set(struct mlx5_core_dev *dev, struct mlx5_hwmon *hwmon,
+                                         u64 bit_set, int bit_set_offset)
+{
+       int num_bits = sizeof(bit_set) * BITS_PER_BYTE;
+       int i;
+
+       for (i = 0; i < num_bits; i++) {
+               const char *sensor_name;
+               int sensor_idx = i + bit_set_offset;
+
+               /* Skip sensors that did not raise a warning. */
+               if (!((bit_set >> i) & 1))
+                       continue;
+
+               sensor_name = hwmon_get_sensor_name(hwmon, sensor_idx);
+               mlx5_core_warn(dev, "Sensor name[%d]: %s\n", sensor_idx, sensor_name);
+       }
+}
+#endif /* CONFIG_HWMON */
+
 /* type == MLX5_EVENT_TYPE_TEMP_WARN_EVENT */
 static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
 {
        struct mlx5_event_nb *event_nb = mlx5_nb_cof(nb, struct mlx5_event_nb, nb);
        struct mlx5_events   *events   = event_nb->ctx;
+       struct mlx5_core_dev *dev      = events->dev;
        struct mlx5_eqe      *eqe      = data;
        u64 value_lsb;
        u64 value_msb;
@@ -169,10 +187,17 @@ static int temp_warn(struct notifier_block *nb, unsigned long type, void *data)
        value_lsb &= 0x1;
        value_msb = be64_to_cpu(eqe->data.temp_warning.sensor_warning_msb);
 
-       if (net_ratelimit())
-               mlx5_core_warn(events->dev,
-                              "High temperature on sensors with bit set %#llx %#llx",
+       if (net_ratelimit()) {
+               mlx5_core_warn(dev, "High temperature on sensors with bit set %#llx %#llx.\n",
                               value_msb, value_lsb);
+#if IS_ENABLED(CONFIG_HWMON)
+               if (dev->hwmon) {
+                       print_sensor_names_in_bit_set(dev, dev->hwmon, value_lsb, 0);
+                       print_sensor_names_in_bit_set(dev, dev->hwmon, value_msb,
+                                                     sizeof(value_lsb) * BITS_PER_BYTE);
+               }
+#endif
+       }
 
        return NOTIFY_OK;
 }
index 353f81dccd1cee9c643bb73bcffef6a15705e348..4ba2636d7fb6c8b4999ca2b0289d45cb05aa1402 100644 (file)
@@ -416,3 +416,8 @@ void mlx5_hwmon_dev_unregister(struct mlx5_core_dev *mdev)
        mlx5_hwmon_free(hwmon);
        mdev->hwmon = NULL;
 }
+
+/*
+ * Look up the sensor_name stored in hwmon->temp_channel_desc[] at index
+ * @channel and return it.
+ *
+ * No validation is performed: @hwmon is dereferenced unconditionally and
+ * @channel is used as an array index as-is.
+ * NOTE(review): the temperature event handler passes a bitmap bit position
+ * as @channel - confirm it can never exceed the number of descriptors.
+ */
+const char *hwmon_get_sensor_name(struct mlx5_hwmon *hwmon, int channel)
+{
+       const char *name = hwmon->temp_channel_desc[channel].sensor_name;
+
+       return name;
+}
index 999654a9b9da544101d9c284606d81557324d483..f38271c22c1053fdd45efe91761c1a613fc0e27a 100644 (file)
@@ -10,6 +10,7 @@
 
 int mlx5_hwmon_dev_register(struct mlx5_core_dev *mdev);
 void mlx5_hwmon_dev_unregister(struct mlx5_core_dev *mdev);
+const char *hwmon_get_sensor_name(struct mlx5_hwmon *hwmon, int channel);
 
 #else
 static inline int mlx5_hwmon_dev_register(struct mlx5_core_dev *mdev)