git.ipfire.org Git - thirdparty/kernel/linux.git/commitdiff
thermal: Add PCIe cooling driver
author: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
Fri, 18 Oct 2024 14:47:54 +0000 (17:47 +0300)
committer: Bjorn Helgaas <bhelgaas@google.com>
Sat, 16 Nov 2024 16:09:30 +0000 (10:09 -0600)
Add a thermal cooling driver to provide a path to access the PCIe bandwidth
controller using the usual thermal interfaces.

A cooling device is instantiated for controllable PCIe Ports from the
bwctrl service driver.

If registering the cooling device fails, allow bwctrl's probe to succeed
regardless. As cdev in that case contains an IS_ERR() pseudo "pointer", clean
that up inside the probe function so the remove side doesn't need to
suddenly make an odd-looking IS_ERR() check.

On the thermal side, state 0 means no throttling, i.e., the maximum supported
PCIe Link Speed.

Link: https://lore.kernel.org/r/20241018144755.7875-9-ilpo.jarvinen@linux.intel.com
Signed-off-by: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
[bhelgaas: dropped data->cdev test per
https://lore.kernel.org/r/ZzRm1SJTwEMRsAr8@wunner.de]
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Acked-by: Rafael J. Wysocki <rafael@kernel.org> # From the cooling device interface perspective
MAINTAINERS
drivers/pci/pcie/bwctrl.c
drivers/thermal/Kconfig
drivers/thermal/Makefile
drivers/thermal/pcie_cooling.c [new file with mode: 0644]
include/linux/pci-bwctrl.h [new file with mode: 0644]

index 8c555b3325d65feaf2a37965c3806f2cc3af340d..393ed7ce5ea1890f9866d5ad2836c5f76de1d16d 100644 (file)
@@ -17938,6 +17938,8 @@ M:      Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
 L:     linux-pci@vger.kernel.org
 S:     Supported
 F:     drivers/pci/pcie/bwctrl.c
+F:     drivers/thermal/pcie_cooling.c
+F:     include/linux/pci-bwctrl.h
 
 PCIE DRIVER FOR AMAZON ANNAPURNA LABS
 M:     Jonathan Chocron <jonnyc@amazon.com>
index 3cd3e2d066c982e01a3e8c8c212dbc2743ee232b..b59cacc740fa24d5f08a679ed4c0564ffe236b94 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/interrupt.h>
 #include <linux/mutex.h>
 #include <linux/pci.h>
+#include <linux/pci-bwctrl.h>
 #include <linux/rwsem.h>
 #include <linux/slab.h>
 #include <linux/types.h>
  * struct pcie_bwctrl_data - PCIe bandwidth controller
  * @set_speed_mutex:   Serializes link speed changes
  * @lbms_count:                Count for LBMS (since last reset)
+ * @cdev:              Thermal cooling device associated with the port
  */
 struct pcie_bwctrl_data {
        struct mutex set_speed_mutex;
        atomic_t lbms_count;
+       struct thermal_cooling_device *cdev;
 };
 
 /*
@@ -314,11 +317,20 @@ static int pcie_bwnotif_probe(struct pcie_device *srv)
 
        pci_dbg(port, "enabled with IRQ %d\n", srv->irq);
 
+       /* Don't fail on errors. Don't leave IS_ERR() "pointer" into ->cdev */
+       port->link_bwctrl->cdev = pcie_cooling_device_register(port);
+       if (IS_ERR(port->link_bwctrl->cdev))
+               port->link_bwctrl->cdev = NULL;
+
        return 0;
 }
 
 static void pcie_bwnotif_remove(struct pcie_device *srv)
 {
+       struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
+
+       pcie_cooling_device_unregister(data->cdev);
+
        pcie_bwnotif_disable(srv->port);
 
        scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem)
index 61e7ae524b1f8549591b35a43e0edcd132a89325..d3f9686e26e71099e0a6307202f6d7ded99c593e 100644 (file)
@@ -220,6 +220,15 @@ config DEVFREQ_THERMAL
 
          If you want this support, you should say Y here.
 
+config PCIE_THERMAL
+       bool "PCIe cooling support"
+       depends on PCIEPORTBUS
+       help
+         This implements a PCIe cooling mechanism through bandwidth reduction
+         for PCIe devices.
+
+         If you want this support, you should say Y here.
+
 config THERMAL_EMULATION
        bool "Thermal emulation mode support"
        help
index 41c4d56beb40d01bde32d48ed87e9ac16c5b363c..210c16c9146150aa01c413fb72dbb69c52b810cc 100644 (file)
@@ -31,6 +31,8 @@ thermal_sys-$(CONFIG_CPU_IDLE_THERMAL)        += cpuidle_cooling.o
 # devfreq cooling
 thermal_sys-$(CONFIG_DEVFREQ_THERMAL) += devfreq_cooling.o
 
+thermal_sys-$(CONFIG_PCIE_THERMAL) += pcie_cooling.o
+
 obj-$(CONFIG_K3_THERMAL)       += k3_bandgap.o k3_j72xx_bandgap.o
 # platform thermal drivers
 obj-y                          += broadcom/
diff --git a/drivers/thermal/pcie_cooling.c b/drivers/thermal/pcie_cooling.c
new file mode 100644 (file)
index 0000000..a876d64
--- /dev/null
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * PCIe cooling device
+ *
+ * Copyright (C) 2023-2024 Intel Corporation
+ */
+
+#include <linux/build_bug.h>
+#include <linux/cleanup.h>
+#include <linux/err.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/pci-bwctrl.h>
+#include <linux/slab.h>
+#include <linux/sprintf.h>
+#include <linux/thermal.h>
+
+#define COOLING_DEV_TYPE_PREFIX                "PCIe_Port_Link_Speed_"
+
+/*
+ * Report the highest cooling state of the Port stored in @cdev's devdata.
+ *
+ * Cooling state 0 is the same as the maximum PCIe speed, so the highest
+ * state is the distance from 2.5 GT/s to the subordinate bus' maximum
+ * speed. The result is written to @state; always returns 0.
+ */
+static int pcie_cooling_get_max_level(struct thermal_cooling_device *cdev, unsigned long *state)
+{
+       struct pci_dev *pdev = cdev->devdata;
+       struct pci_bus *bus = pdev->subordinate;
+
+       *state = bus->max_bus_speed - PCIE_SPEED_2_5GT;
+       return 0;
+}
+
+/*
+ * Report the current cooling state of the Port stored in @cdev's devdata.
+ *
+ * Cooling states count downwards from the maximum PCIe speed (state 0), so
+ * the current state is @cdev->max_state minus the distance of the
+ * subordinate bus' current speed from 2.5 GT/s. The result is written to
+ * @state; always returns 0.
+ */
+static int pcie_cooling_get_cur_level(struct thermal_cooling_device *cdev, unsigned long *state)
+{
+       struct pci_dev *pdev = cdev->devdata;
+       int above_min = pdev->subordinate->cur_bus_speed - PCIE_SPEED_2_5GT;
+
+       *state = cdev->max_state - above_min;
+       return 0;
+}
+
+/*
+ * Apply cooling state @state to the Port stored in @cdev's devdata.
+ *
+ * Cooling state 0 is the same as the maximum PCIe speed; each further
+ * state selects the next lower bus speed. Returns whatever
+ * pcie_set_target_speed() returns for the computed speed.
+ */
+static int pcie_cooling_set_cur_level(struct thermal_cooling_device *cdev, unsigned long state)
+{
+       struct pci_dev *pdev = cdev->devdata;
+       enum pci_bus_speed target = (cdev->max_state - state) + PCIE_SPEED_2_5GT;
+
+       return pcie_set_target_speed(pdev, target, true);
+}
+
+/*
+ * Cooling device callbacks passed to thermal_cooling_device_register().
+ * The table is never modified after initialization, hence const.
+ */
+static const struct thermal_cooling_device_ops pcie_cooling_ops = {
+       .get_max_state = pcie_cooling_get_max_level,
+       .get_cur_state = pcie_cooling_get_cur_level,
+       .set_cur_state = pcie_cooling_set_cur_level,
+};
+
+/*
+ * Register a thermal cooling device for @port.
+ *
+ * The name handed to the thermal core is COOLING_DEV_TYPE_PREFIX followed
+ * by pci_name(@port); @port itself is passed as the cooling device's
+ * devdata, which the state callbacks read back.
+ *
+ * Returns the result of thermal_cooling_device_register(), or
+ * ERR_PTR(-ENOMEM) if the name cannot be allocated.
+ */
+struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port)
+{
+       /* __free(kfree) releases the temporary name when it goes out of scope */
+       char *name __free(kfree) =
+               kasprintf(GFP_KERNEL, COOLING_DEV_TYPE_PREFIX "%s", pci_name(port));
+       if (!name)
+               return ERR_PTR(-ENOMEM);
+
+       return thermal_cooling_device_register(name, port, &pcie_cooling_ops);
+}
+
+/*
+ * Unregister a cooling device obtained from pcie_cooling_device_register().
+ *
+ * Thin wrapper that forwards @cdev to thermal_cooling_device_unregister().
+ */
+void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev)
+{
+       thermal_cooling_device_unregister(cdev);
+}
+
+/*
+ * For bus_speed <-> state arithmetic: the cooling state callbacks add and
+ * subtract offsets from PCIE_SPEED_2_5GT, which is only valid while the
+ * PCIe speed enumerators are consecutive. Break the build if they are not.
+ */
+static_assert(PCIE_SPEED_2_5GT + 1 == PCIE_SPEED_5_0GT);
+static_assert(PCIE_SPEED_5_0GT + 1 == PCIE_SPEED_8_0GT);
+static_assert(PCIE_SPEED_8_0GT + 1 == PCIE_SPEED_16_0GT);
+static_assert(PCIE_SPEED_16_0GT + 1 == PCIE_SPEED_32_0GT);
+static_assert(PCIE_SPEED_32_0GT + 1 == PCIE_SPEED_64_0GT);
+
+MODULE_AUTHOR("Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>");
+MODULE_DESCRIPTION("PCIe cooling driver");
diff --git a/include/linux/pci-bwctrl.h b/include/linux/pci-bwctrl.h
new file mode 100644 (file)
index 0000000..cee0712
--- /dev/null
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * PCIe bandwidth controller
+ *
+ * Copyright (C) 2023-2024 Intel Corporation
+ */
+
+#ifndef LINUX_PCI_BWCTRL_H
+#define LINUX_PCI_BWCTRL_H
+
+#include <linux/pci.h>
+
+struct thermal_cooling_device;
+
+#ifdef CONFIG_PCIE_THERMAL
+/* Implemented in drivers/thermal/pcie_cooling.c */
+struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port);
+void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev);
+#else
+/*
+ * Stubs used when CONFIG_PCIE_THERMAL is not set: registration yields NULL
+ * (no cooling device) and unregistration does nothing.
+ */
+static inline struct thermal_cooling_device *pcie_cooling_device_register(struct pci_dev *port)
+{
+       return NULL;
+}
+static inline void pcie_cooling_device_unregister(struct thermal_cooling_device *cdev)
+{
+}
+#endif
+
+#endif