6.1-stable patches

author Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sat, 12 Aug 2023 08:15:19 +0000 (10:15 +0200)

committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>

Sat, 12 Aug 2023 08:15:19 +0000 (10:15 +0200)
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 12 Aug 2023 08:15:19 +0000 (10:15 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Sat, 12 Aug 2023 08:15:19 +0000 (10:15 +0200)
diff --git a/queue-6.1/drm-amd-pm-avoid-unintentional-shutdown-due-to-temperature-momentary-fluctuation.patch b/queue-6.1/drm-amd-pm-avoid-unintentional-shutdown-due-to-temperature-momentary-fluctuation.patch

new file mode 100644 (file)

index 0000000..75179b4
--- /dev/null
+++ b/queue-6.1/drm-amd-pm-avoid-unintentional-shutdown-due-to-temperature-momentary-fluctuation.patch
@@ -0,0 +1,308 @@
+From stable-owner@vger.kernel.org Fri Aug 11 18:41:02 2023
+From: Mario Limonciello <mario.limonciello@amd.com>
+Date: Fri, 11 Aug 2023 11:40:31 -0500
+Subject: drm/amd/pm: avoid unintentional shutdown due to temperature momentary fluctuation
+To: <stable@vger.kernel.org>
+Cc: Mario Limonciello <mario.limonciello@amd.com>
+Message-ID: <20230811164031.24687-5-mario.limonciello@amd.com>
+
+From: Evan Quan <evan.quan@amd.com>
+
+commit b75efe88b20c2be28b67e2821a794cc183e32374 upstream
+
+An intentional delay is added when a soft CTF (SW CTF) is triggered.
+The GPU temperature is then double-checked before any further action
+is taken. This avoids unintended shutdowns caused by momentary
+temperature fluctuations.
+
+Signed-off-by: Evan Quan <evan.quan@amd.com>
+Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+[ Hand-modified because:
+  * XCP support added to amdgpu.h in kernel 6.5
+    and is not necessary for this fix.
+  * SMU microcode initialization moved in
+    32806038aa76 ("drm/amd: Load SMU microcode during early_init") ]
+Link: https://gitlab.freedesktop.org/drm/amd/-/issues/1267
+Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2779
+Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu.h                 |    3 +
+ drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c    |   48 ++++++++++++++++++++
+ drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c |   27 +++--------
+ drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h        |    2 
+ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c           |   34 ++++++++++++++
+ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h       |    2 
+ drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c      |    9 ---
+ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c      |    9 ---
+ 8 files changed, 102 insertions(+), 32 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+@@ -284,6 +284,9 @@ extern int amdgpu_sg_display;
+ #define AMDGPU_SMARTSHIFT_MAX_BIAS (100)
+ #define AMDGPU_SMARTSHIFT_MIN_BIAS (-100)
+ 
++/* Extra time delay(in ms) to eliminate the influence of temperature momentary fluctuation */
++#define AMDGPU_SWCTF_EXTRA_DELAY              50
++
+ struct amdgpu_device;
+ struct amdgpu_irq_src;
+ struct amdgpu_fpriv;
+--- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
+@@ -26,6 +26,7 @@
+ #include <linux/gfp.h>
+ #include <linux/slab.h>
+ #include <linux/firmware.h>
++#include <linux/reboot.h>
+ #include "amd_shared.h"
+ #include "amd_powerplay.h"
+ #include "power_state.h"
+@@ -91,6 +92,45 @@ static int pp_early_init(void *handle)
+       return 0;
+ }
+ 
++static void pp_swctf_delayed_work_handler(struct work_struct *work)
++{
++      struct pp_hwmgr *hwmgr =
++              container_of(work, struct pp_hwmgr, swctf_delayed_work.work);
++      struct amdgpu_device *adev = hwmgr->adev;
++      struct amdgpu_dpm_thermal *range =
++                              &adev->pm.dpm.thermal;
++      uint32_t gpu_temperature, size;
++      int ret;
++
++      /*
++       * If the hotspot/edge temperature is confirmed as below SW CTF setting point
++       * after the delay enforced, nothing will be done.
++       * Otherwise, a graceful shutdown will be performed to prevent further damage.
++       */
++      if (range->sw_ctf_threshold &&
++          hwmgr->hwmgr_func->read_sensor) {
++              ret = hwmgr->hwmgr_func->read_sensor(hwmgr,
++                                                   AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
++                                                   &gpu_temperature,
++                                                   &size);
++              /*
++               * For some legacy ASICs, hotspot temperature retrieving might be not
++               * supported. Check the edge temperature instead then.
++               */
++              if (ret == -EOPNOTSUPP)
++                      ret = hwmgr->hwmgr_func->read_sensor(hwmgr,
++                                                           AMDGPU_PP_SENSOR_EDGE_TEMP,
++                                                           &gpu_temperature,
++                                                           &size);
++              if (!ret && gpu_temperature / 1000 < range->sw_ctf_threshold)
++                      return;
++      }
++
++      dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW CTF) detected!\n");
++      dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU SW CTF!\n");
++      orderly_poweroff(true);
++}
++
+ static int pp_sw_init(void *handle)
+ {
+       struct amdgpu_device *adev = handle;
+@@ -101,6 +141,10 @@ static int pp_sw_init(void *handle)
+ 
+       pr_debug("powerplay sw init %s\n", ret ? "failed" : "successfully");
+ 
++      if (!ret)
++              INIT_DELAYED_WORK(&hwmgr->swctf_delayed_work,
++                                pp_swctf_delayed_work_handler);
++
+       return ret;
+ }
+ 
+@@ -136,6 +180,8 @@ static int pp_hw_fini(void *handle)
+       struct amdgpu_device *adev = handle;
+       struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
+ 
++      cancel_delayed_work_sync(&hwmgr->swctf_delayed_work);
++
+       hwmgr_hw_fini(hwmgr);
+ 
+       return 0;
+@@ -222,6 +268,8 @@ static int pp_suspend(void *handle)
+       struct amdgpu_device *adev = handle;
+       struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
+ 
++      cancel_delayed_work_sync(&hwmgr->swctf_delayed_work);
++
+       return hwmgr_suspend(hwmgr);
+ }
+ 
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu_helper.c
+@@ -603,21 +603,17 @@ int phm_irq_process(struct amdgpu_device
+                          struct amdgpu_irq_src *source,
+                          struct amdgpu_iv_entry *entry)
+ {
++      struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
+       uint32_t client_id = entry->client_id;
+       uint32_t src_id = entry->src_id;
+ 
+       if (client_id == AMDGPU_IRQ_CLIENTID_LEGACY) {
+               if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_LOW_TO_HIGH) {
+-                      dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW CTF) detected!\n");
+-                      /*
+-                       * SW CTF just occurred.
+-                       * Try to do a graceful shutdown to prevent further damage.
+-                       */
+-                      dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU SW CTF!\n");
+-                      orderly_poweroff(true);
+-              } else if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_HIGH_TO_LOW)
++                      schedule_delayed_work(&hwmgr->swctf_delayed_work,
++                                            msecs_to_jiffies(AMDGPU_SWCTF_EXTRA_DELAY));
++              } else if (src_id == VISLANDS30_IV_SRCID_CG_TSS_THERMAL_HIGH_TO_LOW) {
+                       dev_emerg(adev->dev, "ERROR: GPU under temperature range detected!\n");
+-              else if (src_id == VISLANDS30_IV_SRCID_GPIO_19) {
++              } else if (src_id == VISLANDS30_IV_SRCID_GPIO_19) {
+                       dev_emerg(adev->dev, "ERROR: GPU HW Critical Temperature Fault(aka CTF) detected!\n");
+                       /*
+                        * HW CTF just occurred. Shutdown to prevent further damage.
+@@ -626,15 +622,10 @@ int phm_irq_process(struct amdgpu_device
+                       orderly_poweroff(true);
+               }
+       } else if (client_id == SOC15_IH_CLIENTID_THM) {
+-              if (src_id == 0) {
+-                      dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW CTF) detected!\n");
+-                      /*
+-                       * SW CTF just occurred.
+-                       * Try to do a graceful shutdown to prevent further damage.
+-                       */
+-                      dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU SW CTF!\n");
+-                      orderly_poweroff(true);
+-              } else
++              if (src_id == 0)
++                      schedule_delayed_work(&hwmgr->swctf_delayed_work,
++                                            msecs_to_jiffies(AMDGPU_SWCTF_EXTRA_DELAY));
++              else
+                       dev_emerg(adev->dev, "ERROR: GPU under temperature range detected!\n");
+       } else if (client_id == SOC15_IH_CLIENTID_ROM_SMUIO) {
+               dev_emerg(adev->dev, "ERROR: GPU HW Critical Temperature Fault(aka CTF) detected!\n");
+--- a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h
++++ b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h
+@@ -811,6 +811,8 @@ struct pp_hwmgr {
+       bool gfxoff_state_changed_by_workload;
+       uint32_t pstate_sclk_peak;
+       uint32_t pstate_mclk_peak;
++
++      struct delayed_work swctf_delayed_work;
+ };
+ 
+ int hwmgr_early_init(struct pp_hwmgr *hwmgr);
+--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+@@ -24,6 +24,7 @@
+ 
+ #include <linux/firmware.h>
+ #include <linux/pci.h>
++#include <linux/reboot.h>
+ 
+ #include "amdgpu.h"
+ #include "amdgpu_smu.h"
+@@ -1061,6 +1062,34 @@ static void smu_interrupt_work_fn(struct
+               smu->ppt_funcs->interrupt_work(smu);
+ }
+ 
++static void smu_swctf_delayed_work_handler(struct work_struct *work)
++{
++      struct smu_context *smu =
++              container_of(work, struct smu_context, swctf_delayed_work.work);
++      struct smu_temperature_range *range =
++                              &smu->thermal_range;
++      struct amdgpu_device *adev = smu->adev;
++      uint32_t hotspot_tmp, size;
++
++      /*
++       * If the hotspot temperature is confirmed as below SW CTF setting point
++       * after the delay enforced, nothing will be done.
++       * Otherwise, a graceful shutdown will be performed to prevent further damage.
++       */
++      if (range->software_shutdown_temp &&
++          smu->ppt_funcs->read_sensor &&
++          !smu->ppt_funcs->read_sensor(smu,
++                                       AMDGPU_PP_SENSOR_HOTSPOT_TEMP,
++                                       &hotspot_tmp,
++                                       &size) &&
++          hotspot_tmp / 1000 < range->software_shutdown_temp)
++              return;
++
++      dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW CTF) detected!\n");
++      dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU SW CTF!\n");
++      orderly_poweroff(true);
++}
++
+ static int smu_sw_init(void *handle)
+ {
+       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+@@ -1109,6 +1138,9 @@ static int smu_sw_init(void *handle)
+               return ret;
+       }
+ 
++      INIT_DELAYED_WORK(&smu->swctf_delayed_work,
++                        smu_swctf_delayed_work_handler);
++
+       ret = smu_smc_table_sw_init(smu);
+       if (ret) {
+               dev_err(adev->dev, "Failed to sw init smc table!\n");
+@@ -1581,6 +1613,8 @@ static int smu_smc_hw_cleanup(struct smu
+               return ret;
+       }
+ 
++      cancel_delayed_work_sync(&smu->swctf_delayed_work);
++
+       ret = smu_disable_dpms(smu);
+       if (ret) {
+               dev_err(adev->dev, "Fail to disable dpm features!\n");
+--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
++++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+@@ -573,6 +573,8 @@ struct smu_context
+       u32 debug_param_reg;
+       u32 debug_msg_reg;
+       u32 debug_resp_reg;
++
++      struct delayed_work             swctf_delayed_work;
+ };
+ 
+ struct i2c_adapter;
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
+@@ -1438,13 +1438,8 @@ static int smu_v11_0_irq_process(struct
+       if (client_id == SOC15_IH_CLIENTID_THM) {
+               switch (src_id) {
+               case THM_11_0__SRCID__THM_DIG_THERM_L2H:
+-                      dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW CTF) detected!\n");
+-                      /*
+-                       * SW CTF just occurred.
+-                       * Try to do a graceful shutdown to prevent further damage.
+-                       */
+-                      dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU SW CTF!\n");
+-                      orderly_poweroff(true);
++                      schedule_delayed_work(&smu->swctf_delayed_work,
++                                            msecs_to_jiffies(AMDGPU_SWCTF_EXTRA_DELAY));
+               break;
+               case THM_11_0__SRCID__THM_DIG_THERM_H2L:
+                       dev_emerg(adev->dev, "ERROR: GPU under temperature range detected\n");
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0.c
+@@ -1386,13 +1386,8 @@ static int smu_v13_0_irq_process(struct
+       if (client_id == SOC15_IH_CLIENTID_THM) {
+               switch (src_id) {
+               case THM_11_0__SRCID__THM_DIG_THERM_L2H:
+-                      dev_emerg(adev->dev, "ERROR: GPU over temperature range(SW CTF) detected!\n");
+-                      /*
+-                       * SW CTF just occurred.
+-                       * Try to do a graceful shutdown to prevent further damage.
+-                       */
+-                      dev_emerg(adev->dev, "ERROR: System is going to shutdown due to GPU SW CTF!\n");
+-                      orderly_poweroff(true);
++                      schedule_delayed_work(&smu->swctf_delayed_work,
++                                            msecs_to_jiffies(AMDGPU_SWCTF_EXTRA_DELAY));
+                       break;
+               case THM_11_0__SRCID__THM_DIG_THERM_H2L:
+                       dev_emerg(adev->dev, "ERROR: GPU under temperature range detected\n");
diff --git a/queue-6.1/drm-amd-pm-expose-swctf-threshold-setting-for-legacy-powerplay.patch b/queue-6.1/drm-amd-pm-expose-swctf-threshold-setting-for-legacy-powerplay.patch

new file mode 100644 (file)

index 0000000..56db649
--- /dev/null
+++ b/queue-6.1/drm-amd-pm-expose-swctf-threshold-setting-for-legacy-powerplay.patch
@@ -0,0 +1,148 @@
+From stable-owner@vger.kernel.org Fri Aug 11 18:40:59 2023
+From: Mario Limonciello <mario.limonciello@amd.com>
+Date: Fri, 11 Aug 2023 11:40:29 -0500
+Subject: drm/amd/pm: expose swctf threshold setting for legacy powerplay
+To: <stable@vger.kernel.org>
+Cc: Mario Limonciello <mario.limonciello@amd.com>
+Message-ID: <20230811164031.24687-3-mario.limonciello@amd.com>
+
+From: Evan Quan <evan.quan@amd.com>
+
+commit 064329c595da56eff6d7a7e7760660c726433139 upstream
+
+Preparation for the coming optimization which eliminates the influence
+of momentary GPU temperature fluctuations.
+
+Signed-off-by: Evan Quan <evan.quan@amd.com>
+Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h                  |    2 ++
+ drivers/gpu/drm/amd/pm/powerplay/hwmgr/hardwaremanager.c |    4 +++-
+ drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c      |    2 ++
+ drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c    |   10 ++++++++++
+ drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c    |    4 ++++
+ drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c    |    4 ++++
+ drivers/gpu/drm/amd/pm/powerplay/inc/power_state.h       |    1 +
+ 7 files changed, 26 insertions(+), 1 deletion(-)
+
+--- a/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
++++ b/drivers/gpu/drm/amd/pm/inc/amdgpu_dpm.h
+@@ -89,6 +89,8 @@ struct amdgpu_dpm_thermal {
+       int                max_mem_crit_temp;
+       /* memory max emergency(shutdown) temp */
+       int                max_mem_emergency_temp;
++      /* SWCTF threshold */
++      int                sw_ctf_threshold;
+       /* was last interrupt low to high or high to low */
+       bool               high_to_low;
+       /* interrupt source */
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/hardwaremanager.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/hardwaremanager.c
+@@ -241,7 +241,8 @@ int phm_start_thermal_controller(struct
+               TEMP_RANGE_MAX,
+               TEMP_RANGE_MIN,
+               TEMP_RANGE_MAX,
+-              TEMP_RANGE_MAX};
++              TEMP_RANGE_MAX,
++              0};
+       struct amdgpu_device *adev = hwmgr->adev;
+ 
+       if (!hwmgr->not_vf)
+@@ -265,6 +266,7 @@ int phm_start_thermal_controller(struct
+       adev->pm.dpm.thermal.min_mem_temp = range.mem_min;
+       adev->pm.dpm.thermal.max_mem_crit_temp = range.mem_crit_max;
+       adev->pm.dpm.thermal.max_mem_emergency_temp = range.mem_emergency_max;
++      adev->pm.dpm.thermal.sw_ctf_threshold = range.sw_ctf_threshold;
+ 
+       return ret;
+ }
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
+@@ -5381,6 +5381,8 @@ static int smu7_get_thermal_temperature_
+               thermal_data->max = data->thermal_temp_setting.temperature_shutdown *
+                       PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ 
++      thermal_data->sw_ctf_threshold = thermal_data->max;
++
+       return 0;
+ }
+ 
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
+@@ -5221,6 +5221,9 @@ static int vega10_get_thermal_temperatur
+ {
+       struct vega10_hwmgr *data = hwmgr->backend;
+       PPTable_t *pp_table = &(data->smc_state_table.pp_table);
++      struct phm_ppt_v2_information *pp_table_info =
++              (struct phm_ppt_v2_information *)(hwmgr->pptable);
++      struct phm_tdp_table *tdp_table = pp_table_info->tdp_table;
+ 
+       memcpy(thermal_data, &SMU7ThermalWithDelayPolicy[0], sizeof(struct PP_TemperatureRange));
+ 
+@@ -5237,6 +5240,13 @@ static int vega10_get_thermal_temperatur
+       thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
+               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ 
++      if (tdp_table->usSoftwareShutdownTemp > pp_table->ThotspotLimit &&
++          tdp_table->usSoftwareShutdownTemp < VEGA10_THERMAL_MAXIMUM_ALERT_TEMP)
++              thermal_data->sw_ctf_threshold = tdp_table->usSoftwareShutdownTemp;
++      else
++              thermal_data->sw_ctf_threshold = VEGA10_THERMAL_MAXIMUM_ALERT_TEMP;
++      thermal_data->sw_ctf_threshold *= PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
++
+       return 0;
+ }
+ 
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c
+@@ -2742,6 +2742,8 @@ static int vega12_notify_cac_buffer_info
+ static int vega12_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
+               struct PP_TemperatureRange *thermal_data)
+ {
++      struct phm_ppt_v3_information *pptable_information =
++              (struct phm_ppt_v3_information *)hwmgr->pptable;
+       struct vega12_hwmgr *data =
+                       (struct vega12_hwmgr *)(hwmgr->backend);
+       PPTable_t *pp_table = &(data->smc_state_table.pp_table);
+@@ -2760,6 +2762,8 @@ static int vega12_get_thermal_temperatur
+               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+       thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
+               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
++      thermal_data->sw_ctf_threshold = pptable_information->us_software_shutdown_temp *
++              PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ 
+       return 0;
+ }
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
+@@ -4213,6 +4213,8 @@ static int vega20_notify_cac_buffer_info
+ static int vega20_get_thermal_temperature_range(struct pp_hwmgr *hwmgr,
+               struct PP_TemperatureRange *thermal_data)
+ {
++      struct phm_ppt_v3_information *pptable_information =
++              (struct phm_ppt_v3_information *)hwmgr->pptable;
+       struct vega20_hwmgr *data =
+                       (struct vega20_hwmgr *)(hwmgr->backend);
+       PPTable_t *pp_table = &(data->smc_state_table.pp_table);
+@@ -4231,6 +4233,8 @@ static int vega20_get_thermal_temperatur
+               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+       thermal_data->mem_emergency_max = (pp_table->ThbmLimit + CTF_OFFSET_HBM)*
+               PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
++      thermal_data->sw_ctf_threshold = pptable_information->us_software_shutdown_temp *
++              PP_TEMPERATURE_UNITS_PER_CENTIGRADES;
+ 
+       return 0;
+ }
+--- a/drivers/gpu/drm/amd/pm/powerplay/inc/power_state.h
++++ b/drivers/gpu/drm/amd/pm/powerplay/inc/power_state.h
+@@ -131,6 +131,7 @@ struct PP_TemperatureRange {
+       int mem_min;
+       int mem_crit_max;
+       int mem_emergency_max;
++      int sw_ctf_threshold;
+ };
+ 
+ struct PP_StateValidationBlock {
diff --git a/queue-6.1/drm-amd-pm-fulfill-powerplay-peak-profiling-mode-shader-memory-clock-settings.patch b/queue-6.1/drm-amd-pm-fulfill-powerplay-peak-profiling-mode-shader-memory-clock-settings.patch

new file mode 100644 (file)

index 0000000..ea18915
--- /dev/null
+++ b/queue-6.1/drm-amd-pm-fulfill-powerplay-peak-profiling-mode-shader-memory-clock-settings.patch
@@ -0,0 +1,417 @@
+From stable-owner@vger.kernel.org Fri Aug 11 18:41:02 2023
+From: Mario Limonciello <mario.limonciello@amd.com>
+Date: Fri, 11 Aug 2023 11:40:30 -0500
+Subject: drm/amd/pm: fulfill powerplay peak profiling mode shader/memory clock settings
+To: <stable@vger.kernel.org>
+Cc: Mario Limonciello <mario.limonciello@amd.com>
+Message-ID: <20230811164031.24687-4-mario.limonciello@amd.com>
+
+From: Evan Quan <evan.quan@amd.com>
+
+commit b1a9557a7d00c758ed9e701fbb3445a13a49506f upstream
+
+Enable peak profiling mode shader/memory clock reporting for the
+powerplay framework.
+
+Signed-off-by: Evan Quan <evan.quan@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c      |   10 +-
+ drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c  |   16 +++
+ drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c   |   76 ++++++++++++++----
+ drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c   |   16 +++
+ drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c |   31 ++++++-
+ drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c |   22 +++++
+ drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c |   20 +---
+ drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h          |    2 
+ 8 files changed, 155 insertions(+), 38 deletions(-)
+
+--- a/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/amd_powerplay.c
+@@ -769,10 +769,16 @@ static int pp_dpm_read_sensor(void *hand
+ 
+       switch (idx) {
+       case AMDGPU_PP_SENSOR_STABLE_PSTATE_SCLK:
+-              *((uint32_t *)value) = hwmgr->pstate_sclk;
++              *((uint32_t *)value) = hwmgr->pstate_sclk * 100;
+               return 0;
+       case AMDGPU_PP_SENSOR_STABLE_PSTATE_MCLK:
+-              *((uint32_t *)value) = hwmgr->pstate_mclk;
++              *((uint32_t *)value) = hwmgr->pstate_mclk * 100;
++              return 0;
++      case AMDGPU_PP_SENSOR_PEAK_PSTATE_SCLK:
++              *((uint32_t *)value) = hwmgr->pstate_sclk_peak * 100;
++              return 0;
++      case AMDGPU_PP_SENSOR_PEAK_PSTATE_MCLK:
++              *((uint32_t *)value) = hwmgr->pstate_mclk_peak * 100;
+               return 0;
+       case AMDGPU_PP_SENSOR_MIN_FAN_RPM:
+               *((uint32_t *)value) = hwmgr->thermal_controller.fanInfo.ulMinRPM;
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu10_hwmgr.c
+@@ -375,6 +375,17 @@ static int smu10_enable_gfx_off(struct p
+       return 0;
+ }
+ 
++static void smu10_populate_umdpstate_clocks(struct pp_hwmgr *hwmgr)
++{
++      hwmgr->pstate_sclk = SMU10_UMD_PSTATE_GFXCLK;
++      hwmgr->pstate_mclk = SMU10_UMD_PSTATE_FCLK;
++
++      smum_send_msg_to_smc(hwmgr,
++                           PPSMC_MSG_GetMaxGfxclkFrequency,
++                           &hwmgr->pstate_sclk_peak);
++      hwmgr->pstate_mclk_peak = SMU10_UMD_PSTATE_PEAK_FCLK;
++}
++
+ static int smu10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
+ {
+       struct amdgpu_device *adev = hwmgr->adev;
+@@ -398,6 +409,8 @@ static int smu10_enable_dpm_tasks(struct
+                       return ret;
+       }
+ 
++      smu10_populate_umdpstate_clocks(hwmgr);
++
+       return 0;
+ }
+ 
+@@ -574,9 +587,6 @@ static int smu10_hwmgr_backend_init(stru
+ 
+       hwmgr->platform_descriptor.minimumClocksReductionPercentage = 50;
+ 
+-      hwmgr->pstate_sclk = SMU10_UMD_PSTATE_GFXCLK * 100;
+-      hwmgr->pstate_mclk = SMU10_UMD_PSTATE_FCLK * 100;
+-
+       /* enable the pp_od_clk_voltage sysfs file */
+       hwmgr->od_enabled = 1;
+       /* disabled fine grain tuning function by default */
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
+@@ -1501,6 +1501,65 @@ static int smu7_populate_edc_leakage_reg
+       return ret;
+ }
+ 
++static void smu7_populate_umdpstate_clocks(struct pp_hwmgr *hwmgr)
++{
++      struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
++      struct smu7_dpm_table *golden_dpm_table = &data->golden_dpm_table;
++      struct phm_clock_voltage_dependency_table *vddc_dependency_on_sclk =
++                      hwmgr->dyn_state.vddc_dependency_on_sclk;
++      struct phm_ppt_v1_information *table_info =
++                      (struct phm_ppt_v1_information *)(hwmgr->pptable);
++      struct phm_ppt_v1_clock_voltage_dependency_table *vdd_dep_on_sclk =
++                      table_info->vdd_dep_on_sclk;
++      int32_t tmp_sclk, count, percentage;
++
++      if (golden_dpm_table->mclk_table.count == 1) {
++              percentage = 70;
++              hwmgr->pstate_mclk = golden_dpm_table->mclk_table.dpm_levels[0].value;
++      } else {
++              percentage = 100 * golden_dpm_table->sclk_table.dpm_levels[golden_dpm_table->sclk_table.count - 1].value /
++                              golden_dpm_table->mclk_table.dpm_levels[golden_dpm_table->mclk_table.count - 1].value;
++              hwmgr->pstate_mclk = golden_dpm_table->mclk_table.dpm_levels[golden_dpm_table->mclk_table.count - 2].value;
++      }
++
++      tmp_sclk = hwmgr->pstate_mclk * percentage / 100;
++
++      if (hwmgr->pp_table_version == PP_TABLE_V0) {
++              for (count = vddc_dependency_on_sclk->count - 1; count >= 0; count--) {
++                      if (tmp_sclk >= vddc_dependency_on_sclk->entries[count].clk) {
++                              hwmgr->pstate_sclk = vddc_dependency_on_sclk->entries[count].clk;
++                              break;
++                      }
++              }
++              if (count < 0)
++                      hwmgr->pstate_sclk = vddc_dependency_on_sclk->entries[0].clk;
++
++              hwmgr->pstate_sclk_peak =
++                      vddc_dependency_on_sclk->entries[vddc_dependency_on_sclk->count - 1].clk;
++      } else if (hwmgr->pp_table_version == PP_TABLE_V1) {
++              for (count = vdd_dep_on_sclk->count - 1; count >= 0; count--) {
++                      if (tmp_sclk >= vdd_dep_on_sclk->entries[count].clk) {
++                              hwmgr->pstate_sclk = vdd_dep_on_sclk->entries[count].clk;
++                              break;
++                      }
++              }
++              if (count < 0)
++                      hwmgr->pstate_sclk = vdd_dep_on_sclk->entries[0].clk;
++
++              hwmgr->pstate_sclk_peak =
++                      vdd_dep_on_sclk->entries[vdd_dep_on_sclk->count - 1].clk;
++      }
++
++      hwmgr->pstate_mclk_peak =
++              golden_dpm_table->mclk_table.dpm_levels[golden_dpm_table->mclk_table.count - 1].value;
++
++      /* make sure the output is in Mhz */
++      hwmgr->pstate_sclk /= 100;
++      hwmgr->pstate_mclk /= 100;
++      hwmgr->pstate_sclk_peak /= 100;
++      hwmgr->pstate_mclk_peak /= 100;
++}
++
+ static int smu7_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
+ {
+       int tmp_result = 0;
+@@ -1625,6 +1684,8 @@ static int smu7_enable_dpm_tasks(struct
+       PP_ASSERT_WITH_CODE((0 == tmp_result),
+                       "pcie performance request failed!", result = tmp_result);
+ 
++      smu7_populate_umdpstate_clocks(hwmgr);
++
+       return 0;
+ }
+ 
+@@ -3143,15 +3204,12 @@ static int smu7_get_profiling_clk(struct
+               for (count = hwmgr->dyn_state.vddc_dependency_on_sclk->count-1;
+                       count >= 0; count--) {
+                       if (tmp_sclk >= hwmgr->dyn_state.vddc_dependency_on_sclk->entries[count].clk) {
+-                              tmp_sclk = hwmgr->dyn_state.vddc_dependency_on_sclk->entries[count].clk;
+                               *sclk_mask = count;
+                               break;
+                       }
+               }
+-              if (count < 0 || level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) {
++              if (count < 0 || level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK)
+                       *sclk_mask = 0;
+-                      tmp_sclk = hwmgr->dyn_state.vddc_dependency_on_sclk->entries[0].clk;
+-              }
+ 
+               if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
+                       *sclk_mask = hwmgr->dyn_state.vddc_dependency_on_sclk->count-1;
+@@ -3161,15 +3219,12 @@ static int smu7_get_profiling_clk(struct
+ 
+               for (count = table_info->vdd_dep_on_sclk->count-1; count >= 0; count--) {
+                       if (tmp_sclk >= table_info->vdd_dep_on_sclk->entries[count].clk) {
+-                              tmp_sclk = table_info->vdd_dep_on_sclk->entries[count].clk;
+                               *sclk_mask = count;
+                               break;
+                       }
+               }
+-              if (count < 0 || level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) {
++              if (count < 0 || level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK)
+                       *sclk_mask = 0;
+-                      tmp_sclk =  table_info->vdd_dep_on_sclk->entries[0].clk;
+-              }
+ 
+               if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
+                       *sclk_mask = table_info->vdd_dep_on_sclk->count - 1;
+@@ -3181,8 +3236,6 @@ static int smu7_get_profiling_clk(struct
+               *mclk_mask = golden_dpm_table->mclk_table.count - 1;
+ 
+       *pcie_mask = data->dpm_table.pcie_speed_table.count - 1;
+-      hwmgr->pstate_sclk = tmp_sclk;
+-      hwmgr->pstate_mclk = tmp_mclk;
+ 
+       return 0;
+ }
+@@ -3195,9 +3248,6 @@ static int smu7_force_dpm_level(struct p
+       uint32_t mclk_mask = 0;
+       uint32_t pcie_mask = 0;
+ 
+-      if (hwmgr->pstate_sclk == 0)
+-              smu7_get_profiling_clk(hwmgr, level, &sclk_mask, &mclk_mask, &pcie_mask);
+-
+       switch (level) {
+       case AMD_DPM_FORCED_LEVEL_HIGH:
+               ret = smu7_force_dpm_highest(hwmgr);
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu8_hwmgr.c
+@@ -1016,6 +1016,18 @@ static void smu8_reset_acp_boot_level(st
+       data->acp_boot_level = 0xff;
+ }
+ 
++static void smu8_populate_umdpstate_clocks(struct pp_hwmgr *hwmgr)
++{
++      struct phm_clock_voltage_dependency_table *table =
++                              hwmgr->dyn_state.vddc_dependency_on_sclk;
++
++      hwmgr->pstate_sclk = table->entries[0].clk / 100; /* first entry; /100 presumably yields MHz - TODO confirm */
++      hwmgr->pstate_mclk = 0;
++      /* Peak sclk is taken from the last table entry; both mclk values are set to 0 in this path. */
++      hwmgr->pstate_sclk_peak = table->entries[table->count - 1].clk / 100;
++      hwmgr->pstate_mclk_peak = 0;
++}
++
+ static int smu8_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
+ {
+       smu8_program_voting_clients(hwmgr);
+@@ -1024,6 +1036,8 @@ static int smu8_enable_dpm_tasks(struct
+       smu8_program_bootup_state(hwmgr);
+       smu8_reset_acp_boot_level(hwmgr);
+ 
++      smu8_populate_umdpstate_clocks(hwmgr);
++
+       return 0;
+ }
+ 
+@@ -1167,8 +1181,6 @@ static int smu8_phm_unforce_dpm_levels(s
+ 
+       data->sclk_dpm.soft_min_clk = table->entries[0].clk;
+       data->sclk_dpm.hard_min_clk = table->entries[0].clk;
+-      hwmgr->pstate_sclk = table->entries[0].clk;
+-      hwmgr->pstate_mclk = 0;
+ 
+       level = smu8_get_max_sclk_level(hwmgr) - 1;
+ 
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
+@@ -3008,6 +3008,30 @@ static int vega10_enable_disable_PCC_lim
+       return 0;
+ }
+ 
++static void vega10_populate_umdpstate_clocks(struct pp_hwmgr *hwmgr)
++{
++      struct phm_ppt_v2_information *table_info =
++                      (struct phm_ppt_v2_information *)(hwmgr->pptable);
++
++      if (table_info->vdd_dep_on_sclk->count > VEGA10_UMD_PSTATE_GFXCLK_LEVEL &&
++          table_info->vdd_dep_on_mclk->count > VEGA10_UMD_PSTATE_MCLK_LEVEL) {
++              hwmgr->pstate_sclk = table_info->vdd_dep_on_sclk->entries[VEGA10_UMD_PSTATE_GFXCLK_LEVEL].clk;
++              hwmgr->pstate_mclk = table_info->vdd_dep_on_mclk->entries[VEGA10_UMD_PSTATE_MCLK_LEVEL].clk;
++      } else {
++              hwmgr->pstate_sclk = table_info->vdd_dep_on_sclk->entries[0].clk;
++              hwmgr->pstate_mclk = table_info->vdd_dep_on_mclk->entries[0].clk;
++      }
++
++      hwmgr->pstate_sclk_peak = table_info->vdd_dep_on_sclk->entries[table_info->vdd_dep_on_sclk->count - 1].clk;
++      hwmgr->pstate_mclk_peak = table_info->vdd_dep_on_mclk->entries[table_info->vdd_dep_on_mclk->count - 1].clk;
++
++      /* divide all four stored clocks by 100 so the output is in MHz */
++      hwmgr->pstate_sclk /= 100;
++      hwmgr->pstate_mclk /= 100;
++      hwmgr->pstate_sclk_peak /= 100;
++      hwmgr->pstate_mclk_peak /= 100;
++}
++
+ static int vega10_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
+ {
+       struct vega10_hwmgr *data = hwmgr->backend;
+@@ -3082,6 +3106,8 @@ static int vega10_enable_dpm_tasks(struc
+                                   result = tmp_result);
+       }
+ 
++      vega10_populate_umdpstate_clocks(hwmgr);
++
+       return result;
+ }
+ 
+@@ -4169,8 +4195,6 @@ static int vega10_get_profiling_clk_mask
+               *sclk_mask = VEGA10_UMD_PSTATE_GFXCLK_LEVEL;
+               *soc_mask = VEGA10_UMD_PSTATE_SOCCLK_LEVEL;
+               *mclk_mask = VEGA10_UMD_PSTATE_MCLK_LEVEL;
+-              hwmgr->pstate_sclk = table_info->vdd_dep_on_sclk->entries[VEGA10_UMD_PSTATE_GFXCLK_LEVEL].clk;
+-              hwmgr->pstate_mclk = table_info->vdd_dep_on_mclk->entries[VEGA10_UMD_PSTATE_MCLK_LEVEL].clk;
+       }
+ 
+       if (level == AMD_DPM_FORCED_LEVEL_PROFILE_MIN_SCLK) {
+@@ -4281,9 +4305,6 @@ static int vega10_dpm_force_dpm_level(st
+       uint32_t mclk_mask = 0;
+       uint32_t soc_mask = 0;
+ 
+-      if (hwmgr->pstate_sclk == 0)
+-              vega10_get_profiling_clk_mask(hwmgr, level, &sclk_mask, &mclk_mask, &soc_mask);
+-
+       switch (level) {
+       case AMD_DPM_FORCED_LEVEL_HIGH:
+               ret = vega10_force_dpm_highest(hwmgr);
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c
+@@ -1026,6 +1026,25 @@ static int vega12_get_all_clock_ranges(s
+       return 0;
+ }
+ 
++static void vega12_populate_umdpstate_clocks(struct pp_hwmgr *hwmgr)
++{
++      struct vega12_hwmgr *data = (struct vega12_hwmgr *)(hwmgr->backend);
++      struct vega12_single_dpm_table *gfx_dpm_table = &(data->dpm_table.gfx_table);
++      struct vega12_single_dpm_table *mem_dpm_table = &(data->dpm_table.mem_table);
++
++      if (gfx_dpm_table->count > VEGA12_UMD_PSTATE_GFXCLK_LEVEL &&
++          mem_dpm_table->count > VEGA12_UMD_PSTATE_MCLK_LEVEL) {
++              hwmgr->pstate_sclk = gfx_dpm_table->dpm_levels[VEGA12_UMD_PSTATE_GFXCLK_LEVEL].value;
++              hwmgr->pstate_mclk = mem_dpm_table->dpm_levels[VEGA12_UMD_PSTATE_MCLK_LEVEL].value;
++      } else {
++              hwmgr->pstate_sclk = gfx_dpm_table->dpm_levels[0].value;
++              hwmgr->pstate_mclk = mem_dpm_table->dpm_levels[0].value;
++      }
++      /* Peak is the last populated level: valid indices are below count (see the guard above), so use count - 1. */
++      hwmgr->pstate_sclk_peak = gfx_dpm_table->dpm_levels[gfx_dpm_table->count - 1].value;
++      hwmgr->pstate_mclk_peak = mem_dpm_table->dpm_levels[mem_dpm_table->count - 1].value;
++}
++
+ static int vega12_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
+ {
+       int tmp_result, result = 0;
+@@ -1077,6 +1096,9 @@ static int vega12_enable_dpm_tasks(struc
+       PP_ASSERT_WITH_CODE(!result,
+                       "Failed to setup default DPM tables!",
+                       return result);
++
++      vega12_populate_umdpstate_clocks(hwmgr);
++
+       return result;
+ }
+ 
+--- a/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
++++ b/drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
+@@ -1555,26 +1555,23 @@ static int vega20_set_mclk_od(
+       return 0;
+ }
+ 
+-static int vega20_populate_umdpstate_clocks(
+-              struct pp_hwmgr *hwmgr)
++static void vega20_populate_umdpstate_clocks(struct pp_hwmgr *hwmgr)
+ {
+       struct vega20_hwmgr *data = (struct vega20_hwmgr *)(hwmgr->backend);
+       struct vega20_single_dpm_table *gfx_table = &(data->dpm_table.gfx_table);
+       struct vega20_single_dpm_table *mem_table = &(data->dpm_table.mem_table);
+ 
+-      hwmgr->pstate_sclk = gfx_table->dpm_levels[0].value;
+-      hwmgr->pstate_mclk = mem_table->dpm_levels[0].value;
+-
+       if (gfx_table->count > VEGA20_UMD_PSTATE_GFXCLK_LEVEL &&
+           mem_table->count > VEGA20_UMD_PSTATE_MCLK_LEVEL) {
+               hwmgr->pstate_sclk = gfx_table->dpm_levels[VEGA20_UMD_PSTATE_GFXCLK_LEVEL].value;
+               hwmgr->pstate_mclk = mem_table->dpm_levels[VEGA20_UMD_PSTATE_MCLK_LEVEL].value;
++      } else {
++              hwmgr->pstate_sclk = gfx_table->dpm_levels[0].value;
++              hwmgr->pstate_mclk = mem_table->dpm_levels[0].value;
+       }
+ 
+-      hwmgr->pstate_sclk = hwmgr->pstate_sclk * 100;
+-      hwmgr->pstate_mclk = hwmgr->pstate_mclk * 100;
+-
+-      return 0;
++      hwmgr->pstate_sclk_peak = gfx_table->dpm_levels[gfx_table->count - 1].value;
++      hwmgr->pstate_mclk_peak = mem_table->dpm_levels[mem_table->count - 1].value;
+ }
+ 
+ static int vega20_get_max_sustainable_clock(struct pp_hwmgr *hwmgr,
+@@ -1753,10 +1750,7 @@ static int vega20_enable_dpm_tasks(struc
+                       "[EnableDPMTasks] Failed to initialize odn settings!",
+                       return result);
+ 
+-      result = vega20_populate_umdpstate_clocks(hwmgr);
+-      PP_ASSERT_WITH_CODE(!result,
+-                      "[EnableDPMTasks] Failed to populate umdpstate clocks!",
+-                      return result);
++      vega20_populate_umdpstate_clocks(hwmgr);
+ 
+       result = smum_send_msg_to_smc_with_parameter(hwmgr, PPSMC_MSG_GetPptLimit,
+                       POWER_SOURCE_AC << 16, &hwmgr->default_power_limit);
+--- a/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h
++++ b/drivers/gpu/drm/amd/pm/powerplay/inc/hwmgr.h
+@@ -809,6 +809,8 @@ struct pp_hwmgr {
+       uint32_t workload_prority[Workload_Policy_Max];
+       uint32_t workload_setting[Workload_Policy_Max];
+       bool gfxoff_state_changed_by_workload;
++      uint32_t pstate_sclk_peak;
++      uint32_t pstate_mclk_peak;
+ };
+ 
+ int hwmgr_early_init(struct pp_hwmgr *hwmgr);
diff --git a/queue-6.1/drm-amd-pm-fulfill-swsmu-peak-profiling-mode-shader-memory-clock-settings.patch b/queue-6.1/drm-amd-pm-fulfill-swsmu-peak-profiling-mode-shader-memory-clock-settings.patch

new file mode 100644 (file)

index 0000000..b78b762
--- /dev/null
+++ b/queue-6.1/drm-amd-pm-fulfill-swsmu-peak-profiling-mode-shader-memory-clock-settings.patch
@@ -0,0 +1,53 @@
+From stable-owner@vger.kernel.org Fri Aug 11 18:40:59 2023
+From: Mario Limonciello <mario.limonciello@amd.com>
+Date: Fri, 11 Aug 2023 11:40:28 -0500
+Subject: drm/amd/pm: fulfill swsmu peak profiling mode shader/memory clock settings
+To: <stable@vger.kernel.org>
+Cc: Mario Limonciello <mario.limonciello@amd.com>
+Message-ID: <20230811164031.24687-2-mario.limonciello@amd.com>
+
+From: Evan Quan <evan.quan@amd.com>
+
+commit 975b4b1d90ccf83da252907108f4090fb61b816e upstream
+
+Enable peak profiling mode shader/memory clocks reporting for swsmu
+framework.
+
+Signed-off-by: Evan Quan <evan.quan@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/include/kgd_pp_interface.h |    2 ++
+ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c      |    8 ++++++++
+ 2 files changed, 10 insertions(+)
+
+--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
++++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+@@ -139,6 +139,8 @@ enum amd_pp_sensors {
+       AMDGPU_PP_SENSOR_MIN_FAN_RPM,
+       AMDGPU_PP_SENSOR_MAX_FAN_RPM,
+       AMDGPU_PP_SENSOR_VCN_POWER_STATE,
++      AMDGPU_PP_SENSOR_PEAK_PSTATE_SCLK,
++      AMDGPU_PP_SENSOR_PEAK_PSTATE_MCLK,
+ };
+ 
+ enum amd_pp_task {
+--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+@@ -2520,6 +2520,14 @@ static int smu_read_sensor(void *handle,
+               *((uint32_t *)data) = pstate_table->uclk_pstate.standard * 100;
+               *size = 4;
+               break;
++      case AMDGPU_PP_SENSOR_PEAK_PSTATE_SCLK:
++              *((uint32_t *)data) = pstate_table->gfxclk_pstate.peak * 100;
++              *size = 4;
++              break;
++      case AMDGPU_PP_SENSOR_PEAK_PSTATE_MCLK:
++              *((uint32_t *)data) = pstate_table->uclk_pstate.peak * 100;
++              *size = 4;
++              break;
+       case AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK:
+               ret = smu_feature_get_enabled_mask(smu, (uint64_t *)data);
+               *size = 8;
diff --git a/queue-6.1/series b/queue-6.1/series

index bac731f8d0fbef5f9c010d42af68481f10a1c96c..5fb812cdb04dc460620ad248c28b1ae2da500691 100644 (file)
--- a/queue-6.1/series
+++ b/queue-6.1/series
@@ -34,3 +34,7 @@ cpuidle-dt_idle_genpd-add-helper-function-to-remove-genpd-topology.patch
  hwmon-pmbus-bel-pfe-enable-pmbus_skip_status_check-for-pfe1100.patch
  radix-tree-test-suite-fix-incorrect-allocation-size-for-pthreads.patch
  nilfs2-fix-use-after-free-of-nilfs_root-in-dirtying-inodes-via-iput.patch
+drm-amd-pm-fulfill-swsmu-peak-profiling-mode-shader-memory-clock-settings.patch
+drm-amd-pm-expose-swctf-threshold-setting-for-legacy-powerplay.patch
+drm-amd-pm-fulfill-powerplay-peak-profiling-mode-shader-memory-clock-settings.patch
+drm-amd-pm-avoid-unintentional-shutdown-due-to-temperature-momentary-fluctuation.patch
author	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sat, 12 Aug 2023 08:15:19 +0000 (10:15 +0200)
committer	Greg Kroah-Hartman <gregkh@linuxfoundation.org>
	Sat, 12 Aug 2023 08:15:19 +0000 (10:15 +0200)
queue-6.1/drm-amd-pm-avoid-unintentional-shutdown-due-to-temperature-momentary-fluctuation.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/drm-amd-pm-expose-swctf-threshold-setting-for-legacy-powerplay.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/drm-amd-pm-fulfill-powerplay-peak-profiling-mode-shader-memory-clock-settings.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/drm-amd-pm-fulfill-swsmu-peak-profiling-mode-shader-memory-clock-settings.patch	[new file with mode: 0644]	patch \| blob
queue-6.1/series		patch \| blob \| blame \| history