--- /dev/null
+From fd8a891d195c789a0d673965fdae50cecf3712e9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 Dec 2024 22:46:59 +0800
+Subject: ALSA: hda/realtek: fix micmute LEDs don't work on HP Laptops
+
+From: Chris Chiu <chris.chiu@canonical.com>
+
+[ Upstream commit 0d08f0eec961acdb0424a3e2cfb37cfb89154833 ]
+
+These HP laptops use the Realtek HDA codec ALC3315 combined with
+CS35L56 amplifiers. They need the quirk ALC285_FIXUP_HP_GPIO_LED
+to get the micmute LED working.
+
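+For context, a hedged sketch (not part of the change itself): each
+SND_PCI_QUIRK(subvendor, subdevice, name, fixup) entry in
+alc269_fixup_tbl[] matches the codec's PCI subsystem ID, so one added
+line per HP board ID is enough to select the GPIO-LED fixup at probe
+time:
+
+	/* 0x103c = HP; 0x8d01 = board ID; the fixup drives the LEDs
+	 * from codec GPIO pins instead of leaving them unmanaged.
+	 */
+	SND_PCI_QUIRK(0x103c, 0x8d01, "HP ZBook Power 14 G12",
+		      ALC285_FIXUP_HP_GPIO_LED),
+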
+Signed-off-by: Chris Chiu <chris.chiu@canonical.com>
+Reviewed-by: Simon Trimmer <simont@opensource.cirrus.com>
+Cc: <stable@vger.kernel.org>
+Link: https://patch.msgid.link/20241202144659.1553504-1-chris.chiu@canonical.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index e847bdb600fd..b50778e34f5d 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -10054,7 +10054,13 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
++ SND_PCI_QUIRK(0x103c, 0x8d01, "HP ZBook Power 14 G12", ALC285_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8d84, "HP EliteBook X G1i", ALC285_FIXUP_HP_GPIO_LED),
++ SND_PCI_QUIRK(0x103c, 0x8d91, "HP ZBook Firefly 14 G12", ALC285_FIXUP_HP_GPIO_LED),
++ SND_PCI_QUIRK(0x103c, 0x8d92, "HP ZBook Firefly 16 G12", ALC285_FIXUP_HP_GPIO_LED),
++ SND_PCI_QUIRK(0x103c, 0x8e18, "HP ZBook Firefly 14 G12A", ALC285_FIXUP_HP_GPIO_LED),
++	SND_PCI_QUIRK(0x103c, 0x8e19, "HP ZBook Firefly 14 G12A", ALC285_FIXUP_HP_GPIO_LED),
++ SND_PCI_QUIRK(0x103c, 0x8e1a, "HP ZBook Firefly 14 G12A", ALC285_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
+ SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
+ SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+--
+2.39.5
+
--- /dev/null
+From 4695269f975a317cf1e80dfccfc92a0f28a59b43 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Nov 2024 14:05:24 +0800
+Subject: ALSA: hda/realtek: fix mute/micmute LEDs don't work for EliteBook X
+ G1i
+
+From: Dirk Su <dirk.su@canonical.com>
+
+[ Upstream commit 7ba81e4c3aa0ca25f06dc4456e7d36fa8e76385f ]
+
+HP EliteBook X G1i needs the ALC285_FIXUP_HP_GPIO_LED quirk to
+make the mic-mute/audio-mute LEDs work.
+
+Signed-off-by: Dirk Su <dirk.su@canonical.com>
+Cc: <stable@vger.kernel.org>
+Link: https://patch.msgid.link/20241126060531.22759-1-dirk.su@canonical.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Stable-dep-of: 0d08f0eec961 ("ALSA: hda/realtek: fix micmute LEDs don't work on HP Laptops")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index d61c317b49ea..e847bdb600fd 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -10054,6 +10054,7 @@ static const struct hda_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x103c, 0x8ca4, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8ca7, "HP ZBook Fury", ALC245_FIXUP_CS35L41_SPI_2_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x103c, 0x8cf5, "HP ZBook Studio 16", ALC245_FIXUP_CS35L41_SPI_4_HP_GPIO_LED),
++ SND_PCI_QUIRK(0x103c, 0x8d84, "HP EliteBook X G1i", ALC285_FIXUP_HP_GPIO_LED),
+ SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
+ SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
+ SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
+--
+2.39.5
+
--- /dev/null
+From 484f97980955fcdb5858ff8e6b4d50447e76b962 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Aug 2024 11:16:13 +0800
+Subject: drm/amd/amdgpu: allow use kiq to do hdp flush under sriov
+
+From: Victor Zhao <Victor.Zhao@amd.com>
+
+[ Upstream commit bf2bc61638033d118c9ef4ab1204295ba6694401 ]
+
+When using the CPU to do page table updates under SRIOV runtime, KIQ
+has to be used to flush HDP, since direct MMIO access is blocked.
+
+Change WREG32_NO_KIQ to WREG32 to allow the KIQ path.
+
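+A hedged sketch of the dispatch this relies on (simplified from the
+real WREG32 plumbing; amdgpu_sriov_runtime() and amdgpu_kiq_wreg()
+are the actual helpers, the function below is illustrative only):
+
+	static void wreg32_sketch(struct amdgpu_device *adev,
+				  u32 reg, u32 v, bool no_kiq)
+	{
+		/* WREG32 may detour through the KIQ ring at SRIOV
+		 * runtime, where direct MMIO is blocked ...
+		 */
+		if (!no_kiq && amdgpu_sriov_runtime(adev))
+			amdgpu_kiq_wreg(adev, reg, v);
+		else	/* ... WREG32_NO_KIQ always writes MMIO */
+			writel(v, adev->rmmio + (reg * 4));
+	}
+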
+Signed-off-by: Victor Zhao <Victor.Zhao@amd.com>
+Reviewed-by: Emily Deng <Emily.Deng@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Stable-dep-of: c9b8dcabb52a ("drm/amdgpu/hdp4.0: do a posting read when flushing HDP")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 2 +-
+ drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c | 2 +-
+ drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c | 2 +-
+ 3 files changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+index 71d1a2e3bac9..bbc6806d0f2b 100644
+--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+@@ -41,7 +41,7 @@ static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+ {
+ if (!ring || !ring->funcs->emit_wreg)
+- WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
++ WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+ else
+ amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
+index a9ea23fa0def..ed7facacf2fe 100644
+--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
+@@ -32,7 +32,7 @@ static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+ {
+ if (!ring || !ring->funcs->emit_wreg)
+- WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
++ WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+ else
+ amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+ }
+diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
+index 063eba619f2f..53ad93f96cd9 100644
+--- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
+@@ -32,7 +32,7 @@ static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+ {
+ if (!ring || !ring->funcs->emit_wreg)
+- WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
++ WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+ else
+ amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+ }
+--
+2.39.5
+
--- /dev/null
+From 469da94bf969eab696d8001c88cad99b80e56e02 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Nov 2024 11:22:51 -0500
+Subject: drm/amdgpu/hdp4.0: do a posting read when flushing HDP
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+[ Upstream commit c9b8dcabb52afe88413ff135a0953e3cc4128483 ]
+
+Need to read back to make sure the write goes through.
+
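+This is the standard MMIO posting-read idiom; a minimal sketch of the
+pattern (generic MMIO, not the amdgpu register helpers):
+
+	/* MMIO writes can be posted (buffered) on the way to the
+	 * device; reading the same register back stalls until the
+	 * write has actually landed.
+	 */
+	static void flush_posted_write(void __iomem *reg)
+	{
+		writel(0, reg);	  /* may sit in a write buffer */
+		(void)readl(reg); /* forces completion */
+	}
+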
+Cc: David Belanger <david.belanger@amd.com>
+Reviewed-by: Frank Min <frank.min@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+index bbc6806d0f2b..30210613dc5c 100644
+--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+@@ -40,10 +40,12 @@
+ static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+ {
+- if (!ring || !ring->funcs->emit_wreg)
++ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+- else
++ RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
++ } else {
+ amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
++ }
+ }
+
+ static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
+@@ -53,11 +55,13 @@ static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
+ adev->ip_versions[HDP_HWIP][0] == IP_VERSION(4, 4, 2))
+ return;
+
+- if (!ring || !ring->funcs->emit_wreg)
++ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
+- else
++ RREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE);
++ } else {
+ amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
+ HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
++ }
+ }
+
+ static void hdp_v4_0_query_ras_error_count(struct amdgpu_device *adev,
+--
+2.39.5
+
--- /dev/null
+From 550e016b8964e01fb1dbd939044357f9ea026129 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Nov 2024 11:23:56 -0500
+Subject: drm/amdgpu/hdp5.0: do a posting read when flushing HDP
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+[ Upstream commit cf424020e040be35df05b682b546b255e74a420f ]
+
+Need to read back to make sure the write goes through.
+
+Cc: David Belanger <david.belanger@amd.com>
+Reviewed-by: Frank Min <frank.min@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
+index ed7facacf2fe..d3962d469088 100644
+--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
+@@ -31,10 +31,12 @@
+ static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+ {
+- if (!ring || !ring->funcs->emit_wreg)
++ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+- else
++ RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
++ } else {
+ amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
++ }
+ }
+
+ static void hdp_v5_0_invalidate_hdp(struct amdgpu_device *adev,
+@@ -42,6 +44,7 @@ static void hdp_v5_0_invalidate_hdp(struct amdgpu_device *adev,
+ {
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
++ RREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE);
+ } else {
+ amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
+ HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
+--
+2.39.5
+
--- /dev/null
+From c194c3e58a73b106c19069337124bd6468fc0cd2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Nov 2024 11:24:38 -0500
+Subject: drm/amdgpu/hdp6.0: do a posting read when flushing HDP
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+[ Upstream commit abe1cbaec6cfe9fde609a15cd6a12c812282ce77 ]
+
+Need to read back to make sure the write goes through.
+
+Cc: David Belanger <david.belanger@amd.com>
+Reviewed-by: Frank Min <frank.min@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
+index 53ad93f96cd9..b6d71ec1debf 100644
+--- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
+@@ -31,10 +31,12 @@
+ static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+ {
+- if (!ring || !ring->funcs->emit_wreg)
++ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+- else
++ RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
++ } else {
+ amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
++ }
+ }
+
+ static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev,
+--
+2.39.5
+
--- /dev/null
+From b847f52bec863c76922d8779f2889de0100e8aea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Sep 2023 12:00:23 -0400
+Subject: drm/amdkfd: drop struct kfd_cu_info
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+[ Upstream commit 0021d70a0654e668d457758110abec33dfbd3ba5 ]
+
+I think this was an abstraction left over from when
+kfd supported both radeon and amdgpu. Since we only
+support amdgpu now, there is no more need for this and
+we can use the amdgpu structures directly.
+
+This also avoids having the kfd_cu_info structures on
+the stack when inlining, which can blow up the stack.
+
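+A sketch of the difference (sizes approximate; the cu_bitmap array
+alone is AMDGPU_MAX_GC_INSTANCES * 4 * 4 u32s, roughly 512 bytes):
+
+	/* before: a large struct copied onto the caller's stack */
+	struct kfd_cu_info cu_info;
+	amdgpu_amdkfd_get_cu_info(adev, &cu_info);
+
+	/* after: borrow the driver's own state, one pointer each */
+	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+	struct amdgpu_gfx_config *gfx_info = &adev->gfx.config;
+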
+Cc: Arnd Bergmann <arnd@kernel.org>
+Acked-by: Arnd Bergmann <arnd@arndb.de>
+Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
+Acked-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Stable-dep-of: 438b39ac74e2 ("drm/amdkfd: pause autosuspend when creating pdd")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 22 ---------
+ drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 2 -
+ drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 28 +++++------
+ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 28 +++++------
+ drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 49 ++++++++-----------
+ .../gpu/drm/amd/include/kgd_kfd_interface.h | 14 ------
+ 6 files changed, 48 insertions(+), 95 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+index af6c6d89e63a..fbee10927bfb 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+@@ -467,28 +467,6 @@ uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev)
+ return 100;
+ }
+
+-void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev, struct kfd_cu_info *cu_info)
+-{
+- struct amdgpu_cu_info acu_info = adev->gfx.cu_info;
+-
+- memset(cu_info, 0, sizeof(*cu_info));
+- if (sizeof(cu_info->cu_bitmap) != sizeof(acu_info.bitmap))
+- return;
+-
+- cu_info->cu_active_number = acu_info.number;
+- cu_info->cu_ao_mask = acu_info.ao_cu_mask;
+- memcpy(&cu_info->cu_bitmap[0], &acu_info.bitmap[0],
+- sizeof(cu_info->cu_bitmap));
+- cu_info->num_shader_engines = adev->gfx.config.max_shader_engines;
+- cu_info->num_shader_arrays_per_engine = adev->gfx.config.max_sh_per_se;
+- cu_info->num_cu_per_sh = adev->gfx.config.max_cu_per_sh;
+- cu_info->simd_per_cu = acu_info.simd_per_cu;
+- cu_info->max_waves_per_simd = acu_info.max_waves_per_simd;
+- cu_info->wave_front_size = acu_info.wave_front_size;
+- cu_info->max_scratch_slots_per_cu = acu_info.max_scratch_slots_per_cu;
+- cu_info->lds_size = acu_info.lds_size;
+-}
+-
+ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
+ struct amdgpu_device **dmabuf_adev,
+ uint64_t *bo_size, void *metadata_buffer,
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+index 3134e6ad81d1..ff2b8ace438b 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+@@ -235,8 +235,6 @@ void amdgpu_amdkfd_get_local_mem_info(struct amdgpu_device *adev,
+ uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct amdgpu_device *adev);
+
+ uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct amdgpu_device *adev);
+-void amdgpu_amdkfd_get_cu_info(struct amdgpu_device *adev,
+- struct kfd_cu_info *cu_info);
+ int amdgpu_amdkfd_get_dmabuf_info(struct amdgpu_device *adev, int dma_buf_fd,
+ struct amdgpu_device **dmabuf_adev,
+ uint64_t *bo_size, void *metadata_buffer,
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+index f76b7aee5c0a..29a02c175228 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+@@ -2037,11 +2037,12 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
+ uint32_t proximity_domain)
+ {
+ struct crat_header *crat_table = (struct crat_header *)pcrat_image;
++ struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config;
++ struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info;
+ struct crat_subtype_generic *sub_type_hdr;
+ struct kfd_local_mem_info local_mem_info;
+ struct kfd_topology_device *peer_dev;
+ struct crat_subtype_computeunit *cu;
+- struct kfd_cu_info cu_info;
+ int avail_size = *size;
+ uint32_t total_num_of_cu;
+ uint32_t nid = 0;
+@@ -2085,21 +2086,20 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image,
+ cu->flags |= CRAT_CU_FLAGS_GPU_PRESENT;
+ cu->proximity_domain = proximity_domain;
+
+- amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
+- cu->num_simd_per_cu = cu_info.simd_per_cu;
+- cu->num_simd_cores = cu_info.simd_per_cu *
+- (cu_info.cu_active_number / kdev->kfd->num_nodes);
+- cu->max_waves_simd = cu_info.max_waves_per_simd;
++ cu->num_simd_per_cu = cu_info->simd_per_cu;
++ cu->num_simd_cores = cu_info->simd_per_cu *
++ (cu_info->number / kdev->kfd->num_nodes);
++ cu->max_waves_simd = cu_info->max_waves_per_simd;
+
+- cu->wave_front_size = cu_info.wave_front_size;
+- cu->array_count = cu_info.num_shader_arrays_per_engine *
+- cu_info.num_shader_engines;
+- total_num_of_cu = (cu->array_count * cu_info.num_cu_per_sh);
++ cu->wave_front_size = cu_info->wave_front_size;
++ cu->array_count = gfx_info->max_sh_per_se *
++ gfx_info->max_shader_engines;
++ total_num_of_cu = (cu->array_count * gfx_info->max_cu_per_sh);
+ cu->processor_id_low = get_and_inc_gpu_processor_id(total_num_of_cu);
+- cu->num_cu_per_array = cu_info.num_cu_per_sh;
+- cu->max_slots_scatch_cu = cu_info.max_scratch_slots_per_cu;
+- cu->num_banks = cu_info.num_shader_engines;
+- cu->lds_size_in_kb = cu_info.lds_size;
++ cu->num_cu_per_array = gfx_info->max_cu_per_sh;
++ cu->max_slots_scatch_cu = cu_info->max_scratch_slots_per_cu;
++ cu->num_banks = gfx_info->max_shader_engines;
++ cu->lds_size_in_kb = cu_info->lds_size;
+
+ cu->hsa_capability = 0;
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+index 4c3f37980311..b276bffcaaf3 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+@@ -99,7 +99,8 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
+ const uint32_t *cu_mask, uint32_t cu_mask_count,
+ uint32_t *se_mask, uint32_t inst)
+ {
+- struct kfd_cu_info cu_info;
++ struct amdgpu_cu_info *cu_info = &mm->dev->adev->gfx.cu_info;
++ struct amdgpu_gfx_config *gfx_info = &mm->dev->adev->gfx.config;
+ uint32_t cu_per_sh[KFD_MAX_NUM_SE][KFD_MAX_NUM_SH_PER_SE] = {0};
+ bool wgp_mode_req = KFD_GC_VERSION(mm->dev) >= IP_VERSION(10, 0, 0);
+ uint32_t en_mask = wgp_mode_req ? 0x3 : 0x1;
+@@ -108,9 +109,7 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
+ int inc = cu_inc * NUM_XCC(mm->dev->xcc_mask);
+ int xcc_inst = inst + ffs(mm->dev->xcc_mask) - 1;
+
+- amdgpu_amdkfd_get_cu_info(mm->dev->adev, &cu_info);
+-
+- cu_active_per_node = cu_info.cu_active_number / mm->dev->kfd->num_nodes;
++ cu_active_per_node = cu_info->number / mm->dev->kfd->num_nodes;
+ if (cu_mask_count > cu_active_per_node)
+ cu_mask_count = cu_active_per_node;
+
+@@ -118,13 +117,14 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
+ * Returning with no CU's enabled will hang the queue, which should be
+ * attention grabbing.
+ */
+- if (cu_info.num_shader_engines > KFD_MAX_NUM_SE) {
+- pr_err("Exceeded KFD_MAX_NUM_SE, chip reports %d\n", cu_info.num_shader_engines);
++ if (gfx_info->max_shader_engines > KFD_MAX_NUM_SE) {
++ pr_err("Exceeded KFD_MAX_NUM_SE, chip reports %d\n",
++ gfx_info->max_shader_engines);
+ return;
+ }
+- if (cu_info.num_shader_arrays_per_engine > KFD_MAX_NUM_SH_PER_SE) {
++ if (gfx_info->max_sh_per_se > KFD_MAX_NUM_SH_PER_SE) {
+ pr_err("Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
+- cu_info.num_shader_arrays_per_engine * cu_info.num_shader_engines);
++ gfx_info->max_sh_per_se * gfx_info->max_shader_engines);
+ return;
+ }
+
+@@ -142,10 +142,10 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
+ * See note on Arcturus cu_bitmap layout in gfx_v9_0_get_cu_info.
+ * See note on GFX11 cu_bitmap layout in gfx_v11_0_get_cu_info.
+ */
+- for (se = 0; se < cu_info.num_shader_engines; se++)
+- for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++)
++ for (se = 0; se < gfx_info->max_shader_engines; se++)
++ for (sh = 0; sh < gfx_info->max_sh_per_se; sh++)
+ cu_per_sh[se][sh] = hweight32(
+- cu_info.cu_bitmap[xcc_inst][se % 4][sh + (se / 4) *
++ cu_info->bitmap[xcc_inst][se % 4][sh + (se / 4) *
+ cu_bitmap_sh_mul]);
+
+ /* Symmetrically map cu_mask to all SEs & SHs:
+@@ -184,13 +184,13 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
+ *
+ * First ensure all CUs are disabled, then enable user specified CUs.
+ */
+- for (i = 0; i < cu_info.num_shader_engines; i++)
++ for (i = 0; i < gfx_info->max_shader_engines; i++)
+ se_mask[i] = 0;
+
+ i = inst;
+ for (cu = 0; cu < 16; cu += cu_inc) {
+- for (sh = 0; sh < cu_info.num_shader_arrays_per_engine; sh++) {
+- for (se = 0; se < cu_info.num_shader_engines; se++) {
++ for (sh = 0; sh < gfx_info->max_sh_per_se; sh++) {
++ for (se = 0; se < gfx_info->max_shader_engines; se++) {
+ if (cu_per_sh[se][sh] > cu) {
+ if (cu_mask[i / 32] & (en_mask << (i % 32)))
+ se_mask[se] |= en_mask << (cu + sh * 16);
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+index a51363e25624..3885bb53f019 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+@@ -1537,7 +1537,6 @@ static int kfd_dev_create_p2p_links(void)
+ /* Helper function. See kfd_fill_gpu_cache_info for parameter description */
+ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
+ struct kfd_gpu_cache_info *pcache_info,
+- struct kfd_cu_info *cu_info,
+ int cu_bitmask,
+ int cache_type, unsigned int cu_processor_id,
+ int cu_block)
+@@ -1599,7 +1598,8 @@ static int fill_in_l1_pcache(struct kfd_cache_properties **props_ext,
+ /* Helper function. See kfd_fill_gpu_cache_info for parameter description */
+ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
+ struct kfd_gpu_cache_info *pcache_info,
+- struct kfd_cu_info *cu_info,
++ struct amdgpu_cu_info *cu_info,
++ struct amdgpu_gfx_config *gfx_info,
+ int cache_type, unsigned int cu_processor_id,
+ struct kfd_node *knode)
+ {
+@@ -1610,7 +1610,7 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
+
+ start = ffs(knode->xcc_mask) - 1;
+ end = start + NUM_XCC(knode->xcc_mask);
+- cu_sibling_map_mask = cu_info->cu_bitmap[start][0][0];
++ cu_sibling_map_mask = cu_info->bitmap[start][0][0];
+ cu_sibling_map_mask &=
+ ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+ first_active_cu = ffs(cu_sibling_map_mask);
+@@ -1646,15 +1646,15 @@ static int fill_in_l2_l3_pcache(struct kfd_cache_properties **props_ext,
+ k = 0;
+
+ for (xcc = start; xcc < end; xcc++) {
+- for (i = 0; i < cu_info->num_shader_engines; i++) {
+- for (j = 0; j < cu_info->num_shader_arrays_per_engine; j++) {
++ for (i = 0; i < gfx_info->max_shader_engines; i++) {
++ for (j = 0; j < gfx_info->max_sh_per_se; j++) {
+ pcache->sibling_map[k] = (uint8_t)(cu_sibling_map_mask & 0xFF);
+ pcache->sibling_map[k+1] = (uint8_t)((cu_sibling_map_mask >> 8) & 0xFF);
+ pcache->sibling_map[k+2] = (uint8_t)((cu_sibling_map_mask >> 16) & 0xFF);
+ pcache->sibling_map[k+3] = (uint8_t)((cu_sibling_map_mask >> 24) & 0xFF);
+ k += 4;
+
+- cu_sibling_map_mask = cu_info->cu_bitmap[xcc][i % 4][j + i / 4];
++ cu_sibling_map_mask = cu_info->bitmap[xcc][i % 4][j + i / 4];
+ cu_sibling_map_mask &= ((1 << pcache_info[cache_type].num_cu_shared) - 1);
+ }
+ }
+@@ -1679,16 +1679,14 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
+ unsigned int cu_processor_id;
+ int ret;
+ unsigned int num_cu_shared;
+- struct kfd_cu_info cu_info;
+- struct kfd_cu_info *pcu_info;
++ struct amdgpu_cu_info *cu_info = &kdev->adev->gfx.cu_info;
++ struct amdgpu_gfx_config *gfx_info = &kdev->adev->gfx.config;
+ int gpu_processor_id;
+ struct kfd_cache_properties *props_ext;
+ int num_of_entries = 0;
+ int num_of_cache_types = 0;
+ struct kfd_gpu_cache_info cache_info[KFD_MAX_CACHE_TYPES];
+
+- amdgpu_amdkfd_get_cu_info(kdev->adev, &cu_info);
+- pcu_info = &cu_info;
+
+ gpu_processor_id = dev->node_props.simd_id_base;
+
+@@ -1715,12 +1713,12 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
+ cu_processor_id = gpu_processor_id;
+ if (pcache_info[ct].cache_level == 1) {
+ for (xcc = start; xcc < end; xcc++) {
+- for (i = 0; i < pcu_info->num_shader_engines; i++) {
+- for (j = 0; j < pcu_info->num_shader_arrays_per_engine; j++) {
+- for (k = 0; k < pcu_info->num_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
++ for (i = 0; i < gfx_info->max_shader_engines; i++) {
++ for (j = 0; j < gfx_info->max_sh_per_se; j++) {
++ for (k = 0; k < gfx_info->max_cu_per_sh; k += pcache_info[ct].num_cu_shared) {
+
+- ret = fill_in_l1_pcache(&props_ext, pcache_info, pcu_info,
+- pcu_info->cu_bitmap[xcc][i % 4][j + i / 4], ct,
++ ret = fill_in_l1_pcache(&props_ext, pcache_info,
++ cu_info->bitmap[xcc][i % 4][j + i / 4], ct,
+ cu_processor_id, k);
+
+ if (ret < 0)
+@@ -1733,9 +1731,9 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
+
+ /* Move to next CU block */
+ num_cu_shared = ((k + pcache_info[ct].num_cu_shared) <=
+- pcu_info->num_cu_per_sh) ?
++ gfx_info->max_cu_per_sh) ?
+ pcache_info[ct].num_cu_shared :
+- (pcu_info->num_cu_per_sh - k);
++ (gfx_info->max_cu_per_sh - k);
+ cu_processor_id += num_cu_shared;
+ }
+ }
+@@ -1743,7 +1741,7 @@ static void kfd_fill_cache_non_crat_info(struct kfd_topology_device *dev, struct
+ }
+ } else {
+ ret = fill_in_l2_l3_pcache(&props_ext, pcache_info,
+- pcu_info, ct, cu_processor_id, kdev);
++ cu_info, gfx_info, ct, cu_processor_id, kdev);
+
+ if (ret < 0)
+ break;
+@@ -1922,10 +1920,11 @@ int kfd_topology_add_device(struct kfd_node *gpu)
+ {
+ uint32_t gpu_id;
+ struct kfd_topology_device *dev;
+- struct kfd_cu_info *cu_info;
+ int res = 0;
+ int i;
+ const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
++ struct amdgpu_gfx_config *gfx_info = &gpu->adev->gfx.config;
++ struct amdgpu_cu_info *cu_info = &gpu->adev->gfx.cu_info;
+
+ gpu_id = kfd_generate_gpu_id(gpu);
+ if (gpu->xcp && !gpu->xcp->ddev) {
+@@ -1963,12 +1962,6 @@ int kfd_topology_add_device(struct kfd_node *gpu)
+ /* Fill-in additional information that is not available in CRAT but
+ * needed for the topology
+ */
+- cu_info = kzalloc(sizeof(struct kfd_cu_info), GFP_KERNEL);
+- if (!cu_info)
+- return -ENOMEM;
+-
+- amdgpu_amdkfd_get_cu_info(dev->gpu->adev, cu_info);
+-
+ for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) {
+ dev->node_props.name[i] = __tolower(asic_name[i]);
+ if (asic_name[i] == '\0')
+@@ -1977,7 +1970,7 @@ int kfd_topology_add_device(struct kfd_node *gpu)
+ dev->node_props.name[i] = '\0';
+
+ dev->node_props.simd_arrays_per_engine =
+- cu_info->num_shader_arrays_per_engine;
++ gfx_info->max_sh_per_se;
+
+ dev->node_props.gfx_target_version =
+ gpu->kfd->device_info.gfx_target_version;
+@@ -2058,7 +2051,7 @@ int kfd_topology_add_device(struct kfd_node *gpu)
+ */
+ if (dev->gpu->adev->asic_type == CHIP_CARRIZO) {
+ dev->node_props.simd_count =
+- cu_info->simd_per_cu * cu_info->cu_active_number;
++ cu_info->simd_per_cu * cu_info->number;
+ dev->node_props.max_waves_per_simd = 10;
+ }
+
+@@ -2085,8 +2078,6 @@ int kfd_topology_add_device(struct kfd_node *gpu)
+
+ kfd_notify_gpu_change(gpu_id, 1);
+
+- kfree(cu_info);
+-
+ return 0;
+ }
+
+diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+index 3b5a56585c4b..c653a7f4d5e5 100644
+--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
++++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+@@ -57,20 +57,6 @@ struct kfd_vm_fault_info {
+ bool prot_exec;
+ };
+
+-struct kfd_cu_info {
+- uint32_t num_shader_engines;
+- uint32_t num_shader_arrays_per_engine;
+- uint32_t num_cu_per_sh;
+- uint32_t cu_active_number;
+- uint32_t cu_ao_mask;
+- uint32_t simd_per_cu;
+- uint32_t max_waves_per_simd;
+- uint32_t wave_front_size;
+- uint32_t max_scratch_slots_per_cu;
+- uint32_t lds_size;
+- uint32_t cu_bitmap[AMDGPU_MAX_GC_INSTANCES][4][4];
+-};
+-
+ /* For getting GPU local memory information from KGD */
+ struct kfd_local_mem_info {
+ uint64_t local_mem_size_private;
+--
+2.39.5
+
--- /dev/null
+From 7ee0fbbb420b37af81a1510db4274e311db40ce8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 5 Dec 2024 17:41:26 +0800
+Subject: drm/amdkfd: pause autosuspend when creating pdd
+
+From: Jesse.zhang@amd.com <Jesse.zhang@amd.com>
+
+[ Upstream commit 438b39ac74e2a9dc0a5c9d653b7d8066877e86b1 ]
+
+When using MES, creating a pdd requires talking to the GPU to
+set up the relevant context. The code here forgot to wake up the GPU
+if it was suspended, which causes KVM to EFAULT for a passthrough
+GPU, for example. This issue can be masked if the GPU was woken up by
+other things (e.g. opening the KMS node) first and has not yet gone
+back to sleep.
+
+v4: do the allocation of proc_ctx_bo in a lazy fashion
+when the first queue is created in a process (Felix)
+
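+Condensing the two hunks below into one hedged sketch of the lazy
+shape (error handling trimmed; enable_mes stands in for the real
+shared_resources.enable_mes flag):
+
+	/* add_queue_mes(): allocate on first use, with the GPU awake */
+	if (!pdd->proc_ctx_cpu_ptr) {
+		r = amdgpu_amdkfd_alloc_gtt_mem(adev,
+				AMDGPU_MES_PROC_CTX_SIZE,
+				&pdd->proc_ctx_bo,
+				&pdd->proc_ctx_gpu_addr,
+				&pdd->proc_ctx_cpu_ptr, false);
+		if (r)
+			return r;
+		memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+	}
+
+	/* teardown: free only if it was ever allocated */
+	if (enable_mes && pdd->proc_ctx_cpu_ptr)
+		amdgpu_amdkfd_free_gtt_mem(adev, &pdd->proc_ctx_bo);
+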
+Signed-off-by: Jesse Zhang <jesse.zhang@amd.com>
+Reviewed-by: Yunxiang Li <Yunxiang.Li@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../drm/amd/amdkfd/kfd_device_queue_manager.c | 15 ++++++++++++
+ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 23 ++-----------------
+ 2 files changed, 17 insertions(+), 21 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+index 4d9a406925e1..43fa260ddbce 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+@@ -197,6 +197,21 @@ static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
+ if (dqm->is_hws_hang)
+ return -EIO;
+
++ if (!pdd->proc_ctx_cpu_ptr) {
++ r = amdgpu_amdkfd_alloc_gtt_mem(adev,
++ AMDGPU_MES_PROC_CTX_SIZE,
++ &pdd->proc_ctx_bo,
++ &pdd->proc_ctx_gpu_addr,
++ &pdd->proc_ctx_cpu_ptr,
++ false);
++ if (r) {
++ dev_err(adev->dev,
++ "failed to allocate process context bo\n");
++ return r;
++ }
++ memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
++ }
++
+ memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
+ queue_input.process_id = qpd->pqm->process->pasid;
+ queue_input.page_table_base_addr = qpd->page_table_base;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+index 577bdb6a9640..64346c71c62a 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+@@ -1046,7 +1046,8 @@ static void kfd_process_destroy_pdds(struct kfd_process *p)
+
+ kfd_free_process_doorbells(pdd->dev->kfd, pdd);
+
+- if (pdd->dev->kfd->shared_resources.enable_mes)
++ if (pdd->dev->kfd->shared_resources.enable_mes &&
++ pdd->proc_ctx_cpu_ptr)
+ amdgpu_amdkfd_free_gtt_mem(pdd->dev->adev,
+ &pdd->proc_ctx_bo);
+ /*
+@@ -1572,7 +1573,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
+ struct kfd_process *p)
+ {
+ struct kfd_process_device *pdd = NULL;
+- int retval = 0;
+
+ if (WARN_ON_ONCE(p->n_pdds >= MAX_GPU_INSTANCE))
+ return NULL;
+@@ -1596,21 +1596,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
+ pdd->user_gpu_id = dev->id;
+ atomic64_set(&pdd->evict_duration_counter, 0);
+
+- if (dev->kfd->shared_resources.enable_mes) {
+- retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev,
+- AMDGPU_MES_PROC_CTX_SIZE,
+- &pdd->proc_ctx_bo,
+- &pdd->proc_ctx_gpu_addr,
+- &pdd->proc_ctx_cpu_ptr,
+- false);
+- if (retval) {
+- dev_err(dev->adev->dev,
+- "failed to allocate process context bo\n");
+- goto err_free_pdd;
+- }
+- memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+- }
+-
+ p->pdds[p->n_pdds++] = pdd;
+ if (kfd_dbg_is_per_vmid_supported(pdd->dev))
+ pdd->spi_dbg_override = pdd->dev->kfd2kgd->disable_debug_trap(
+@@ -1622,10 +1607,6 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
+ idr_init(&pdd->alloc_idr);
+
+ return pdd;
+-
+-err_free_pdd:
+- kfree(pdd);
+- return NULL;
+ }
+
+ /**
+--
+2.39.5
+
--- /dev/null
+From ab1340181dfcfc6f075850be02610fbc6673eea2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Sep 2023 10:32:09 -0400
+Subject: drm/amdkfd: reduce stack size in kfd_topology_add_device()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+[ Upstream commit 4ff91f218547bfc3d230c00e46725b71a625acbc ]
+
+kfd_topology.c:2082:1: warning: the frame size of 1440 bytes is larger than 1024 bytes
+
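+The fix is the usual stack-to-heap conversion, sketched here
+(illustrative shape only; see the hunks below for the real thing):
+
+	struct kfd_cu_info *cu_info;
+
+	cu_info = kzalloc(sizeof(*cu_info), GFP_KERNEL);
+	if (!cu_info)
+		return -ENOMEM;
+	/* ... use cu_info instead of a large on-stack struct ... */
+	kfree(cu_info);
+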
+Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2866
+Cc: Arnd Bergmann <arnd@kernel.org>
+Acked-by: Arnd Bergmann <arnd@arndb.de>
+Acked-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Felix Kuehling <Felix.Kuehling@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Stable-dep-of: 438b39ac74e2 ("drm/amdkfd: pause autosuspend when creating pdd")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 13 +++++++++----
+ 1 file changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+index 8362a71ab707..a51363e25624 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+@@ -1922,7 +1922,7 @@ int kfd_topology_add_device(struct kfd_node *gpu)
+ {
+ uint32_t gpu_id;
+ struct kfd_topology_device *dev;
+- struct kfd_cu_info cu_info;
++ struct kfd_cu_info *cu_info;
+ int res = 0;
+ int i;
+ const char *asic_name = amdgpu_asic_name[gpu->adev->asic_type];
+@@ -1963,8 +1963,11 @@ int kfd_topology_add_device(struct kfd_node *gpu)
+ /* Fill-in additional information that is not available in CRAT but
+ * needed for the topology
+ */
++ cu_info = kzalloc(sizeof(struct kfd_cu_info), GFP_KERNEL);
++ if (!cu_info)
++ return -ENOMEM;
+
+- amdgpu_amdkfd_get_cu_info(dev->gpu->adev, &cu_info);
++ amdgpu_amdkfd_get_cu_info(dev->gpu->adev, cu_info);
+
+ for (i = 0; i < KFD_TOPOLOGY_PUBLIC_NAME_SIZE-1; i++) {
+ dev->node_props.name[i] = __tolower(asic_name[i]);
+@@ -1974,7 +1977,7 @@ int kfd_topology_add_device(struct kfd_node *gpu)
+ dev->node_props.name[i] = '\0';
+
+ dev->node_props.simd_arrays_per_engine =
+- cu_info.num_shader_arrays_per_engine;
++ cu_info->num_shader_arrays_per_engine;
+
+ dev->node_props.gfx_target_version =
+ gpu->kfd->device_info.gfx_target_version;
+@@ -2055,7 +2058,7 @@ int kfd_topology_add_device(struct kfd_node *gpu)
+ */
+ if (dev->gpu->adev->asic_type == CHIP_CARRIZO) {
+ dev->node_props.simd_count =
+- cu_info.simd_per_cu * cu_info.cu_active_number;
++ cu_info->simd_per_cu * cu_info->cu_active_number;
+ dev->node_props.max_waves_per_simd = 10;
+ }
+
+@@ -2082,6 +2085,8 @@ int kfd_topology_add_device(struct kfd_node *gpu)
+
+ kfd_notify_gpu_change(gpu_id, 1);
+
++ kfree(cu_info);
++
+ return 0;
+ }
+
+--
+2.39.5
+
--- /dev/null
+From 765adb71ad895a101c0b67d95c6c77d7a212d5c9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 24 Jun 2024 13:33:16 +0530
+Subject: drm/amdkfd: Use device based logging for errors
+
+From: Lijo Lazar <lijo.lazar@amd.com>
+
+[ Upstream commit 62ec7d38b769ccf33b1080e69c2ae5b7344d116d ]
+
+Convert some pr_* calls to the corresponding dev_* APIs so that the
+messages identify the device.
+
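+The practical difference, as a hedged example (the exact prefix
+depends on the bus and on any pr_fmt() definition):
+
+	/* pr_err(): no device context; with multiple GPUs you cannot
+	 * tell which one logged the failure.
+	 */
+	pr_err("Failed to evict process queues\n");
+
+	/* dev_err(): prefixed with driver name and device instance,
+	 * e.g. "amdgpu 0000:03:00.0: Failed to evict process queues".
+	 */
+	dev_err(dev, "Failed to evict process queues\n");
+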
+Signed-off-by: Lijo Lazar <lijo.lazar@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Stable-dep-of: 438b39ac74e2 ("drm/amdkfd: pause autosuspend when creating pdd")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c | 3 +-
+ drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c | 21 ++++---
+ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c | 8 ++-
+ .../gpu/drm/amd/amdkfd/kfd_packet_manager.c | 63 ++++++++++++-------
+ drivers/gpu/drm/amd/amdkfd/kfd_process.c | 24 ++++---
+ 5 files changed, 74 insertions(+), 45 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+index 6604a3f99c5e..b22a036523b7 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
+@@ -373,7 +373,8 @@ int kfd_init_apertures(struct kfd_process *process)
+
+ pdd = kfd_create_process_device_data(dev, process);
+ if (!pdd) {
+- pr_err("Failed to create process device data\n");
++ dev_err(dev->adev->dev,
++ "Failed to create process device data\n");
+ return -ENOMEM;
+ }
+ /*
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+index 68d13c4fac8f..2c529339ff65 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+@@ -68,7 +68,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
+ kq->mqd_mgr = dev->dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
+ break;
+ default:
+- pr_err("Invalid queue type %d\n", type);
++ dev_err(dev->adev->dev, "Invalid queue type %d\n", type);
+ return false;
+ }
+
+@@ -78,13 +78,14 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_node *dev,
+ prop.doorbell_ptr = kfd_get_kernel_doorbell(dev->kfd, &prop.doorbell_off);
+
+ if (!prop.doorbell_ptr) {
+- pr_err("Failed to initialize doorbell");
++ dev_err(dev->adev->dev, "Failed to initialize doorbell");
+ goto err_get_kernel_doorbell;
+ }
+
+ retval = kfd_gtt_sa_allocate(dev, queue_size, &kq->pq);
+ if (retval != 0) {
+- pr_err("Failed to init pq queues size %d\n", queue_size);
++ dev_err(dev->adev->dev, "Failed to init pq queues size %d\n",
++ queue_size);
+ goto err_pq_allocate_vidmem;
+ }
+
+@@ -332,7 +333,7 @@ struct kernel_queue *kernel_queue_init(struct kfd_node *dev,
+ if (kq_initialize(kq, dev, type, KFD_KERNEL_QUEUE_SIZE))
+ return kq;
+
+- pr_err("Failed to init kernel queue\n");
++ dev_err(dev->adev->dev, "Failed to init kernel queue\n");
+
+ kfree(kq);
+ return NULL;
+@@ -351,26 +352,26 @@ static __attribute__((unused)) void test_kq(struct kfd_node *dev)
+ uint32_t *buffer, i;
+ int retval;
+
+- pr_err("Starting kernel queue test\n");
++ dev_err(dev->adev->dev, "Starting kernel queue test\n");
+
+ kq = kernel_queue_init(dev, KFD_QUEUE_TYPE_HIQ);
+ if (unlikely(!kq)) {
+- pr_err(" Failed to initialize HIQ\n");
+- pr_err("Kernel queue test failed\n");
++ dev_err(dev->adev->dev, " Failed to initialize HIQ\n");
++ dev_err(dev->adev->dev, "Kernel queue test failed\n");
+ return;
+ }
+
+ retval = kq_acquire_packet_buffer(kq, 5, &buffer);
+ if (unlikely(retval != 0)) {
+- pr_err(" Failed to acquire packet buffer\n");
+- pr_err("Kernel queue test failed\n");
++ dev_err(dev->adev->dev, " Failed to acquire packet buffer\n");
++ dev_err(dev->adev->dev, "Kernel queue test failed\n");
+ return;
+ }
+ for (i = 0; i < 5; i++)
+ buffer[i] = kq->nop_packet;
+ kq_submit_packet(kq);
+
+- pr_err("Ending kernel queue test\n");
++ dev_err(dev->adev->dev, "Ending kernel queue test\n");
+ }
+
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+index b276bffcaaf3..0edae9ded68a 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager.c
+@@ -118,12 +118,14 @@ void mqd_symmetrically_map_cu_mask(struct mqd_manager *mm,
+ * attention grabbing.
+ */
+ if (gfx_info->max_shader_engines > KFD_MAX_NUM_SE) {
+- pr_err("Exceeded KFD_MAX_NUM_SE, chip reports %d\n",
+- gfx_info->max_shader_engines);
++ dev_err(mm->dev->adev->dev,
++ "Exceeded KFD_MAX_NUM_SE, chip reports %d\n",
++ gfx_info->max_shader_engines);
+ return;
+ }
+ if (gfx_info->max_sh_per_se > KFD_MAX_NUM_SH_PER_SE) {
+- pr_err("Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
++ dev_err(mm->dev->adev->dev,
++ "Exceeded KFD_MAX_NUM_SH, chip reports %d\n",
+ gfx_info->max_sh_per_se * gfx_info->max_shader_engines);
+ return;
+ }
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+index 401096c103b2..ecb38a6e8013 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+@@ -45,7 +45,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
+ unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
+ unsigned int map_queue_size;
+ unsigned int max_proc_per_quantum = 1;
+- struct kfd_node *dev = pm->dqm->dev;
++ struct kfd_node *node = pm->dqm->dev;
++ struct device *dev = node->adev->dev;
+
+ process_count = pm->dqm->processes_count;
+ queue_count = pm->dqm->active_queue_count;
+@@ -59,14 +60,14 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
+ */
+ *over_subscription = false;
+
+- if (dev->max_proc_per_quantum > 1)
+- max_proc_per_quantum = dev->max_proc_per_quantum;
++ if (node->max_proc_per_quantum > 1)
++ max_proc_per_quantum = node->max_proc_per_quantum;
+
+ if ((process_count > max_proc_per_quantum) ||
+ compute_queue_count > get_cp_queues_num(pm->dqm) ||
+ gws_queue_count > 1) {
+ *over_subscription = true;
+- pr_debug("Over subscribed runlist\n");
++ dev_dbg(dev, "Over subscribed runlist\n");
+ }
+
+ map_queue_size = pm->pmf->map_queues_size;
+@@ -81,7 +82,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
+ if (*over_subscription)
+ *rlib_size += pm->pmf->runlist_size;
+
+- pr_debug("runlist ib size %d\n", *rlib_size);
++ dev_dbg(dev, "runlist ib size %d\n", *rlib_size);
+ }
+
+ static int pm_allocate_runlist_ib(struct packet_manager *pm,
+@@ -90,6 +91,8 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
+ unsigned int *rl_buffer_size,
+ bool *is_over_subscription)
+ {
++ struct kfd_node *node = pm->dqm->dev;
++ struct device *dev = node->adev->dev;
+ int retval;
+
+ if (WARN_ON(pm->allocated))
+@@ -99,11 +102,10 @@ static int pm_allocate_runlist_ib(struct packet_manager *pm,
+
+ mutex_lock(&pm->lock);
+
+- retval = kfd_gtt_sa_allocate(pm->dqm->dev, *rl_buffer_size,
+- &pm->ib_buffer_obj);
++ retval = kfd_gtt_sa_allocate(node, *rl_buffer_size, &pm->ib_buffer_obj);
+
+ if (retval) {
+- pr_err("Failed to allocate runlist IB\n");
++ dev_err(dev, "Failed to allocate runlist IB\n");
+ goto out;
+ }
+
+@@ -125,6 +127,8 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
+ {
+ unsigned int alloc_size_bytes;
+ unsigned int *rl_buffer, rl_wptr, i;
++ struct kfd_node *node = pm->dqm->dev;
++ struct device *dev = node->adev->dev;
+ int retval, processes_mapped;
+ struct device_process_node *cur;
+ struct qcm_process_device *qpd;
+@@ -142,7 +146,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
+ *rl_size_bytes = alloc_size_bytes;
+ pm->ib_size_bytes = alloc_size_bytes;
+
+- pr_debug("Building runlist ib process count: %d queues count %d\n",
++ dev_dbg(dev, "Building runlist ib process count: %d queues count %d\n",
+ pm->dqm->processes_count, pm->dqm->active_queue_count);
+
+ /* build the run list ib packet */
+@@ -150,7 +154,7 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
+ qpd = cur->qpd;
+ /* build map process packet */
+ if (processes_mapped >= pm->dqm->processes_count) {
+- pr_debug("Not enough space left in runlist IB\n");
++ dev_dbg(dev, "Not enough space left in runlist IB\n");
+ pm_release_ib(pm);
+ return -ENOMEM;
+ }
+@@ -167,7 +171,8 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
+ if (!kq->queue->properties.is_active)
+ continue;
+
+- pr_debug("static_queue, mapping kernel q %d, is debug status %d\n",
++ dev_dbg(dev,
++ "static_queue, mapping kernel q %d, is debug status %d\n",
+ kq->queue->queue, qpd->is_debug);
+
+ retval = pm->pmf->map_queues(pm,
+@@ -186,7 +191,8 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
+ if (!q->properties.is_active)
+ continue;
+
+- pr_debug("static_queue, mapping user queue %d, is debug status %d\n",
++ dev_dbg(dev,
++ "static_queue, mapping user queue %d, is debug status %d\n",
+ q->queue, qpd->is_debug);
+
+ retval = pm->pmf->map_queues(pm,
+@@ -203,11 +209,13 @@ static int pm_create_runlist_ib(struct packet_manager *pm,
+ }
+ }
+
+- pr_debug("Finished map process and queues to runlist\n");
++ dev_dbg(dev, "Finished map process and queues to runlist\n");
+
+ if (is_over_subscription) {
+ if (!pm->is_over_subscription)
+- pr_warn("Runlist is getting oversubscribed. Expect reduced ROCm performance.\n");
++ dev_warn(
++ dev,
++ "Runlist is getting oversubscribed. Expect reduced ROCm performance.\n");
+ retval = pm->pmf->runlist(pm, &rl_buffer[rl_wptr],
+ *rl_gpu_addr,
+ alloc_size_bytes / sizeof(uint32_t),
+@@ -272,6 +280,8 @@ void pm_uninit(struct packet_manager *pm, bool hanging)
+ int pm_send_set_resources(struct packet_manager *pm,
+ struct scheduling_resources *res)
+ {
++ struct kfd_node *node = pm->dqm->dev;
++ struct device *dev = node->adev->dev;
+ uint32_t *buffer, size;
+ int retval = 0;
+
+@@ -281,7 +291,7 @@ int pm_send_set_resources(struct packet_manager *pm,
+ size / sizeof(uint32_t),
+ (unsigned int **)&buffer);
+ if (!buffer) {
+- pr_err("Failed to allocate buffer on kernel queue\n");
++ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
+ retval = -ENOMEM;
+ goto out;
+ }
+@@ -343,6 +353,8 @@ int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues)
+ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
+ uint64_t fence_value)
+ {
++ struct kfd_node *node = pm->dqm->dev;
++ struct device *dev = node->adev->dev;
+ uint32_t *buffer, size;
+ int retval = 0;
+
+@@ -354,7 +366,7 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
+ kq_acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t), (unsigned int **)&buffer);
+ if (!buffer) {
+- pr_err("Failed to allocate buffer on kernel queue\n");
++ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
+ retval = -ENOMEM;
+ goto out;
+ }
+@@ -372,6 +384,8 @@ int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
+
+ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
+ {
++ struct kfd_node *node = pm->dqm->dev;
++ struct device *dev = node->adev->dev;
+ int retval = 0;
+ uint32_t *buffer, size;
+
+@@ -385,7 +399,8 @@ int pm_update_grace_period(struct packet_manager *pm, uint32_t grace_period)
+ (unsigned int **)&buffer);
+
+ if (!buffer) {
+- pr_err("Failed to allocate buffer on kernel queue\n");
++ dev_err(dev,
++ "Failed to allocate buffer on kernel queue\n");
+ retval = -ENOMEM;
+ goto out;
+ }
+@@ -406,6 +421,8 @@ int pm_send_unmap_queue(struct packet_manager *pm,
+ enum kfd_unmap_queues_filter filter,
+ uint32_t filter_param, bool reset)
+ {
++ struct kfd_node *node = pm->dqm->dev;
++ struct device *dev = node->adev->dev;
+ uint32_t *buffer, size;
+ int retval = 0;
+
+@@ -414,7 +431,7 @@ int pm_send_unmap_queue(struct packet_manager *pm,
+ kq_acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t), (unsigned int **)&buffer);
+ if (!buffer) {
+- pr_err("Failed to allocate buffer on kernel queue\n");
++ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
+ retval = -ENOMEM;
+ goto out;
+ }
+@@ -463,6 +480,8 @@ int pm_debugfs_runlist(struct seq_file *m, void *data)
+
+ int pm_debugfs_hang_hws(struct packet_manager *pm)
+ {
++ struct kfd_node *node = pm->dqm->dev;
++ struct device *dev = node->adev->dev;
+ uint32_t *buffer, size;
+ int r = 0;
+
+@@ -474,16 +493,16 @@ int pm_debugfs_hang_hws(struct packet_manager *pm)
+ kq_acquire_packet_buffer(pm->priv_queue,
+ size / sizeof(uint32_t), (unsigned int **)&buffer);
+ if (!buffer) {
+- pr_err("Failed to allocate buffer on kernel queue\n");
++ dev_err(dev, "Failed to allocate buffer on kernel queue\n");
+ r = -ENOMEM;
+ goto out;
+ }
+ memset(buffer, 0x55, size);
+ kq_submit_packet(pm->priv_queue);
+
+- pr_info("Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
+- buffer[0], buffer[1], buffer[2], buffer[3],
+- buffer[4], buffer[5], buffer[6]);
++ dev_info(dev, "Submitting %x %x %x %x %x %x %x to HIQ to hang the HWS.",
++ buffer[0], buffer[1], buffer[2], buffer[3], buffer[4],
++ buffer[5], buffer[6]);
+ out:
+ mutex_unlock(&pm->lock);
+ return r;
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+index fd640a061c96..577bdb6a9640 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+@@ -1308,7 +1308,8 @@ int kfd_process_init_cwsr_apu(struct kfd_process *p, struct file *filep)
+ if (IS_ERR_VALUE(qpd->tba_addr)) {
+ int err = qpd->tba_addr;
+
+- pr_err("Failure to set tba address. error %d.\n", err);
++ dev_err(dev->adev->dev,
++ "Failure to set tba address. error %d.\n", err);
+ qpd->tba_addr = 0;
+ qpd->cwsr_kaddr = NULL;
+ return err;
+@@ -1603,7 +1604,8 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_node *dev,
+ &pdd->proc_ctx_cpu_ptr,
+ false);
+ if (retval) {
+- pr_err("failed to allocate process context bo\n");
++ dev_err(dev->adev->dev,
++ "failed to allocate process context bo\n");
+ goto err_free_pdd;
+ }
+ memset(pdd->proc_ctx_cpu_ptr, 0, AMDGPU_MES_PROC_CTX_SIZE);
+@@ -1667,7 +1669,7 @@ int kfd_process_device_init_vm(struct kfd_process_device *pdd,
+ &p->kgd_process_info,
+ &p->ef);
+ if (ret) {
+- pr_err("Failed to create process VM object\n");
++ dev_err(dev->adev->dev, "Failed to create process VM object\n");
+ return ret;
+ }
+ pdd->drm_priv = drm_file->private_data;
+@@ -1714,7 +1716,7 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_node *dev,
+
+ pdd = kfd_get_process_device_data(dev, p);
+ if (!pdd) {
+- pr_err("Process device data doesn't exist\n");
++ dev_err(dev->adev->dev, "Process device data doesn't exist\n");
+ return ERR_PTR(-ENOMEM);
+ }
+
+@@ -1824,6 +1826,7 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
+
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
++ struct device *dev = pdd->dev->adev->dev;
+
+ kfd_smi_event_queue_eviction(pdd->dev, p->lead_thread->pid,
+ trigger);
+@@ -1835,7 +1838,7 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
+ * them been add back since they actually not be saved right now.
+ */
+ if (r && r != -EIO) {
+- pr_err("Failed to evict process queues\n");
++ dev_err(dev, "Failed to evict process queues\n");
+ goto fail;
+ }
+ n_evicted++;
+@@ -1857,7 +1860,8 @@ int kfd_process_evict_queues(struct kfd_process *p, uint32_t trigger)
+
+ if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
+ &pdd->qpd))
+- pr_err("Failed to restore queues\n");
++ dev_err(pdd->dev->adev->dev,
++ "Failed to restore queues\n");
+
+ n_evicted--;
+ }
+@@ -1873,13 +1877,14 @@ int kfd_process_restore_queues(struct kfd_process *p)
+
+ for (i = 0; i < p->n_pdds; i++) {
+ struct kfd_process_device *pdd = p->pdds[i];
++ struct device *dev = pdd->dev->adev->dev;
+
+ kfd_smi_event_queue_restore(pdd->dev, p->lead_thread->pid);
+
+ r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
+ &pdd->qpd);
+ if (r) {
+- pr_err("Failed to restore process queues\n");
++ dev_err(dev, "Failed to restore process queues\n");
+ if (!ret)
+ ret = r;
+ }
+@@ -2039,7 +2044,7 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
+ struct qcm_process_device *qpd;
+
+ if ((vma->vm_end - vma->vm_start) != KFD_CWSR_TBA_TMA_SIZE) {
+- pr_err("Incorrect CWSR mapping size.\n");
++ dev_err(dev->adev->dev, "Incorrect CWSR mapping size.\n");
+ return -EINVAL;
+ }
+
+@@ -2051,7 +2056,8 @@ int kfd_reserved_mem_mmap(struct kfd_node *dev, struct kfd_process *process,
+ qpd->cwsr_kaddr = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(KFD_CWSR_TBA_TMA_SIZE));
+ if (!qpd->cwsr_kaddr) {
+- pr_err("Error allocating per process CWSR buffer.\n");
++ dev_err(dev->adev->dev,
++ "Error allocating per process CWSR buffer.\n");
+ return -ENOMEM;
+ }
+
+--
+2.39.5
+
--- /dev/null
+From 1a960f9fb097035967f72477f7d8e00966a68856 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 Dec 2024 00:48:18 +0000
+Subject: freezer, sched: Report frozen tasks as 'D' instead of 'R'
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Chen Ridong <chenridong@huawei.com>
+
+[ Upstream commit f718faf3940e95d5d34af9041f279f598396ab7d ]
+
+Before commit:
+
+ f5d39b020809 ("freezer,sched: Rewrite core freezer logic")
+
+the frozen task stat was reported as 'D' in cgroup v1.
+
+However, after rewriting the core freezer logic, the frozen task stat is
+reported as 'R'. This is confusing, especially when a task whose stat
+was 'S' is frozen.
+
+This bug can be reproduced with these steps:
+
+ $ cd /sys/fs/cgroup/freezer/
+ $ mkdir test
+ $ sleep 1000 &
+ [1] 739 // task whose stat is 'S'
+ $ echo 739 > test/cgroup.procs
+ $ echo FROZEN > test/freezer.state
+ $ ps -aux | grep 739
+ root 739 0.1 0.0 8376 1812 pts/0 R 10:56 0:00 sleep 1000
+
+As shown above, a task whose stat is 'S' was changed to 'R' when it was
+frozen.
+
+To solve this regression, simply maintain the same reported state as
+before the rewrite.
+
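+For reference, the reason the old code reported 'R' (a simplified
+sketch of __task_state_index(); TASK_FROZEN lies outside the
+TASK_REPORT mask):
+
+	unsigned int state = tsk_state & TASK_REPORT; /* drops FROZEN */
+	/* for a frozen task state == 0, fls(0) == 0, and index 0 is
+	 * the "R (running)" entry of the proc state table; mapping
+	 * frozen tasks to TASK_UNINTERRUPTIBLE selects 'D' instead.
+	 */
+	return fls(state);
+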
+[ mingo: Enhanced the changelog and comments ]
+
+Fixes: f5d39b020809 ("freezer,sched: Rewrite core freezer logic")
+Signed-off-by: Chen Ridong <chenridong@huawei.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Acked-by: Tejun Heo <tj@kernel.org>
+Acked-by: Michal Koutný <mkoutny@suse.com>
+Link: https://lore.kernel.org/r/20241217004818.3200515-1-chenridong@huaweicloud.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/sched.h | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 4809f27b5201..d4f9d82c69e0 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1683,8 +1683,9 @@ static inline unsigned int __task_state_index(unsigned int tsk_state,
+ * We're lying here, but rather than expose a completely new task state
+ * to userspace, we can make this appear as if the task has gone through
+ * a regular rt_mutex_lock() call.
++ * Report frozen tasks as uninterruptible.
+ */
+- if (tsk_state & TASK_RTLOCK_WAIT)
++ if ((tsk_state & TASK_RTLOCK_WAIT) || (tsk_state & TASK_FROZEN))
+ state = TASK_UNINTERRUPTIBLE;
+
+ return fls(state);
+--
+2.39.5
+
--- /dev/null
+From c325929de93b4961f593d37a882565e126fb8e48 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Nov 2024 14:42:02 +0100
+Subject: pmdomain: core: Add missing put_device()
+
+From: Ulf Hansson <ulf.hansson@linaro.org>
+
+[ Upstream commit b8f7bbd1f4ecff6d6277b8c454f62bb0a1c6dbe4 ]
+
+When removing a genpd we don't clean up the genpd->dev correctly. Let's add
+the missing put_device() in genpd_free_data() to fix this.
+
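+This follows the usual driver-core pairing, sketched below: a device
+set up with device_initialize() holds an initial reference that only
+put_device() drops, which in turn lets its release() callback free
+the memory:
+
+	device_initialize(&genpd->dev);	/* refcount = 1 */
+	/* ... lifetime of the domain ... */
+	put_device(&genpd->dev);	/* drop initial ref, may free */
+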
+Fixes: 401ea1572de9 ("PM / Domain: Add struct device to genpd")
+Cc: stable@vger.kernel.org
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Message-ID: <20241122134207.157283-2-ulf.hansson@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/power/domain.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
+index 582564f8dde6..d9d339b8b571 100644
+--- a/drivers/base/power/domain.c
++++ b/drivers/base/power/domain.c
+@@ -2021,6 +2021,7 @@ static int genpd_alloc_data(struct generic_pm_domain *genpd)
+
+ static void genpd_free_data(struct generic_pm_domain *genpd)
+ {
++ put_device(&genpd->dev);
+ if (genpd_is_cpu_domain(genpd))
+ free_cpumask_var(genpd->cpus);
+ if (genpd->free_states)
+--
+2.39.5
+
mips-probe-toolchain-support-of-msym32.patch
mips-mipsregs-set-proper-isa-level-for-virt-extensio.patch
sched-task_stack-fix-object_is_on_stack-for-kasan-ta.patch
+alsa-hda-realtek-fix-mute-micmute-leds-don-t-work-fo.patch
+alsa-hda-realtek-fix-micmute-leds-don-t-work-on-hp-l.patch
+pmdomain-core-add-missing-put_device.patch
+drm-amd-amdgpu-allow-use-kiq-to-do-hdp-flush-under-s.patch
+drm-amdgpu-hdp4.0-do-a-posting-read-when-flushing-hd.patch
+drm-amdgpu-hdp5.0-do-a-posting-read-when-flushing-hd.patch
+drm-amdgpu-hdp6.0-do-a-posting-read-when-flushing-hd.patch
+x86-cpu-add-model-number-for-intel-clearwater-forest.patch
+x86-cpu-add-model-number-for-another-intel-arrow-lak.patch
+x86-cpu-vfm-add-initialize-x86_vfm-field-to-struct-c.patch
+x86-cpu-vfm-update-arch-x86-include-asm-intel-family.patch
+x86-cpu-intel-switch-to-new-intel-cpu-model-defines.patch
+x86-cpu-add-lunar-lake-to-list-of-cpus-with-a-broken.patch
+drm-amdkfd-reduce-stack-size-in-kfd_topology_add_dev.patch
+drm-amdkfd-drop-struct-kfd_cu_info.patch
+drm-amdkfd-use-device-based-logging-for-errors.patch
+drm-amdkfd-pause-autosuspend-when-creating-pdd.patch
+freezer-sched-report-frozen-tasks-as-d-instead-of-r.patch
--- /dev/null
+From bf904cc805fac95a390aeac49d2965907662b208 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Nov 2024 21:07:00 -0500
+Subject: x86/cpu: Add Lunar Lake to list of CPUs with a broken MONITOR
+ implementation
+
+From: Len Brown <len.brown@intel.com>
+
+[ Upstream commit c9a4b55431e5220347881e148725bed69c84e037 ]
+
+Under some conditions, MONITOR wakeups on Lunar Lake processors
+can be lost, resulting in significant user-visible delays.
+
+Add Lunar Lake to X86_BUG_MONITOR so that wake_up_idle_cpu()
+always sends an IPI, avoiding this potential delay.
+
+Reported originally here:
+
+ https://bugzilla.kernel.org/show_bug.cgi?id=219364
+
+[ dhansen: tweak subject ]
+
+Signed-off-by: Len Brown <len.brown@intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/a4aa8842a3c3bfdb7fe9807710eef159cbf0e705.1731463305.git.len.brown%40intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/intel.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
+index 78f34ff32c9b..97259485c111 100644
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -666,7 +666,9 @@ static void init_intel(struct cpuinfo_x86 *c)
+ c->x86_vfm == INTEL_WESTMERE_EX))
+ set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
+
+- if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT)
++ if (boot_cpu_has(X86_FEATURE_MWAIT) &&
++ (c->x86_vfm == INTEL_ATOM_GOLDMONT ||
++ c->x86_vfm == INTEL_LUNARLAKE_M))
+ set_cpu_bug(c, X86_BUG_MONITOR);
+
+ #ifdef CONFIG_X86_64
+--
+2.39.5
+
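+The gate itself is the single x86_vfm compare visible in the hunk. A
+small standalone sketch of what that compare boils down to, assuming
+the VFM packing introduced by the dependency patches queued below
+(vendor in bits 16-23, family in bits 8-15, model in bits 0-7):
+
+ #include <stdio.h>
+
+ #define VFM_MAKE(vendor, family, model) \
+         (((vendor) << 16) | ((family) << 8) | (model))
+
+ #define X86_VENDOR_INTEL     0
+ #define INTEL_ATOM_GOLDMONT  VFM_MAKE(X86_VENDOR_INTEL, 6, 0x5C)
+ #define INTEL_LUNARLAKE_M    VFM_MAKE(X86_VENDOR_INTEL, 6, 0xBD)
+
+ int main(void)
+ {
+         /* Simulated Lunar Lake part: vendor 0, family 6, model 0xBD. */
+         unsigned int x86_vfm = VFM_MAKE(X86_VENDOR_INTEL, 6, 0xBD);
+         int has_mwait = 1;      /* assume MWAIT is enumerated */
+
+         if (has_mwait &&
+             (x86_vfm == INTEL_ATOM_GOLDMONT ||
+              x86_vfm == INTEL_LUNARLAKE_M))
+                 printf("set X86_BUG_MONITOR (vfm=0x%04x)\n", x86_vfm);
+         return 0;
+ }
+
+With the bug flag set, wake_up_idle_cpu() always sends an IPI rather
+than relying on the monitored write, as the changelog notes, so a lost
+MONITOR event can no longer stall the wakeup.
+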
--- /dev/null
+From 60eeb525f5a80a22648200d2ac1ecf1388ee03cd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Mar 2024 09:17:25 -0700
+Subject: x86/cpu: Add model number for another Intel Arrow Lake mobile
+ processor
+
+From: Tony Luck <tony.luck@intel.com>
+
+[ Upstream commit 8a8a9c9047d1089598bdb010ec44d7f14b4f9203 ]
+
+This one is the regular laptop CPU.
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Link: https://lore.kernel.org/r/20240322161725.195614-1-tony.luck@intel.com
+Stable-dep-of: c9a4b55431e5 ("x86/cpu: Add Lunar Lake to list of CPUs with a broken MONITOR implementation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/intel-family.h | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
+index b65e9c46b922..d0941f4c2724 100644
+--- a/arch/x86/include/asm/intel-family.h
++++ b/arch/x86/include/asm/intel-family.h
+@@ -127,6 +127,7 @@
+
+ #define INTEL_FAM6_ARROWLAKE_H 0xC5
+ #define INTEL_FAM6_ARROWLAKE 0xC6
++#define INTEL_FAM6_ARROWLAKE_U 0xB5
+
+ #define INTEL_FAM6_LUNARLAKE_M 0xBD
+
+--
+2.39.5
+
--- /dev/null
+From 380016de2e8ce5337f6dfcd72b6011491776d09e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 17 Jan 2024 11:18:44 -0800
+Subject: x86/cpu: Add model number for Intel Clearwater Forest processor
+
+From: Tony Luck <tony.luck@intel.com>
+
+[ Upstream commit 090e3bec01763e415bccae445f5bfe3d0c61b629 ]
+
+Server product based on the Atom Darkmont core.
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Link: https://lore.kernel.org/r/20240117191844.56180-1-tony.luck@intel.com
+Stable-dep-of: c9a4b55431e5 ("x86/cpu: Add Lunar Lake to list of CPUs with a broken MONITOR implementation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/intel-family.h | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
+index 197316121f04..b65e9c46b922 100644
+--- a/arch/x86/include/asm/intel-family.h
++++ b/arch/x86/include/asm/intel-family.h
+@@ -162,6 +162,8 @@
+ #define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */
+ #define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */
+
++#define INTEL_FAM6_ATOM_DARKMONT_X 0xDD /* Clearwater Forest */
++
+ /* Xeon Phi */
+
+ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
+--
+2.39.5
+
--- /dev/null
+From dc5c47f7891f9d6ca9f04c154faad74921e2bdf1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 20 May 2024 15:45:59 -0700
+Subject: x86/cpu/intel: Switch to new Intel CPU model defines
+
+From: Tony Luck <tony.luck@intel.com>
+
+[ Upstream commit 6568fc18c2f62b4f35092e9680fe39f3500f4767 ]
+
+New CPU #defines encode vendor and family as well as model.
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Link: https://lore.kernel.org/all/20240520224620.9480-29-tony.luck%40intel.com
+Stable-dep-of: c9a4b55431e5 ("x86/cpu: Add Lunar Lake to list of CPUs with a broken MONITOR implementation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/cpu/intel.c | 108 ++++++++++++++++++------------------
+ 1 file changed, 53 insertions(+), 55 deletions(-)
+
+diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
+index aa3e7ed0eb3d..78f34ff32c9b 100644
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -72,19 +72,19 @@ static bool cpu_model_supports_sld __ro_after_init;
+ */
+ static void check_memory_type_self_snoop_errata(struct cpuinfo_x86 *c)
+ {
+- switch (c->x86_model) {
+- case INTEL_FAM6_CORE_YONAH:
+- case INTEL_FAM6_CORE2_MEROM:
+- case INTEL_FAM6_CORE2_MEROM_L:
+- case INTEL_FAM6_CORE2_PENRYN:
+- case INTEL_FAM6_CORE2_DUNNINGTON:
+- case INTEL_FAM6_NEHALEM:
+- case INTEL_FAM6_NEHALEM_G:
+- case INTEL_FAM6_NEHALEM_EP:
+- case INTEL_FAM6_NEHALEM_EX:
+- case INTEL_FAM6_WESTMERE:
+- case INTEL_FAM6_WESTMERE_EP:
+- case INTEL_FAM6_SANDYBRIDGE:
++ switch (c->x86_vfm) {
++ case INTEL_CORE_YONAH:
++ case INTEL_CORE2_MEROM:
++ case INTEL_CORE2_MEROM_L:
++ case INTEL_CORE2_PENRYN:
++ case INTEL_CORE2_DUNNINGTON:
++ case INTEL_NEHALEM:
++ case INTEL_NEHALEM_G:
++ case INTEL_NEHALEM_EP:
++ case INTEL_NEHALEM_EX:
++ case INTEL_WESTMERE:
++ case INTEL_WESTMERE_EP:
++ case INTEL_SANDYBRIDGE:
+ setup_clear_cpu_cap(X86_FEATURE_SELFSNOOP);
+ }
+ }
+@@ -106,9 +106,9 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
+ */
+ if (c->x86 != 6)
+ return;
+- switch (c->x86_model) {
+- case INTEL_FAM6_XEON_PHI_KNL:
+- case INTEL_FAM6_XEON_PHI_KNM:
++ switch (c->x86_vfm) {
++ case INTEL_XEON_PHI_KNL:
++ case INTEL_XEON_PHI_KNM:
+ break;
+ default:
+ return;
+@@ -134,32 +134,32 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c)
+ * - Release note from 20180108 microcode release
+ */
+ struct sku_microcode {
+- u8 model;
++ u32 vfm;
+ u8 stepping;
+ u32 microcode;
+ };
+ static const struct sku_microcode spectre_bad_microcodes[] = {
+- { INTEL_FAM6_KABYLAKE, 0x0B, 0x80 },
+- { INTEL_FAM6_KABYLAKE, 0x0A, 0x80 },
+- { INTEL_FAM6_KABYLAKE, 0x09, 0x80 },
+- { INTEL_FAM6_KABYLAKE_L, 0x0A, 0x80 },
+- { INTEL_FAM6_KABYLAKE_L, 0x09, 0x80 },
+- { INTEL_FAM6_SKYLAKE_X, 0x03, 0x0100013e },
+- { INTEL_FAM6_SKYLAKE_X, 0x04, 0x0200003c },
+- { INTEL_FAM6_BROADWELL, 0x04, 0x28 },
+- { INTEL_FAM6_BROADWELL_G, 0x01, 0x1b },
+- { INTEL_FAM6_BROADWELL_D, 0x02, 0x14 },
+- { INTEL_FAM6_BROADWELL_D, 0x03, 0x07000011 },
+- { INTEL_FAM6_BROADWELL_X, 0x01, 0x0b000025 },
+- { INTEL_FAM6_HASWELL_L, 0x01, 0x21 },
+- { INTEL_FAM6_HASWELL_G, 0x01, 0x18 },
+- { INTEL_FAM6_HASWELL, 0x03, 0x23 },
+- { INTEL_FAM6_HASWELL_X, 0x02, 0x3b },
+- { INTEL_FAM6_HASWELL_X, 0x04, 0x10 },
+- { INTEL_FAM6_IVYBRIDGE_X, 0x04, 0x42a },
++ { INTEL_KABYLAKE, 0x0B, 0x80 },
++ { INTEL_KABYLAKE, 0x0A, 0x80 },
++ { INTEL_KABYLAKE, 0x09, 0x80 },
++ { INTEL_KABYLAKE_L, 0x0A, 0x80 },
++ { INTEL_KABYLAKE_L, 0x09, 0x80 },
++ { INTEL_SKYLAKE_X, 0x03, 0x0100013e },
++ { INTEL_SKYLAKE_X, 0x04, 0x0200003c },
++ { INTEL_BROADWELL, 0x04, 0x28 },
++ { INTEL_BROADWELL_G, 0x01, 0x1b },
++ { INTEL_BROADWELL_D, 0x02, 0x14 },
++ { INTEL_BROADWELL_D, 0x03, 0x07000011 },
++ { INTEL_BROADWELL_X, 0x01, 0x0b000025 },
++ { INTEL_HASWELL_L, 0x01, 0x21 },
++ { INTEL_HASWELL_G, 0x01, 0x18 },
++ { INTEL_HASWELL, 0x03, 0x23 },
++ { INTEL_HASWELL_X, 0x02, 0x3b },
++ { INTEL_HASWELL_X, 0x04, 0x10 },
++ { INTEL_IVYBRIDGE_X, 0x04, 0x42a },
+ /* Observed in the wild */
+- { INTEL_FAM6_SANDYBRIDGE_X, 0x06, 0x61b },
+- { INTEL_FAM6_SANDYBRIDGE_X, 0x07, 0x712 },
++ { INTEL_SANDYBRIDGE_X, 0x06, 0x61b },
++ { INTEL_SANDYBRIDGE_X, 0x07, 0x712 },
+ };
+
+ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
+@@ -173,11 +173,8 @@ static bool bad_spectre_microcode(struct cpuinfo_x86 *c)
+ if (cpu_has(c, X86_FEATURE_HYPERVISOR))
+ return false;
+
+- if (c->x86 != 6)
+- return false;
+-
+ for (i = 0; i < ARRAY_SIZE(spectre_bad_microcodes); i++) {
+- if (c->x86_model == spectre_bad_microcodes[i].model &&
++ if (c->x86_vfm == spectre_bad_microcodes[i].vfm &&
+ c->x86_stepping == spectre_bad_microcodes[i].stepping)
+ return (c->microcode <= spectre_bad_microcodes[i].microcode);
+ }
+@@ -312,7 +309,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
+ * need the microcode to have already been loaded... so if it is
+ * not, recommend a BIOS update and disable large pages.
+ */
+- if (c->x86 == 6 && c->x86_model == 0x1c && c->x86_stepping <= 2 &&
++ if (c->x86_vfm == INTEL_ATOM_BONNELL && c->x86_stepping <= 2 &&
+ c->microcode < 0x20e) {
+ pr_warn("Atom PSE erratum detected, BIOS microcode update recommended\n");
+ clear_cpu_cap(c, X86_FEATURE_PSE);
+@@ -345,11 +342,11 @@ static void early_init_intel(struct cpuinfo_x86 *c)
+
+ /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */
+ if (c->x86 == 6) {
+- switch (c->x86_model) {
+- case INTEL_FAM6_ATOM_SALTWELL_MID:
+- case INTEL_FAM6_ATOM_SALTWELL_TABLET:
+- case INTEL_FAM6_ATOM_SILVERMONT_MID:
+- case INTEL_FAM6_ATOM_AIRMONT_NP:
++ switch (c->x86_vfm) {
++ case INTEL_ATOM_SALTWELL_MID:
++ case INTEL_ATOM_SALTWELL_TABLET:
++ case INTEL_ATOM_SILVERMONT_MID:
++ case INTEL_ATOM_AIRMONT_NP:
+ set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC_S3);
+ break;
+ default:
+@@ -393,7 +390,7 @@ static void early_init_intel(struct cpuinfo_x86 *c)
+ * should be false so that __flush_tlb_all() causes CR3 instead of CR4.PGE
+ * to be modified.
+ */
+- if (c->x86 == 5 && c->x86_model == 9) {
++ if (c->x86_vfm == INTEL_QUARK_X1000) {
+ pr_info("Disabling PGE capability bit\n");
+ setup_clear_cpu_cap(X86_FEATURE_PGE);
+ }
+@@ -663,12 +660,13 @@ static void init_intel(struct cpuinfo_x86 *c)
+ set_cpu_cap(c, X86_FEATURE_PEBS);
+ }
+
+- if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_CLFLUSH) &&
+- (c->x86_model == 29 || c->x86_model == 46 || c->x86_model == 47))
++ if (boot_cpu_has(X86_FEATURE_CLFLUSH) &&
++ (c->x86_vfm == INTEL_CORE2_DUNNINGTON ||
++ c->x86_vfm == INTEL_NEHALEM_EX ||
++ c->x86_vfm == INTEL_WESTMERE_EX))
+ set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
+
+- if (c->x86 == 6 && boot_cpu_has(X86_FEATURE_MWAIT) &&
+- ((c->x86_model == INTEL_FAM6_ATOM_GOLDMONT)))
++ if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT)
+ set_cpu_bug(c, X86_BUG_MONITOR);
+
+ #ifdef CONFIG_X86_64
+@@ -1285,9 +1283,9 @@ void handle_bus_lock(struct pt_regs *regs)
+ * feature even though they do not enumerate IA32_CORE_CAPABILITIES.
+ */
+ static const struct x86_cpu_id split_lock_cpu_ids[] __initconst = {
+- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, 0),
+- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_L, 0),
+- X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, 0),
++ X86_MATCH_VFM(INTEL_ICELAKE_X, 0),
++ X86_MATCH_VFM(INTEL_ICELAKE_L, 0),
++ X86_MATCH_VFM(INTEL_ICELAKE_D, 0),
+ {}
+ };
+
+--
+2.39.5
+
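+A sketch of the conversion's payoff, under the same assumed VFM packing
+(hypothetical struct and helper names; in the kernel, x86_vfm stays in
+sync with the byte fields through the union added in the next patch):
+
+ #include <stdio.h>
+
+ #define VFM_MAKE(vendor, family, model) \
+         (((vendor) << 16) | ((family) << 8) | (model))
+ #define X86_VENDOR_INTEL    0
+ #define INTEL_ATOM_BONNELL  VFM_MAKE(X86_VENDOR_INTEL, 6, 0x1c)
+
+ struct cpu {
+         unsigned char x86_vendor, x86, x86_model;
+         unsigned int x86_vfm;
+ };
+
+ /* Old style: separate family/model tests, vendor merely implied. */
+ static int is_bonnell_old(const struct cpu *c)
+ {
+         return c->x86 == 6 && c->x86_model == 0x1c;
+ }
+
+ /* New style: one 32-bit compare checks vendor, family and model. */
+ static int is_bonnell_new(const struct cpu *c)
+ {
+         return c->x86_vfm == INTEL_ATOM_BONNELL;
+ }
+
+ int main(void)
+ {
+         struct cpu c = { .x86_vendor = X86_VENDOR_INTEL,
+                          .x86 = 6, .x86_model = 0x1c };
+
+         c.x86_vfm = VFM_MAKE(c.x86_vendor, c.x86, c.x86_model);
+         printf("old=%d new=%d\n", is_bonnell_old(&c), is_bonnell_new(&c));
+         return 0;
+ }
+
+This is also why bad_spectre_microcode() can drop its 'c->x86 != 6'
+early return above: each table entry's vfm now carries the family, so
+a family-5 or non-Intel part simply never matches.
+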
--- /dev/null
+From ada5574116ee624d90d514b113a3594f2fef33c0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Apr 2024 14:19:03 -0700
+Subject: x86/cpu/vfm: Add/initialize x86_vfm field to struct cpuinfo_x86
+
+From: Tony Luck <tony.luck@intel.com>
+
+[ Upstream commit a9d0adce69075192961f3be466c4810a21b7bc9e ]
+
+Refactor struct cpuinfo_x86 so that the vendor, family, and model
+fields are overlaid in a union with a 32-bit field that combines
+all three (together with a one-byte reserved field in the upper
+byte).
+
+This will make it easy, cheap, and reliable to check all three
+values at once.
+
+See
+
+ https://lore.kernel.org/r/Zgr6kT8oULbnmEXx@agluck-desk3
+
+for why the ordering is (low-to-high bits):
+
+ (vendor, family, model)
+
+ [ bp: Move comments over the line, add the backstory about the
+ particular order of the fields. ]
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lore.kernel.org/r/20240416211941.9369-2-tony.luck@intel.com
+Stable-dep-of: c9a4b55431e5 ("x86/cpu: Add Lunar Lake to list of CPUs with a broken MONITOR implementation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/processor.h | 20 +++++++++++++++++---
+ 1 file changed, 17 insertions(+), 3 deletions(-)
+
+diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
+index 6e775303d687..428348e7f06c 100644
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -81,9 +81,23 @@ extern u16 __read_mostly tlb_lld_1g[NR_INFO];
+ */
+
+ struct cpuinfo_x86 {
+- __u8 x86; /* CPU family */
+- __u8 x86_vendor; /* CPU vendor */
+- __u8 x86_model;
++ union {
++ /*
++ * The particular ordering (low-to-high) of (vendor,
++ * family, model) is done in case range of models, like
++ * it is usually done on AMD, need to be compared.
++ */
++ struct {
++ __u8 x86_model;
++ /* CPU family */
++ __u8 x86;
++ /* CPU vendor */
++ __u8 x86_vendor;
++ __u8 x86_reserved;
++ };
++ /* combined vendor, family, model */
++ __u32 x86_vfm;
++ };
+ __u8 x86_stepping;
+ #ifdef CONFIG_X86_64
+ /* Number of 4K pages in DTLB/ITLB combined(in pages): */
+--
+2.39.5
+
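+A standalone sketch of the union trick (x86 is little-endian, which is
+what makes the byte fields land in the right bits of x86_vfm; toy code,
+not the kernel struct):
+
+ #include <stdio.h>
+
+ union vfm {
+         struct {
+                 unsigned char x86_model;     /* low byte */
+                 unsigned char x86;           /* CPU family */
+                 unsigned char x86_vendor;
+                 unsigned char x86_reserved;
+         };
+         unsigned int x86_vfm;   /* combined vendor, family, model */
+ };
+
+ int main(void)
+ {
+         union vfm c = { .x86_model = 0x3D, .x86 = 6, .x86_vendor = 0 };
+
+         /* One load, one compare for all three fields: */
+         printf("x86_vfm = 0x%08x\n", c.x86_vfm);    /* 0x0000063d */
+
+         /*
+          * Model in the low bits means that, for a fixed vendor and
+          * family, a model range is a contiguous x86_vfm range - the
+          * AMD-style range checks the backstory comment refers to.
+          */
+         union vfm lo = { .x86_model = 0x30, .x86 = 0x19,
+                          .x86_vendor = 2 /* X86_VENDOR_AMD */ };
+         union vfm hi = { .x86_model = 0x3f, .x86 = 0x19,
+                          .x86_vendor = 2 };
+
+         printf("in range: %d\n",    /* 0: different vendor/family */
+                c.x86_vfm >= lo.x86_vfm && c.x86_vfm <= hi.x86_vfm);
+         return 0;
+ }
+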
--- /dev/null
+From d7afd167aa425b061b981899a6c04b0b44b1f83f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 16 Apr 2024 14:19:05 -0700
+Subject: x86/cpu/vfm: Update arch/x86/include/asm/intel-family.h
+
+From: Tony Luck <tony.luck@intel.com>
+
+[ Upstream commit f055b6260eb3ef20a6e310d1e555a5d5a0a28ca0 ]
+
+New CPU #defines encode vendor and family as well as model.
+
+Update the example usage comment in arch/x86/kernel/cpu/match.c.
+
+Signed-off-by: Tony Luck <tony.luck@intel.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Link: https://lore.kernel.org/r/20240416211941.9369-4-tony.luck@intel.com
+Stable-dep-of: c9a4b55431e5 ("x86/cpu: Add Lunar Lake to list of CPUs with a broken MONITOR implementation")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/intel-family.h | 84 +++++++++++++++++++++++++++++
+ arch/x86/kernel/cpu/match.c | 3 +-
+ 2 files changed, 85 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h
+index d0941f4c2724..f81a851c46dc 100644
+--- a/arch/x86/include/asm/intel-family.h
++++ b/arch/x86/include/asm/intel-family.h
+@@ -40,137 +40,221 @@
+ * their own names :-(
+ */
+
++#define IFM(_fam, _model) VFM_MAKE(X86_VENDOR_INTEL, _fam, _model)
++
+ /* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */
+ #define INTEL_FAM6_ANY X86_MODEL_ANY
++/* Wildcard match for FAM6 so X86_MATCH_VFM(ANY) works */
++#define INTEL_ANY IFM(X86_FAMILY_ANY, X86_MODEL_ANY)
+
+ #define INTEL_FAM6_CORE_YONAH 0x0E
++#define INTEL_CORE_YONAH IFM(6, 0x0E)
+
+ #define INTEL_FAM6_CORE2_MEROM 0x0F
++#define INTEL_CORE2_MEROM IFM(6, 0x0F)
+ #define INTEL_FAM6_CORE2_MEROM_L 0x16
++#define INTEL_CORE2_MEROM_L IFM(6, 0x16)
+ #define INTEL_FAM6_CORE2_PENRYN 0x17
++#define INTEL_CORE2_PENRYN IFM(6, 0x17)
+ #define INTEL_FAM6_CORE2_DUNNINGTON 0x1D
++#define INTEL_CORE2_DUNNINGTON IFM(6, 0x1D)
+
+ #define INTEL_FAM6_NEHALEM 0x1E
++#define INTEL_NEHALEM IFM(6, 0x1E)
+ #define INTEL_FAM6_NEHALEM_G 0x1F /* Auburndale / Havendale */
++#define INTEL_NEHALEM_G IFM(6, 0x1F) /* Auburndale / Havendale */
+ #define INTEL_FAM6_NEHALEM_EP 0x1A
++#define INTEL_NEHALEM_EP IFM(6, 0x1A)
+ #define INTEL_FAM6_NEHALEM_EX 0x2E
++#define INTEL_NEHALEM_EX IFM(6, 0x2E)
+
+ #define INTEL_FAM6_WESTMERE 0x25
++#define INTEL_WESTMERE IFM(6, 0x25)
+ #define INTEL_FAM6_WESTMERE_EP 0x2C
++#define INTEL_WESTMERE_EP IFM(6, 0x2C)
+ #define INTEL_FAM6_WESTMERE_EX 0x2F
++#define INTEL_WESTMERE_EX IFM(6, 0x2F)
+
+ #define INTEL_FAM6_SANDYBRIDGE 0x2A
++#define INTEL_SANDYBRIDGE IFM(6, 0x2A)
+ #define INTEL_FAM6_SANDYBRIDGE_X 0x2D
++#define INTEL_SANDYBRIDGE_X IFM(6, 0x2D)
+ #define INTEL_FAM6_IVYBRIDGE 0x3A
++#define INTEL_IVYBRIDGE IFM(6, 0x3A)
+ #define INTEL_FAM6_IVYBRIDGE_X 0x3E
++#define INTEL_IVYBRIDGE_X IFM(6, 0x3E)
+
+ #define INTEL_FAM6_HASWELL 0x3C
++#define INTEL_HASWELL IFM(6, 0x3C)
+ #define INTEL_FAM6_HASWELL_X 0x3F
++#define INTEL_HASWELL_X IFM(6, 0x3F)
+ #define INTEL_FAM6_HASWELL_L 0x45
++#define INTEL_HASWELL_L IFM(6, 0x45)
+ #define INTEL_FAM6_HASWELL_G 0x46
++#define INTEL_HASWELL_G IFM(6, 0x46)
+
+ #define INTEL_FAM6_BROADWELL 0x3D
++#define INTEL_BROADWELL IFM(6, 0x3D)
+ #define INTEL_FAM6_BROADWELL_G 0x47
++#define INTEL_BROADWELL_G IFM(6, 0x47)
+ #define INTEL_FAM6_BROADWELL_X 0x4F
++#define INTEL_BROADWELL_X IFM(6, 0x4F)
+ #define INTEL_FAM6_BROADWELL_D 0x56
++#define INTEL_BROADWELL_D IFM(6, 0x56)
+
+ #define INTEL_FAM6_SKYLAKE_L 0x4E /* Sky Lake */
++#define INTEL_SKYLAKE_L IFM(6, 0x4E) /* Sky Lake */
+ #define INTEL_FAM6_SKYLAKE 0x5E /* Sky Lake */
++#define INTEL_SKYLAKE IFM(6, 0x5E) /* Sky Lake */
+ #define INTEL_FAM6_SKYLAKE_X 0x55 /* Sky Lake */
++#define INTEL_SKYLAKE_X IFM(6, 0x55) /* Sky Lake */
+ /* CASCADELAKE_X 0x55 Sky Lake -- s: 7 */
+ /* COOPERLAKE_X 0x55 Sky Lake -- s: 11 */
+
+ #define INTEL_FAM6_KABYLAKE_L 0x8E /* Sky Lake */
++#define INTEL_KABYLAKE_L IFM(6, 0x8E) /* Sky Lake */
+ /* AMBERLAKE_L 0x8E Sky Lake -- s: 9 */
+ /* COFFEELAKE_L 0x8E Sky Lake -- s: 10 */
+ /* WHISKEYLAKE_L 0x8E Sky Lake -- s: 11,12 */
+
+ #define INTEL_FAM6_KABYLAKE 0x9E /* Sky Lake */
++#define INTEL_KABYLAKE IFM(6, 0x9E) /* Sky Lake */
+ /* COFFEELAKE 0x9E Sky Lake -- s: 10-13 */
+
+ #define INTEL_FAM6_COMETLAKE 0xA5 /* Sky Lake */
++#define INTEL_COMETLAKE IFM(6, 0xA5) /* Sky Lake */
+ #define INTEL_FAM6_COMETLAKE_L 0xA6 /* Sky Lake */
++#define INTEL_COMETLAKE_L IFM(6, 0xA6) /* Sky Lake */
+
+ #define INTEL_FAM6_CANNONLAKE_L 0x66 /* Palm Cove */
++#define INTEL_CANNONLAKE_L IFM(6, 0x66) /* Palm Cove */
+
+ #define INTEL_FAM6_ICELAKE_X 0x6A /* Sunny Cove */
++#define INTEL_ICELAKE_X IFM(6, 0x6A) /* Sunny Cove */
+ #define INTEL_FAM6_ICELAKE_D 0x6C /* Sunny Cove */
++#define INTEL_ICELAKE_D IFM(6, 0x6C) /* Sunny Cove */
+ #define INTEL_FAM6_ICELAKE 0x7D /* Sunny Cove */
++#define INTEL_ICELAKE IFM(6, 0x7D) /* Sunny Cove */
+ #define INTEL_FAM6_ICELAKE_L 0x7E /* Sunny Cove */
++#define INTEL_ICELAKE_L IFM(6, 0x7E) /* Sunny Cove */
+ #define INTEL_FAM6_ICELAKE_NNPI 0x9D /* Sunny Cove */
++#define INTEL_ICELAKE_NNPI IFM(6, 0x9D) /* Sunny Cove */
+
+ #define INTEL_FAM6_ROCKETLAKE 0xA7 /* Cypress Cove */
++#define INTEL_ROCKETLAKE IFM(6, 0xA7) /* Cypress Cove */
+
+ #define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */
++#define INTEL_TIGERLAKE_L IFM(6, 0x8C) /* Willow Cove */
+ #define INTEL_FAM6_TIGERLAKE 0x8D /* Willow Cove */
++#define INTEL_TIGERLAKE IFM(6, 0x8D) /* Willow Cove */
+
+ #define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */
++#define INTEL_SAPPHIRERAPIDS_X IFM(6, 0x8F) /* Golden Cove */
+
+ #define INTEL_FAM6_EMERALDRAPIDS_X 0xCF
++#define INTEL_EMERALDRAPIDS_X IFM(6, 0xCF)
+
+ #define INTEL_FAM6_GRANITERAPIDS_X 0xAD
++#define INTEL_GRANITERAPIDS_X IFM(6, 0xAD)
+ #define INTEL_FAM6_GRANITERAPIDS_D 0xAE
++#define INTEL_GRANITERAPIDS_D IFM(6, 0xAE)
+
+ /* "Hybrid" Processors (P-Core/E-Core) */
+
+ #define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */
++#define INTEL_LAKEFIELD IFM(6, 0x8A) /* Sunny Cove / Tremont */
+
+ #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */
++#define INTEL_ALDERLAKE IFM(6, 0x97) /* Golden Cove / Gracemont */
+ #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */
++#define INTEL_ALDERLAKE_L IFM(6, 0x9A) /* Golden Cove / Gracemont */
+
+ #define INTEL_FAM6_RAPTORLAKE 0xB7 /* Raptor Cove / Enhanced Gracemont */
++#define INTEL_RAPTORLAKE IFM(6, 0xB7) /* Raptor Cove / Enhanced Gracemont */
+ #define INTEL_FAM6_RAPTORLAKE_P 0xBA
++#define INTEL_RAPTORLAKE_P IFM(6, 0xBA)
+ #define INTEL_FAM6_RAPTORLAKE_S 0xBF
++#define INTEL_RAPTORLAKE_S IFM(6, 0xBF)
+
+ #define INTEL_FAM6_METEORLAKE 0xAC
++#define INTEL_METEORLAKE IFM(6, 0xAC)
+ #define INTEL_FAM6_METEORLAKE_L 0xAA
++#define INTEL_METEORLAKE_L IFM(6, 0xAA)
+
+ #define INTEL_FAM6_ARROWLAKE_H 0xC5
++#define INTEL_ARROWLAKE_H IFM(6, 0xC5)
+ #define INTEL_FAM6_ARROWLAKE 0xC6
++#define INTEL_ARROWLAKE IFM(6, 0xC6)
+ #define INTEL_FAM6_ARROWLAKE_U 0xB5
++#define INTEL_ARROWLAKE_U IFM(6, 0xB5)
+
+ #define INTEL_FAM6_LUNARLAKE_M 0xBD
++#define INTEL_LUNARLAKE_M IFM(6, 0xBD)
+
+ /* "Small Core" Processors (Atom/E-Core) */
+
+ #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */
++#define INTEL_ATOM_BONNELL IFM(6, 0x1C) /* Diamondville, Pineview */
+ #define INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */
++#define INTEL_ATOM_BONNELL_MID IFM(6, 0x26) /* Silverthorne, Lincroft */
+
+ #define INTEL_FAM6_ATOM_SALTWELL 0x36 /* Cedarview */
++#define INTEL_ATOM_SALTWELL IFM(6, 0x36) /* Cedarview */
+ #define INTEL_FAM6_ATOM_SALTWELL_MID 0x27 /* Penwell */
++#define INTEL_ATOM_SALTWELL_MID IFM(6, 0x27) /* Penwell */
+ #define INTEL_FAM6_ATOM_SALTWELL_TABLET 0x35 /* Cloverview */
++#define INTEL_ATOM_SALTWELL_TABLET IFM(6, 0x35) /* Cloverview */
+
+ #define INTEL_FAM6_ATOM_SILVERMONT 0x37 /* Bay Trail, Valleyview */
++#define INTEL_ATOM_SILVERMONT IFM(6, 0x37) /* Bay Trail, Valleyview */
+ #define INTEL_FAM6_ATOM_SILVERMONT_D 0x4D /* Avaton, Rangely */
++#define INTEL_ATOM_SILVERMONT_D IFM(6, 0x4D) /* Avaton, Rangely */
+ #define INTEL_FAM6_ATOM_SILVERMONT_MID 0x4A /* Merriefield */
++#define INTEL_ATOM_SILVERMONT_MID IFM(6, 0x4A) /* Merriefield */
+
+ #define INTEL_FAM6_ATOM_AIRMONT 0x4C /* Cherry Trail, Braswell */
++#define INTEL_ATOM_AIRMONT IFM(6, 0x4C) /* Cherry Trail, Braswell */
+ #define INTEL_FAM6_ATOM_AIRMONT_MID 0x5A /* Moorefield */
++#define INTEL_ATOM_AIRMONT_MID IFM(6, 0x5A) /* Moorefield */
+ #define INTEL_FAM6_ATOM_AIRMONT_NP 0x75 /* Lightning Mountain */
++#define INTEL_ATOM_AIRMONT_NP IFM(6, 0x75) /* Lightning Mountain */
+
+ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C /* Apollo Lake */
++#define INTEL_ATOM_GOLDMONT IFM(6, 0x5C) /* Apollo Lake */
+ #define INTEL_FAM6_ATOM_GOLDMONT_D 0x5F /* Denverton */
++#define INTEL_ATOM_GOLDMONT_D IFM(6, 0x5F) /* Denverton */
+
+ /* Note: the micro-architecture is "Goldmont Plus" */
+ #define INTEL_FAM6_ATOM_GOLDMONT_PLUS 0x7A /* Gemini Lake */
++#define INTEL_ATOM_GOLDMONT_PLUS IFM(6, 0x7A) /* Gemini Lake */
+
+ #define INTEL_FAM6_ATOM_TREMONT_D 0x86 /* Jacobsville */
++#define INTEL_ATOM_TREMONT_D IFM(6, 0x86) /* Jacobsville */
+ #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */
++#define INTEL_ATOM_TREMONT IFM(6, 0x96) /* Elkhart Lake */
+ #define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */
++#define INTEL_ATOM_TREMONT_L IFM(6, 0x9C) /* Jasper Lake */
+
+ #define INTEL_FAM6_ATOM_GRACEMONT 0xBE /* Alderlake N */
++#define INTEL_ATOM_GRACEMONT IFM(6, 0xBE) /* Alderlake N */
+
+ #define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */
++#define INTEL_ATOM_CRESTMONT_X IFM(6, 0xAF) /* Sierra Forest */
+ #define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */
++#define INTEL_ATOM_CRESTMONT IFM(6, 0xB6) /* Grand Ridge */
+
+ #define INTEL_FAM6_ATOM_DARKMONT_X 0xDD /* Clearwater Forest */
++#define INTEL_ATOM_DARKMONT_X IFM(6, 0xDD) /* Clearwater Forest */
+
+ /* Xeon Phi */
+
+ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */
++#define INTEL_XEON_PHI_KNL IFM(6, 0x57) /* Knights Landing */
+ #define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */
++#define INTEL_XEON_PHI_KNM IFM(6, 0x85) /* Knights Mill */
+
+ /* Family 5 */
+ #define INTEL_FAM5_QUARK_X1000 0x09 /* Quark X1000 SoC */
++#define INTEL_QUARK_X1000 IFM(5, 0x09) /* Quark X1000 SoC */
+
+ #endif /* _ASM_X86_INTEL_FAMILY_H */
+diff --git a/arch/x86/kernel/cpu/match.c b/arch/x86/kernel/cpu/match.c
+index ae71b8ef909c..978a3094e8ff 100644
+--- a/arch/x86/kernel/cpu/match.c
++++ b/arch/x86/kernel/cpu/match.c
+@@ -17,8 +17,7 @@
+ *
+ * A typical table entry would be to match a specific CPU
+ *
+- * X86_MATCH_VENDOR_FAM_MODEL_FEATURE(INTEL, 6, INTEL_FAM6_BROADWELL,
+- * X86_FEATURE_ANY, NULL);
++ * X86_MATCH_VFM_FEATURE(INTEL_BROADWELL, X86_FEATURE_ANY, NULL);
+ *
+ * Fields can be wildcarded with %X86_VENDOR_ANY, %X86_FAMILY_ANY,
+ * %X86_MODEL_ANY, %X86_FEATURE_ANY (except for vendor)
+--
+2.39.5
+
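+After this patch every legacy model-only define has a vfm twin. A quick
+numeric check of what the new names expand to, with IFM() reproduced
+from the hunk above and VFM_MAKE() assumed to pack vendor/family/model
+into bits 16/8/0, consistent with the union layout in the previous
+patch:
+
+ #include <stdio.h>
+
+ #define VFM_MAKE(vendor, family, model) \
+         (((vendor) << 16) | ((family) << 8) | (model))
+ #define IFM(fam, model) VFM_MAKE(0 /* X86_VENDOR_INTEL */, fam, model)
+
+ #define INTEL_FAM6_BROADWELL  0x3D            /* old: model only */
+ #define INTEL_BROADWELL       IFM(6, 0x3D)    /* new: full vfm */
+ #define INTEL_QUARK_X1000     IFM(5, 0x09)    /* family 5 fits too */
+
+ int main(void)
+ {
+         printf("INTEL_FAM6_BROADWELL = 0x%02X\n", INTEL_FAM6_BROADWELL);
+         printf("INTEL_BROADWELL      = 0x%04X\n", INTEL_BROADWELL);
+         printf("INTEL_QUARK_X1000    = 0x%04X\n", INTEL_QUARK_X1000);
+         return 0;
+ }
+
+Note that non-family-6 parts such as the Quark X1000 now get uniform
+names, where the old scheme needed the separate INTEL_FAM5_ prefix.
+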