6.12-stable patches
author    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Tue, 10 Dec 2024 09:36:04 +0000 (10:36 +0100)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
          Tue, 10 Dec 2024 09:36:04 +0000 (10:36 +0100)
added patches:
arch_numa-restore-nid-checks-before-registering-a-memblock-with-a-node.patch
bpf-fix-oob-devmap-writes-when-deleting-elements.patch
cacheinfo-allocate-memory-during-cpu-hotplug-if-not-done-from-the-primary-cpu.patch
dma-buf-fix-dma_fence_array_signaled-v4.patch
dma-fence-fix-reference-leak-on-fence-merge-failure-path.patch
dma-fence-use-kernel-s-sort-for-merging-fences.patch
drm-amd-display-add-a-left-edge-pixel-if-in-ycbcr422-or-ycbcr420-and-odm.patch
drm-amd-display-correct-prefetch-calculation.patch
drm-amd-display-limit-vtotal-range-to-max-hw-cap-minus-fp.patch
drm-amd-pm-fix-and-simplify-workload-handling.patch
drm-amdgpu-hdp4.0-do-a-posting-read-when-flushing-hdp.patch
drm-amdgpu-hdp5.0-do-a-posting-read-when-flushing-hdp.patch
drm-amdgpu-hdp5.2-do-a-posting-read-when-flushing-hdp.patch
drm-amdgpu-hdp6.0-do-a-posting-read-when-flushing-hdp.patch
drm-amdgpu-hdp7.0-do-a-posting-read-when-flushing-hdp.patch
drm-amdgpu-rework-resume-handling-for-display-v2.patch
drm-amdkfd-add-mec-version-that-supports-no-pcie-atomics-for-gfx12.patch
drm-amdkfd-hard-code-cacheline-for-gc943-gc944.patch
drm-dp_mst-fix-mst-sideband-message-body-length-check.patch
drm-dp_mst-fix-resetting-msg-rx-state-after-topology-removal.patch
drm-dp_mst-verify-request-type-in-the-corresponding-down-message-reply.patch
mmc-core-further-prevent-card-detect-during-shutdown.patch
mmc-sdhci-pci-add-dmi-quirk-for-missing-cd-gpio-on-vexia-edu-atla-10-tablet.patch
modpost-add-.irqentry.text-to-other_sections.patch
regmap-detach-regmap-from-dev-on-regmap_exit.patch
selftest-hugetlb_dio-fix-test-naming.patch
selftests-damon-add-_damon_sysfs.py-to-test_files.patch
x86-cacheinfo-delete-global-num_cache_leaves.patch
x86-cpu-add-lunar-lake-to-list-of-cpus-with-a-broken-monitor-implementation.patch
x86-kexec-restore-gdt-on-return-from-preserve_context-kexec.patch
xsk-fix-oob-map-writes-when-deleting-elements.patch

32 files changed:
queue-6.12/arch_numa-restore-nid-checks-before-registering-a-memblock-with-a-node.patch [new file with mode: 0644]
queue-6.12/bpf-fix-oob-devmap-writes-when-deleting-elements.patch [new file with mode: 0644]
queue-6.12/cacheinfo-allocate-memory-during-cpu-hotplug-if-not-done-from-the-primary-cpu.patch [new file with mode: 0644]
queue-6.12/dma-buf-fix-dma_fence_array_signaled-v4.patch [new file with mode: 0644]
queue-6.12/dma-fence-fix-reference-leak-on-fence-merge-failure-path.patch [new file with mode: 0644]
queue-6.12/dma-fence-use-kernel-s-sort-for-merging-fences.patch [new file with mode: 0644]
queue-6.12/drm-amd-display-add-a-left-edge-pixel-if-in-ycbcr422-or-ycbcr420-and-odm.patch [new file with mode: 0644]
queue-6.12/drm-amd-display-correct-prefetch-calculation.patch [new file with mode: 0644]
queue-6.12/drm-amd-display-limit-vtotal-range-to-max-hw-cap-minus-fp.patch [new file with mode: 0644]
queue-6.12/drm-amd-pm-fix-and-simplify-workload-handling.patch [new file with mode: 0644]
queue-6.12/drm-amdgpu-hdp4.0-do-a-posting-read-when-flushing-hdp.patch [new file with mode: 0644]
queue-6.12/drm-amdgpu-hdp5.0-do-a-posting-read-when-flushing-hdp.patch [new file with mode: 0644]
queue-6.12/drm-amdgpu-hdp5.2-do-a-posting-read-when-flushing-hdp.patch [new file with mode: 0644]
queue-6.12/drm-amdgpu-hdp6.0-do-a-posting-read-when-flushing-hdp.patch [new file with mode: 0644]
queue-6.12/drm-amdgpu-hdp7.0-do-a-posting-read-when-flushing-hdp.patch [new file with mode: 0644]
queue-6.12/drm-amdgpu-rework-resume-handling-for-display-v2.patch [new file with mode: 0644]
queue-6.12/drm-amdkfd-add-mec-version-that-supports-no-pcie-atomics-for-gfx12.patch [new file with mode: 0644]
queue-6.12/drm-amdkfd-hard-code-cacheline-for-gc943-gc944.patch [new file with mode: 0644]
queue-6.12/drm-dp_mst-fix-mst-sideband-message-body-length-check.patch [new file with mode: 0644]
queue-6.12/drm-dp_mst-fix-resetting-msg-rx-state-after-topology-removal.patch [new file with mode: 0644]
queue-6.12/drm-dp_mst-verify-request-type-in-the-corresponding-down-message-reply.patch [new file with mode: 0644]
queue-6.12/mmc-core-further-prevent-card-detect-during-shutdown.patch [new file with mode: 0644]
queue-6.12/mmc-sdhci-pci-add-dmi-quirk-for-missing-cd-gpio-on-vexia-edu-atla-10-tablet.patch [new file with mode: 0644]
queue-6.12/modpost-add-.irqentry.text-to-other_sections.patch [new file with mode: 0644]
queue-6.12/regmap-detach-regmap-from-dev-on-regmap_exit.patch [new file with mode: 0644]
queue-6.12/selftest-hugetlb_dio-fix-test-naming.patch [new file with mode: 0644]
queue-6.12/selftests-damon-add-_damon_sysfs.py-to-test_files.patch [new file with mode: 0644]
queue-6.12/series
queue-6.12/x86-cacheinfo-delete-global-num_cache_leaves.patch [new file with mode: 0644]
queue-6.12/x86-cpu-add-lunar-lake-to-list-of-cpus-with-a-broken-monitor-implementation.patch [new file with mode: 0644]
queue-6.12/x86-kexec-restore-gdt-on-return-from-preserve_context-kexec.patch [new file with mode: 0644]
queue-6.12/xsk-fix-oob-map-writes-when-deleting-elements.patch [new file with mode: 0644]

diff --git a/queue-6.12/arch_numa-restore-nid-checks-before-registering-a-memblock-with-a-node.patch b/queue-6.12/arch_numa-restore-nid-checks-before-registering-a-memblock-with-a-node.patch
new file mode 100644 (file)
index 0000000..bdc3214
--- /dev/null
@@ -0,0 +1,110 @@
+From 180bbad698641873120a48857bb3b9f3166bf684 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Sun, 1 Dec 2024 09:27:02 +0000
+Subject: arch_numa: Restore nid checks before registering a memblock with a node
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit 180bbad698641873120a48857bb3b9f3166bf684 upstream.
+
+Commit 767507654c22 ("arch_numa: switch over to numa_memblks")
+significantly cleaned up the NUMA registration code, but also
+dropped a significant check that refused to configure a memblock
+with an invalid nid.
+
+On "quality hardware" such as my ThunderX machine, this results
+in a kernel that dies immediately:
+
+[    0.000000] Booting Linux on physical CPU 0x0000000000 [0x431f0a10]
+[    0.000000] Linux version 6.12.0-00013-g8920d74cf8db (maz@valley-girl) (gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40) #3872 SMP PREEMPT Wed Nov 27 15:25:49 GMT 2024
+[    0.000000] KASLR disabled due to lack of seed
+[    0.000000] Machine model: Cavium ThunderX CN88XX board
+[    0.000000] efi: EFI v2.4 by American Megatrends
+[    0.000000] efi: ESRT=0xffce0ff18 SMBIOS 3.0=0xfffb0000 ACPI 2.0=0xffec60000 MEMRESERVE=0xffc905d98
+[    0.000000] esrt: Reserving ESRT space from 0x0000000ffce0ff18 to 0x0000000ffce0ff50.
+[    0.000000] earlycon: pl11 at MMIO 0x000087e024000000 (options '115200n8')
+[    0.000000] printk: legacy bootconsole [pl11] enabled
+[    0.000000] NODE_DATA(0) allocated [mem 0xff6754580-0xff67566bf]
+[    0.000000] Unable to handle kernel paging request at virtual address 0000000000001d40
+[    0.000000] Mem abort info:
+[    0.000000]   ESR = 0x0000000096000004
+[    0.000000]   EC = 0x25: DABT (current EL), IL = 32 bits
+[    0.000000]   SET = 0, FnV = 0
+[    0.000000]   EA = 0, S1PTW = 0
+[    0.000000]   FSC = 0x04: level 0 translation fault
+[    0.000000] Data abort info:
+[    0.000000]   ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
+[    0.000000]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
+[    0.000000]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
+[    0.000000] [0000000000001d40] user address but active_mm is swapper
+[    0.000000] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP
+[    0.000000] Modules linked in:
+[    0.000000] CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted 6.12.0-00013-g8920d74cf8db #3872
+[    0.000000] Hardware name: Cavium ThunderX CN88XX board (DT)
+[    0.000000] pstate: a00000c5 (NzCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+[    0.000000] pc : sparse_init_nid+0x54/0x428
+[    0.000000] lr : sparse_init+0x118/0x240
+[    0.000000] sp : ffff800081da3cb0
+[    0.000000] x29: ffff800081da3cb0 x28: 0000000fedbab10c x27: 0000000000000001
+[    0.000000] x26: 0000000ffee250f8 x25: 0000000000000001 x24: ffff800082102cd0
+[    0.000000] x23: 0000000000000001 x22: 0000000000000000 x21: 00000000001fffff
+[    0.000000] x20: 0000000000000001 x19: 0000000000000000 x18: ffffffffffffffff
+[    0.000000] x17: 0000000001b00000 x16: 0000000ffd130000 x15: 0000000000000000
+[    0.000000] x14: 00000000003e0000 x13: 00000000000001c8 x12: 0000000000000014
+[    0.000000] x11: ffff800081e82860 x10: ffff8000820fb2c8 x9 : ffff8000820fb490
+[    0.000000] x8 : 0000000000ffed20 x7 : 0000000000000014 x6 : 00000000001fffff
+[    0.000000] x5 : 00000000ffffffff x4 : 0000000000000000 x3 : 0000000000000000
+[    0.000000] x2 : 0000000000000000 x1 : 0000000000000040 x0 : 0000000000000007
+[    0.000000] Call trace:
+[    0.000000]  sparse_init_nid+0x54/0x428
+[    0.000000]  sparse_init+0x118/0x240
+[    0.000000]  bootmem_init+0x70/0x1c8
+[    0.000000]  setup_arch+0x184/0x270
+[    0.000000]  start_kernel+0x74/0x670
+[    0.000000]  __primary_switched+0x80/0x90
+[    0.000000] Code: f865d804 d37df060 cb030000 d2800003 (b95d4084)
+[    0.000000] ---[ end trace 0000000000000000 ]---
+[    0.000000] Kernel panic - not syncing: Attempted to kill the idle task!
+[    0.000000] ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]---
+
+while previous kernel versions were able to recognise how brain-damaged
+the machine is, and only build a fake node.
+
+Use the memblock_validate_numa_coverage() helper to restore some sanity
+and a "working" system.
+
+Fixes: 767507654c22 ("arch_numa: switch over to numa_memblks")
+Suggested-by: Mike Rapoport <rppt@kernel.org>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20241201092702.3792845-1-maz@kernel.org
+Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/base/arch_numa.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
+index e18701676426..c99f2ab105e5 100644
+--- a/drivers/base/arch_numa.c
++++ b/drivers/base/arch_numa.c
+@@ -208,6 +208,10 @@ static int __init numa_register_nodes(void)
+ {
+       int nid;
++      /* Check the validity of the memblock/node mapping */
++      if (!memblock_validate_numa_coverage(0))
++              return -EINVAL;
++
+       /* Finally register nodes. */
+       for_each_node_mask(nid, numa_nodes_parsed) {
+               unsigned long start_pfn, end_pfn;
+-- 
+2.47.1
+
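For context, memblock_validate_numa_coverage() succeeds only when the amount of memory left without a valid node id stays within the given threshold (0 in the hunk above, i.e. none tolerated). Below is a rough userspace model of that check; the region list and helper names are illustrative stand-ins, not the kernel implementation:

    #include <stdbool.h>
    #include <stdio.h>

    #define NUMA_NO_NODE (-1)

    struct region { unsigned long size; int nid; };

    /* Illustrative only: succeed unless more than threshold_bytes of
     * memory is left without a valid node id (cf. the 0 passed in the
     * patch, meaning no uncovered memory is tolerated). */
    static bool validate_numa_coverage(const struct region *r, int n,
                                       unsigned long threshold_bytes)
    {
            unsigned long uncovered = 0;

            for (int i = 0; i < n; i++)
                    if (r[i].nid == NUMA_NO_NODE)
                            uncovered += r[i].size;
            return uncovered <= threshold_bytes;
    }

    int main(void)
    {
            struct region fw_map[] = {
                    { 1UL << 30, 0 },            /* 1 GiB on node 0 */
                    { 1UL << 30, NUMA_NO_NODE }, /* firmware gave no nid */
            };

            /* Mirrors the patch: refuse to register nodes at all. */
            if (!validate_numa_coverage(fw_map, 2, 0))
                    puts("invalid nid mapping, bail out (-EINVAL)");
            return 0;
    }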
diff --git a/queue-6.12/bpf-fix-oob-devmap-writes-when-deleting-elements.patch b/queue-6.12/bpf-fix-oob-devmap-writes-when-deleting-elements.patch
new file mode 100644 (file)
index 0000000..523c579
--- /dev/null
@@ -0,0 +1,110 @@
+From ab244dd7cf4c291f82faacdc50b45cc0f55b674d Mon Sep 17 00:00:00 2001
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Date: Fri, 22 Nov 2024 13:10:30 +0100
+Subject: bpf: fix OOB devmap writes when deleting elements
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+commit ab244dd7cf4c291f82faacdc50b45cc0f55b674d upstream.
+
+Jordy reported an issue against XSKMAP which also applies to DEVMAP - the
+index used for accessing a map entry, being a signed integer, causes
+OOB writes. The fix is as simple as changing the type from int to u32;
+however, compared to the XSKMAP case, one more thing needs to be
+addressed.
+
+When the map is released from the system via dev_map_free(), we iterate
+through all of the entries, and the iterator variable is also an int,
+which implies OOB accesses. Again, change it to u32.
+
+Example splat below:
+
+[  160.724676] BUG: unable to handle page fault for address: ffffc8fc2c001000
+[  160.731662] #PF: supervisor read access in kernel mode
+[  160.736876] #PF: error_code(0x0000) - not-present page
+[  160.742095] PGD 0 P4D 0
+[  160.744678] Oops: Oops: 0000 [#1] PREEMPT SMP
+[  160.749106] CPU: 1 UID: 0 PID: 520 Comm: kworker/u145:12 Not tainted 6.12.0-rc1+ #487
+[  160.757050] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019
+[  160.767642] Workqueue: events_unbound bpf_map_free_deferred
+[  160.773308] RIP: 0010:dev_map_free+0x77/0x170
+[  160.777735] Code: 00 e8 fd 91 ed ff e8 b8 73 ed ff 41 83 7d 18 19 74 6e 41 8b 45 24 49 8b bd f8 00 00 00 31 db 85 c0 74 48 48 63 c3 48 8d 04 c7 <48> 8b 28 48 85 ed 74 30 48 8b 7d 18 48 85 ff 74 05 e8 b3 52 fa ff
+[  160.796777] RSP: 0018:ffffc9000ee1fe38 EFLAGS: 00010202
+[  160.802086] RAX: ffffc8fc2c001000 RBX: 0000000080000000 RCX: 0000000000000024
+[  160.809331] RDX: 0000000000000000 RSI: 0000000000000024 RDI: ffffc9002c001000
+[  160.816576] RBP: 0000000000000000 R08: 0000000000000023 R09: 0000000000000001
+[  160.823823] R10: 0000000000000001 R11: 00000000000ee6b2 R12: dead000000000122
+[  160.831066] R13: ffff88810c928e00 R14: ffff8881002df405 R15: 0000000000000000
+[  160.838310] FS:  0000000000000000(0000) GS:ffff8897e0c40000(0000) knlGS:0000000000000000
+[  160.846528] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[  160.852357] CR2: ffffc8fc2c001000 CR3: 0000000005c32006 CR4: 00000000007726f0
+[  160.859604] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[  160.866847] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[  160.874092] PKRU: 55555554
+[  160.876847] Call Trace:
+[  160.879338]  <TASK>
+[  160.881477]  ? __die+0x20/0x60
+[  160.884586]  ? page_fault_oops+0x15a/0x450
+[  160.888746]  ? search_extable+0x22/0x30
+[  160.892647]  ? search_bpf_extables+0x5f/0x80
+[  160.896988]  ? exc_page_fault+0xa9/0x140
+[  160.900973]  ? asm_exc_page_fault+0x22/0x30
+[  160.905232]  ? dev_map_free+0x77/0x170
+[  160.909043]  ? dev_map_free+0x58/0x170
+[  160.912857]  bpf_map_free_deferred+0x51/0x90
+[  160.917196]  process_one_work+0x142/0x370
+[  160.921272]  worker_thread+0x29e/0x3b0
+[  160.925082]  ? rescuer_thread+0x4b0/0x4b0
+[  160.929157]  kthread+0xd4/0x110
+[  160.932355]  ? kthread_park+0x80/0x80
+[  160.936079]  ret_from_fork+0x2d/0x50
+[  160.943396]  ? kthread_park+0x80/0x80
+[  160.950803]  ret_from_fork_asm+0x11/0x20
+[  160.958482]  </TASK>
+
+Fixes: 546ac1ffb70d ("bpf: add devmap, a map for storing net device references")
+CC: stable@vger.kernel.org
+Reported-by: Jordy Zomer <jordyzomer@google.com>
+Suggested-by: Jordy Zomer <jordyzomer@google.com>
+Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Acked-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Link: https://lore.kernel.org/r/20241122121030.716788-3-maciej.fijalkowski@intel.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/devmap.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/kernel/bpf/devmap.c
++++ b/kernel/bpf/devmap.c
+@@ -184,7 +184,7 @@ static struct bpf_map *dev_map_alloc(uni
+ static void dev_map_free(struct bpf_map *map)
+ {
+       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+-      int i;
++      u32 i;
+       /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+        * so the programs (can be more than one that used this map) were
+@@ -821,7 +821,7 @@ static long dev_map_delete_elem(struct b
+ {
+       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+       struct bpf_dtab_netdev *old_dev;
+-      int k = *(u32 *)key;
++      u32 k = *(u32 *)key;
+       if (k >= map->max_entries)
+               return -EINVAL;
+@@ -838,7 +838,7 @@ static long dev_map_hash_delete_elem(str
+ {
+       struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+       struct bpf_dtab_netdev *old_dev;
+-      int k = *(u32 *)key;
++      u32 k = *(u32 *)key;
+       unsigned long flags;
+       int ret = -ENOENT;
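To see why the signed index is dangerous: once max_entries exceeds INT_MAX, a valid key above 0x7fffffff turns negative when stored in an int, and as an array subscript it becomes a negative offset, i.e. a write before the map. A small, safe demonstration that only prints the offsets (assuming the usual two's-complement int):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t max_entries = 0x80000001u; /* > INT_MAX, a huge map */
            uint32_t key = 0x80000000u;         /* a valid slot in such a map */

            int k_signed = (int)key;            /* the old 'int k' */
            uint32_t k_fixed = key;             /* the fixed 'u32 k' */

            /* The bounds check passes either way: k_signed is promoted
             * back to unsigned when compared against the u32 max_entries. */
            printf("check passes: %d\n", (uint32_t)k_signed < max_entries);

            /* But as an array subscript, the signed index sign-extends to
             * a negative offset: map[k_signed] would write *before* the
             * map, while map[k_fixed] addresses the intended slot. */
            printf("signed offset: %lld elements\n", (long long)k_signed);
            printf("fixed  offset: %llu elements\n",
                   (unsigned long long)k_fixed);
            return 0;
    }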
diff --git a/queue-6.12/cacheinfo-allocate-memory-during-cpu-hotplug-if-not-done-from-the-primary-cpu.patch b/queue-6.12/cacheinfo-allocate-memory-during-cpu-hotplug-if-not-done-from-the-primary-cpu.patch
new file mode 100644 (file)
index 0000000..ead3b72
--- /dev/null
@@ -0,0 +1,103 @@
+From b3fce429a1e030b50c1c91351d69b8667eef627b Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Wed, 27 Nov 2024 16:22:46 -0800
+Subject: cacheinfo: Allocate memory during CPU hotplug if not done from the primary CPU
+
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+
+commit b3fce429a1e030b50c1c91351d69b8667eef627b upstream.
+
+Commit
+
+  5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU")
+
+adds functionality that architectures can use to optionally allocate and
+build cacheinfo early during boot. Commit
+
+  6539cffa9495 ("cacheinfo: Add arch specific early level initializer")
+
+lets secondary CPUs correct (and reallocate memory) cacheinfo data if
+needed.
+
+If the early build functionality is not used and cacheinfo does not need
+correction, memory for cacheinfo is never allocated. x86 does not use
+the early build functionality. Consequently, during the cacheinfo CPU
+hotplug callback, last_level_cache_is_valid() attempts to dereference
+a NULL pointer:
+
+  BUG: kernel NULL pointer dereference, address: 0000000000000100
+  #PF: supervisor read access in kernel mode
+  #PF: error_code(0x0000) - not present page
+  PGD 0 P4D 0
+  Oops: 0000 [#1] PREEPMT SMP NOPTI
+  CPU: 0 PID 19 Comm: cpuhp/0 Not tainted 6.4.0-rc2 #1
+  RIP: 0010: last_level_cache_is_valid+0x95/0xe0a
+
+Allocate memory for cacheinfo during the cacheinfo CPU hotplug callback
+if not done earlier.
+
+Moreover, before determining the validity of the last-level cache info,
+ensure that it has been allocated. Simply checking for non-zero
+cache_leaves() is not sufficient, as some architectures (e.g., Intel
+processors) have non-zero cache_leaves() before allocation.
+
+Dereferencing NULL cacheinfo can occur in update_per_cpu_data_slice_size().
+This function iterates over all online CPUs. However, a CPU may have come
+online recently, but its cacheinfo may not have been allocated yet.
+
+While here, remove an unnecessary indentation in allocate_cache_info().
+
+  [ bp: Massage. ]
+
+Fixes: 6539cffa9495 ("cacheinfo: Add arch specific early level initializer")
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Reviewed-by: Radu Rendec <rrendec@redhat.com>
+Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
+Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
+Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
+Cc: stable@vger.kernel.org # 6.3+
+Link: https://lore.kernel.org/r/20241128002247.26726-2-ricardo.neri-calderon@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/base/cacheinfo.c |   14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/drivers/base/cacheinfo.c
++++ b/drivers/base/cacheinfo.c
+@@ -58,7 +58,7 @@ bool last_level_cache_is_valid(unsigned
+ {
+       struct cacheinfo *llc;
+-      if (!cache_leaves(cpu))
++      if (!cache_leaves(cpu) || !per_cpu_cacheinfo(cpu))
+               return false;
+       llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
+@@ -463,11 +463,9 @@ int __weak populate_cache_leaves(unsigne
+       return -ENOENT;
+ }
+-static inline
+-int allocate_cache_info(int cpu)
++static inline int allocate_cache_info(int cpu)
+ {
+-      per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu),
+-                                       sizeof(struct cacheinfo), GFP_ATOMIC);
++      per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu), sizeof(struct cacheinfo), GFP_ATOMIC);
+       if (!per_cpu_cacheinfo(cpu)) {
+               cache_leaves(cpu) = 0;
+               return -ENOMEM;
+@@ -539,7 +537,11 @@ static inline int init_level_allocate_ci
+        */
+       ci_cacheinfo(cpu)->early_ci_levels = false;
+-      if (cache_leaves(cpu) <= early_leaves)
++      /*
++       * Some architectures (e.g., x86) do not use early initialization.
++       * Allocate memory now in such case.
++       */
++      if (cache_leaves(cpu) <= early_leaves && per_cpu_cacheinfo(cpu))
+               return 0;
+       kfree(per_cpu_cacheinfo(cpu));
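The guard added above separates two facts that x86 decouples: a CPU can report a non-zero leaf count long before the cacheinfo array is allocated. A toy model of why checking the leaf count alone dereferences NULL (hypothetical structures, not the kernel's):

    #include <stdio.h>
    #include <stdlib.h>

    struct cacheinfo { unsigned int level; };

    /* Hypothetical per-CPU state modelling the two independent facts the
     * patch separates: how many leaves the CPU reports vs. whether the
     * backing array was ever allocated. */
    struct cpu_cacheinfo {
            unsigned int num_leaves;   /* may be non-zero early on x86 */
            struct cacheinfo *info;    /* may still be NULL at that point */
    };

    static int last_level_cache_is_valid(const struct cpu_cacheinfo *ci)
    {
            /* The fix: guard the dereference on both conditions. */
            if (!ci->num_leaves || !ci->info)
                    return 0;
            return ci->info[ci->num_leaves - 1].level != 0;
    }

    int main(void)
    {
            struct cpu_cacheinfo ci = { .num_leaves = 4, .info = NULL };

            /* Checking num_leaves alone would dereference NULL here. */
            printf("valid: %d\n", last_level_cache_is_valid(&ci));

            ci.info = calloc(ci.num_leaves, sizeof(*ci.info));
            if (!ci.info)
                    return 1;
            ci.info[3].level = 3;
            printf("valid: %d\n", last_level_cache_is_valid(&ci));
            free(ci.info);
            return 0;
    }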
diff --git a/queue-6.12/dma-buf-fix-dma_fence_array_signaled-v4.patch b/queue-6.12/dma-buf-fix-dma_fence_array_signaled-v4.patch
new file mode 100644 (file)
index 0000000..c84e446
--- /dev/null
@@ -0,0 +1,75 @@
+From 78ac1c3558810486d90aa533b0039aa70487a3da Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
+Date: Fri, 8 Nov 2024 09:29:48 +0100
+Subject: dma-buf: fix dma_fence_array_signaled v4
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Christian König <christian.koenig@amd.com>
+
+commit 78ac1c3558810486d90aa533b0039aa70487a3da upstream.
+
+The function silently assumed that signaling was already enabled for the
+dma_fence_array. This meant that without enabling signaling first we would
+never see forward progress.
+
+Fix that by falling back to testing each individual fence when signaling
+isn't enabled yet.
+
+v2: add the comment suggested by Boris why this is done this way
+v3: fix the underflow pointed out by Tvrtko
+v4: atomic_read_acquire() as suggested by Tvrtko
+
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
+Tested-by: Chia-I Wu <olvaffe@gmail.com>
+Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12094
+Cc: <stable@vger.kernel.org>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241112121925.18464-1-christian.koenig@amd.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dma-buf/dma-fence-array.c |   28 +++++++++++++++++++++++++++-
+ 1 file changed, 27 insertions(+), 1 deletion(-)
+
+--- a/drivers/dma-buf/dma-fence-array.c
++++ b/drivers/dma-buf/dma-fence-array.c
+@@ -103,10 +103,36 @@ static bool dma_fence_array_enable_signa
+ static bool dma_fence_array_signaled(struct dma_fence *fence)
+ {
+       struct dma_fence_array *array = to_dma_fence_array(fence);
++      int num_pending;
++      unsigned int i;
+-      if (atomic_read(&array->num_pending) > 0)
++      /*
++       * We need to read num_pending before checking the enable_signal bit
++       * to avoid racing with the enable_signaling() implementation, which
++       * might decrement the counter, and cause a partial check.
++       * atomic_read_acquire() pairs with atomic_dec_and_test() in
++       * dma_fence_array_enable_signaling()
++       *
++       * The !--num_pending check is here to account for the any_signaled case
++       * if we race with enable_signaling(), that means the !num_pending check
++       * in the is_signalling_enabled branch might be outdated (num_pending
++       * might have been decremented), but that's fine. The user will get the
++       * right value when testing again later.
++       */
++      num_pending = atomic_read_acquire(&array->num_pending);
++      if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &array->base.flags)) {
++              if (num_pending <= 0)
++                      goto signal;
+               return false;
++      }
++      for (i = 0; i < array->num_fences; ++i) {
++              if (dma_fence_is_signaled(array->fences[i]) && !--num_pending)
++                      goto signal;
++      }
++      return false;
++
++signal:
+       dma_fence_array_clear_pending_error(array);
+       return true;
+ }
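A loose C11 model of the ordering the new comment describes: the counter is read with acquire semantics before the flag is tested, pairing with a release-ordered decrement on the signaling side, and a per-fence poll serves as the fallback when signaling is not yet enabled. The names and the exact protocol are simplified stand-ins, not the dma-fence implementation:

    #include <stdatomic.h>
    #include <stdbool.h>

    static atomic_int num_pending;
    static atomic_bool signaling_enabled;

    static void enable_signaling(int fences_already_signaled)
    {
            atomic_store(&signaling_enabled, true);
            /* Release ordering pairs with the acquire load below,
             * mirroring atomic_dec_and_test() vs atomic_read_acquire(). */
            atomic_fetch_sub_explicit(&num_pending, fences_already_signaled,
                                      memory_order_release);
    }

    static bool is_signaled(bool (*test_each_fence)(void))
    {
            /* Read the counter first, with acquire, so a concurrently
             * set flag is never paired with a stale counter value. */
            int pending = atomic_load_explicit(&num_pending,
                                               memory_order_acquire);

            if (atomic_load(&signaling_enabled))
                    return pending <= 0;

            /* Fallback from the patch: signaling not enabled yet, so
             * poll the individual fences instead of trusting the counter. */
            return test_each_fence();
    }

    static bool poll_fences(void) { return false; /* none signaled */ }

    int main(void)
    {
            atomic_store(&num_pending, 2);
            enable_signaling(2);        /* both were already signaled */
            return is_signaled(poll_fences) ? 0 : 1;
    }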
diff --git a/queue-6.12/dma-fence-fix-reference-leak-on-fence-merge-failure-path.patch b/queue-6.12/dma-fence-fix-reference-leak-on-fence-merge-failure-path.patch
new file mode 100644 (file)
index 0000000..d49561b
--- /dev/null
@@ -0,0 +1,45 @@
+From 949291c5314009b4f6e252391edbb40fdd5d5414 Mon Sep 17 00:00:00 2001
+From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Date: Fri, 15 Nov 2024 10:21:49 +0000
+Subject: dma-fence: Fix reference leak on fence merge failure path
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+
+commit 949291c5314009b4f6e252391edbb40fdd5d5414 upstream.
+
+Release all fence references if the output dma-fence-array could not be
+allocated.
+
+Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Fixes: 245a4a7b531c ("dma-buf: generalize dma_fence unwrap & merging v3")
+Cc: Christian König <christian.koenig@amd.com>
+Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
+Cc: Sumit Semwal <sumit.semwal@linaro.org>
+Cc: Gustavo Padovan <gustavo@padovan.org>
+Cc: Friedrich Vock <friedrich.vock@gmx.de>
+Cc: linux-media@vger.kernel.org
+Cc: dri-devel@lists.freedesktop.org
+Cc: linaro-mm-sig@lists.linaro.org
+Cc: <stable@vger.kernel.org> # v6.0+
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241115102153.1980-2-tursulin@igalia.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dma-buf/dma-fence-unwrap.c |    2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/dma-buf/dma-fence-unwrap.c
++++ b/drivers/dma-buf/dma-fence-unwrap.c
+@@ -164,6 +164,8 @@ restart:
+                                       dma_fence_context_alloc(1),
+                                       1, false);
+       if (!result) {
++              for (i = 0; i < count; i++)
++                      dma_fence_put(array[i]);
+               tmp = NULL;
+               goto return_tmp;
+       }
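The leak fixed here follows a common pattern: references taken while flattening the inputs must be dropped again if the final container cannot be allocated. A minimal userspace model of that failure path (toy refcounts, not the dma-fence API):

    #include <stdio.h>
    #include <stdlib.h>

    /* Toy refcounted object standing in for a dma_fence. */
    struct obj { int refcount; };
    static void get(struct obj *o) { o->refcount++; }
    static void put(struct obj *o) { o->refcount--; }

    /* Shape of the fixed failure path: every reference taken while
     * collecting the inputs is dropped if the result cannot be built. */
    static int merge(struct obj **in, int count, int simulate_result_oom)
    {
            struct obj **array = calloc(count, sizeof(*array));
            int i;

            if (!array)
                    return -1;
            for (i = 0; i < count; i++) {
                    get(in[i]);
                    array[i] = in[i];
            }
            if (simulate_result_oom) {
                    for (i = 0; i < count; i++)
                            put(array[i]); /* the fix: no leaked refs */
                    free(array);
                    return -1;
            }
            free(array); /* a real merge would hand the array off instead */
            return 0;
    }

    int main(void)
    {
            struct obj a = { 1 }, b = { 1 };
            struct obj *in[] = { &a, &b };

            merge(in, 2, 1);
            printf("refcounts after failed merge: %d %d\n",
                   a.refcount, b.refcount); /* both back to 1 */
            return 0;
    }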
diff --git a/queue-6.12/dma-fence-use-kernel-s-sort-for-merging-fences.patch b/queue-6.12/dma-fence-use-kernel-s-sort-for-merging-fences.patch
new file mode 100644 (file)
index 0000000..94f11c4
--- /dev/null
@@ -0,0 +1,271 @@
+From fe52c649438b8489c9456681d93a9b3de3d38263 Mon Sep 17 00:00:00 2001
+From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Date: Fri, 15 Nov 2024 10:21:50 +0000
+Subject: dma-fence: Use kernel's sort for merging fences
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+
+commit fe52c649438b8489c9456681d93a9b3de3d38263 upstream.
+
+One alternative to the fix Christian proposed in
+https://lore.kernel.org/dri-devel/20241024124159.4519-3-christian.koenig@amd.com/
+is to replace the rather complex open coded sorting loops with the kernel
+standard sort followed by a context squashing pass.
+
+The proposed advantage of this would be readability, but one concern
+Christian raised was that there could be many fences, that they are
+typically mostly sorted, and that the kernel's heap sort would therefore
+fare much worse than the open-coded algorithm.
+
+I had a look running some games and vkcube to see what the typical
+numbers of input fences are. Tested scenarios:
+
+1) Hogwarts Legacy under Gamescope
+
+450 calls per second to __dma_fence_unwrap_merge.
+
+Percentages per number of fences buckets, before and after checking for
+signalled status, sorting and flattening:
+
+   N       Before      After
+   0       0.91%
+   1      69.40%
+  2-3     28.72%       9.4%  (90.6% resolved to one fence)
+  4-5      0.93%
+  6-9      0.03%
+  10+
+
+2) Cyberpunk 2077 under Gamescope
+
+1050 calls per second, amounting to 0.01% CPU time according to perf top.
+
+   N       Before      After
+   0       1.13%
+   1      52.30%
+  2-3     40.34%       55.57%
+  4-5      1.46%        0.50%
+  6-9      2.44%
+  10+      2.34%
+
+3) vkcube under Plasma
+
+90 calls per second.
+
+   N       Before      After
+   0
+   1
+  2-3      100%         0%   (Ie. all resolved to a single fence)
+  4-5
+  6-9
+  10+
+
+In the case of vkcube all invocations in the 2-3 bucket were actually
+just two input fences.
+
+From these numbers it looks like the heap sort should not be a
+disadvantage, given that the dominant case is <= 2 input fences, which
+heap sort solves with just one compare and swap. (And for the case of one
+input fence we have a fast path in the previous patch.)
+
+A complementary possibility is to implement a different sorting algorithm
+under the same API as the kernel's sort() and so keep the simplicity,
+potentially moving the new sort under lib/ if it would be found more
+widely useful.
+
+v2:
+ * Hold on to fence references and reduce commentary. (Christian)
+ * Record and use latest signaled timestamp in the 2nd loop too.
+ * Consolidate zero or one fences fast paths.
+
+v3:
+ * Reverse the seqno sort order for a simpler squashing pass. (Christian)
+
+Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Fixes: 245a4a7b531c ("dma-buf: generalize dma_fence unwrap & merging v3")
+Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3617
+Cc: Christian König <christian.koenig@amd.com>
+Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
+Cc: Sumit Semwal <sumit.semwal@linaro.org>
+Cc: Gustavo Padovan <gustavo@padovan.org>
+Cc: Friedrich Vock <friedrich.vock@gmx.de>
+Cc: linux-media@vger.kernel.org
+Cc: dri-devel@lists.freedesktop.org
+Cc: linaro-mm-sig@lists.linaro.org
+Cc: <stable@vger.kernel.org> # v6.0+
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241115102153.1980-3-tursulin@igalia.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dma-buf/dma-fence-unwrap.c |  126 +++++++++++++++++--------------------
+ 1 file changed, 60 insertions(+), 66 deletions(-)
+
+--- a/drivers/dma-buf/dma-fence-unwrap.c
++++ b/drivers/dma-buf/dma-fence-unwrap.c
+@@ -12,6 +12,7 @@
+ #include <linux/dma-fence-chain.h>
+ #include <linux/dma-fence-unwrap.h>
+ #include <linux/slab.h>
++#include <linux/sort.h>
+ /* Internal helper to start new array iteration, don't use directly */
+ static struct dma_fence *
+@@ -59,6 +60,25 @@ struct dma_fence *dma_fence_unwrap_next(
+ }
+ EXPORT_SYMBOL_GPL(dma_fence_unwrap_next);
++
++static int fence_cmp(const void *_a, const void *_b)
++{
++      struct dma_fence *a = *(struct dma_fence **)_a;
++      struct dma_fence *b = *(struct dma_fence **)_b;
++
++      if (a->context < b->context)
++              return -1;
++      else if (a->context > b->context)
++              return 1;
++
++      if (dma_fence_is_later(b, a))
++              return 1;
++      else if (dma_fence_is_later(a, b))
++              return -1;
++
++      return 0;
++}
++
+ /* Implementation for the dma_fence_merge() marco, don't use directly */
+ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
+                                          struct dma_fence **fences,
+@@ -67,8 +87,7 @@ struct dma_fence *__dma_fence_unwrap_mer
+       struct dma_fence_array *result;
+       struct dma_fence *tmp, **array;
+       ktime_t timestamp;
+-      unsigned int i;
+-      size_t count;
++      int i, j, count;
+       count = 0;
+       timestamp = ns_to_ktime(0);
+@@ -96,80 +115,55 @@ struct dma_fence *__dma_fence_unwrap_mer
+       if (!array)
+               return NULL;
+-      /*
+-       * This trashes the input fence array and uses it as position for the
+-       * following merge loop. This works because the dma_fence_merge()
+-       * wrapper macro is creating this temporary array on the stack together
+-       * with the iterators.
+-       */
+-      for (i = 0; i < num_fences; ++i)
+-              fences[i] = dma_fence_unwrap_first(fences[i], &iter[i]);
+-
+       count = 0;
+-      do {
+-              unsigned int sel;
+-
+-restart:
+-              tmp = NULL;
+-              for (i = 0; i < num_fences; ++i) {
+-                      struct dma_fence *next;
+-
+-                      while (fences[i] && dma_fence_is_signaled(fences[i]))
+-                              fences[i] = dma_fence_unwrap_next(&iter[i]);
+-
+-                      next = fences[i];
+-                      if (!next)
+-                              continue;
+-
+-                      /*
+-                       * We can't guarantee that inpute fences are ordered by
+-                       * context, but it is still quite likely when this
+-                       * function is used multiple times. So attempt to order
+-                       * the fences by context as we pass over them and merge
+-                       * fences with the same context.
+-                       */
+-                      if (!tmp || tmp->context > next->context) {
+-                              tmp = next;
+-                              sel = i;
+-
+-                      } else if (tmp->context < next->context) {
+-                              continue;
+-
+-                      } else if (dma_fence_is_later(tmp, next)) {
+-                              fences[i] = dma_fence_unwrap_next(&iter[i]);
+-                              goto restart;
++      for (i = 0; i < num_fences; ++i) {
++              dma_fence_unwrap_for_each(tmp, &iter[i], fences[i]) {
++                      if (!dma_fence_is_signaled(tmp)) {
++                              array[count++] = dma_fence_get(tmp);
+                       } else {
+-                              fences[sel] = dma_fence_unwrap_next(&iter[sel]);
+-                              goto restart;
++                              ktime_t t = dma_fence_timestamp(tmp);
++
++                              if (ktime_after(t, timestamp))
++                                      timestamp = t;
+                       }
+               }
++      }
+-              if (tmp) {
+-                      array[count++] = dma_fence_get(tmp);
+-                      fences[sel] = dma_fence_unwrap_next(&iter[sel]);
+-              }
+-      } while (tmp);
++      if (count == 0 || count == 1)
++              goto return_fastpath;
+-      if (count == 0) {
+-              tmp = dma_fence_allocate_private_stub(ktime_get());
+-              goto return_tmp;
+-      }
++      sort(array, count, sizeof(*array), fence_cmp, NULL);
+-      if (count == 1) {
+-              tmp = array[0];
+-              goto return_tmp;
++      /*
++       * Only keep the most recent fence for each context.
++       */
++      j = 0;
++      for (i = 1; i < count; i++) {
++              if (array[i]->context == array[j]->context)
++                      dma_fence_put(array[i]);
++              else
++                      array[++j] = array[i];
+       }
++      count = ++j;
+-      result = dma_fence_array_create(count, array,
+-                                      dma_fence_context_alloc(1),
+-                                      1, false);
+-      if (!result) {
+-              for (i = 0; i < count; i++)
+-                      dma_fence_put(array[i]);
+-              tmp = NULL;
+-              goto return_tmp;
++      if (count > 1) {
++              result = dma_fence_array_create(count, array,
++                                              dma_fence_context_alloc(1),
++                                              1, false);
++              if (!result) {
++                      for (i = 0; i < count; i++)
++                              dma_fence_put(array[i]);
++                      tmp = NULL;
++                      goto return_tmp;
++              }
++              return &result->base;
+       }
+-      return &result->base;
++
++return_fastpath:
++      if (count == 0)
++              tmp = dma_fence_allocate_private_stub(timestamp);
++      else
++              tmp = array[0];
+ return_tmp:
+       kfree(array);
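The comparator sorts by context first and newest-first within a context, which makes the squashing pass a single forward scan that keeps the first fence of each run. A standalone sketch of the same sort-then-squash shape using libc qsort() and toy fences (plain structs standing in for dma_fence):

    #include <stdio.h>
    #include <stdlib.h>

    /* Toy stand-in for a dma_fence: merging keeps only the newest
     * seqno per context. */
    struct fence { unsigned long long context; unsigned int seqno; };

    static int fence_cmp(const void *_a, const void *_b)
    {
            const struct fence *a = _a, *b = _b;

            if (a->context != b->context)
                    return a->context < b->context ? -1 : 1;
            /* Newest first within a context, like the reversed sort
             * order in v3: it makes the squash below a one-liner. */
            return a->seqno < b->seqno ? 1 :
                   (a->seqno > b->seqno ? -1 : 0);
    }

    int main(void)
    {
            struct fence f[] = {
                    { 2, 10 }, { 1, 5 }, { 2, 12 }, { 1, 7 }, { 3, 1 },
            };
            int count = 5, i, j = 0;

            qsort(f, count, sizeof(*f), fence_cmp);

            /* Squashing pass: keep the first (newest) fence per context. */
            for (i = 1; i < count; i++)
                    if (f[i].context != f[j].context)
                            f[++j] = f[i];
            count = j + 1;

            for (i = 0; i < count; i++)
                    printf("context %llu seqno %u\n",
                           f[i].context, f[i].seqno);
            return 0; /* prints 1/7, 2/12, 3/1 */
    }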
diff --git a/queue-6.12/drm-amd-display-add-a-left-edge-pixel-if-in-ycbcr422-or-ycbcr420-and-odm.patch b/queue-6.12/drm-amd-display-add-a-left-edge-pixel-if-in-ycbcr422-or-ycbcr420-and-odm.patch
new file mode 100644 (file)
index 0000000..2b162b9
--- /dev/null
@@ -0,0 +1,81 @@
+From 63e7ee677c74e981257cedfdd8543510d09096ba Mon Sep 17 00:00:00 2001
+From: Peterson Guo <peterson.guo@amd.com>
+Date: Thu, 7 Nov 2024 19:20:02 -0500
+Subject: drm/amd/display: Add a left edge pixel if in YCbCr422 or YCbCr420 and odm
+
+From: Peterson Guo <peterson.guo@amd.com>
+
+commit 63e7ee677c74e981257cedfdd8543510d09096ba upstream.
+
+[WHY]
+On some cards when odm is used, the monitor will have 2 separate pipes
+split vertically. When compression is used on the YCbCr colour space on
+the second pipe to have correct colours, we need to read a pixel from the
+end of first pipe to accurately display colours. Hardware was programmed
+properly to account for this extra pixel but it was not calculated
+properly in software causing a split screen on some monitors.
+
+[HOW]
+The fix adjusts the second pipe's viewport and timings if the pixel
+encoding is YCbCr422 or YCbCr420.
+
+Cc: Mario Limonciello <mario.limonciello@amd.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: George Shen <george.shen@amd.com>
+Signed-off-by: Peterson Guo <peterson.guo@amd.com>
+Signed-off-by: Alex Hung <alex.hung@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c |   23 ++++++++++
+ 1 file changed, 23 insertions(+)
+
+--- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
+@@ -1511,6 +1511,7 @@ bool dcn20_split_stream_for_odm(
+       if (prev_odm_pipe->plane_state) {
+               struct scaler_data *sd = &prev_odm_pipe->plane_res.scl_data;
++              struct output_pixel_processor *opp = next_odm_pipe->stream_res.opp;
+               int new_width;
+               /* HACTIVE halved for odm combine */
+@@ -1544,7 +1545,28 @@ bool dcn20_split_stream_for_odm(
+               sd->viewport_c.x += dc_fixpt_floor(dc_fixpt_mul_int(
+                               sd->ratios.horz_c, sd->h_active - sd->recout.x));
+               sd->recout.x = 0;
++
++              /*
++               * When odm is used in YcbCr422 or 420 colour space, a split screen
++               * will be seen with the previous calculations since the extra left
++               *  edge pixel is accounted for in fmt but not in viewport.
++               *
++               * Below are calculations which fix the split by fixing the calculations
++               * if there is an extra left edge pixel.
++               */
++              if (opp && opp->funcs->opp_get_left_edge_extra_pixel_count
++                              && opp->funcs->opp_get_left_edge_extra_pixel_count(
++                                      opp, next_odm_pipe->stream->timing.pixel_encoding,
++                                      resource_is_pipe_type(next_odm_pipe, OTG_MASTER)) == 1) {
++                      sd->h_active += 1;
++                      sd->recout.width += 1;
++                      sd->viewport.x -= dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1));
++                      sd->viewport_c.x -= dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1));
++                      sd->viewport_c.width += dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1));
++                      sd->viewport.width += dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1));
++              }
+       }
++
+       if (!next_odm_pipe->top_pipe)
+               next_odm_pipe->stream_res.opp = pool->opps[next_odm_pipe->pipe_idx];
+       else
+@@ -2133,6 +2155,7 @@ bool dcn20_fast_validate_bw(
+                       ASSERT(0);
+               }
+       }
++
+       /* Actual dsc count per stream dsc validation*/
+       if (!dcn20_validate_dsc(dc, context)) {
+               context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states] =
diff --git a/queue-6.12/drm-amd-display-correct-prefetch-calculation.patch b/queue-6.12/drm-amd-display-correct-prefetch-calculation.patch
new file mode 100644 (file)
index 0000000..eb418b6
--- /dev/null
@@ -0,0 +1,39 @@
+From 24d3749c11d949972d8c22e75567dc90ff5482e7 Mon Sep 17 00:00:00 2001
+From: Lo-an Chen <lo-an.chen@amd.com>
+Date: Thu, 14 Nov 2024 17:53:41 +0800
+Subject: drm/amd/display: Correct prefetch calculation
+
+From: Lo-an Chen <lo-an.chen@amd.com>
+
+commit 24d3749c11d949972d8c22e75567dc90ff5482e7 upstream.
+
+[WHY]
+The minimum value of the dst_y_prefetch_equ was not correct
+in the prefetch calculation, which causes OPTC underflow.
+
+[HOW]
+Add a min operation on dst_y_prefetch_equ in the prefetch calculation.
+
+Cc: Mario Limonciello <mario.limonciello@amd.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
+Signed-off-by: Lo-an Chen <lo-an.chen@amd.com>
+Signed-off-by: Alex Hung <alex.hung@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c |    1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
++++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
+@@ -1222,6 +1222,7 @@ static dml_bool_t CalculatePrefetchSched
+       s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;
+       s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + dml_max(p->TWait + p->TCalc, *p->Tdmdl)) / s->LineTime - (*p->DSTYAfterScaler + (dml_float_t) *p->DSTXAfterScaler / (dml_float_t)p->myPipe->HTotal);
++      s->dst_y_prefetch_equ = dml_min(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
+ #ifdef __DML_VBA_DEBUG__
+       dml_print("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal);
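The 63.75 cap in the hunk above reflects DST_Y_PREFETCH being a U6.2 fixed-point register field, assuming the usual reading of 6 integer plus 2 fractional bits: the largest encodable value is (2^8 - 1) / 2^2. A quick illustration:

    #include <stdio.h>

    int main(void)
    {
            /* U6.2: 8 bits total, 2 of them fractional. */
            unsigned int raw_max = (1u << 8) - 1;            /* 255 */
            double max_value = raw_max / (double)(1u << 2);

            printf("max DST_Y_PREFETCH = %.2f\n", max_value); /* 63.75 */
            return 0;
    }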
diff --git a/queue-6.12/drm-amd-display-limit-vtotal-range-to-max-hw-cap-minus-fp.patch b/queue-6.12/drm-amd-display-limit-vtotal-range-to-max-hw-cap-minus-fp.patch
new file mode 100644 (file)
index 0000000..cddf4f3
--- /dev/null
@@ -0,0 +1,208 @@
+From a29997b7ac1f5c816b543e0c56aa2b5b56baac24 Mon Sep 17 00:00:00 2001
+From: Dillon Varone <dillon.varone@amd.com>
+Date: Wed, 13 Nov 2024 16:44:15 -0500
+Subject: drm/amd/display: Limit VTotal range to max hw cap minus fp
+
+From: Dillon Varone <dillon.varone@amd.com>
+
+commit a29997b7ac1f5c816b543e0c56aa2b5b56baac24 upstream.
+
+[WHY & HOW]
+Hardware does not support a VTotal within fp2 lines of the maximum
+possible VTotal, so add a capability flag to track this and apply it
+where necessary.
+
+Cc: Mario Limonciello <mario.limonciello@amd.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Jun Lei <jun.lei@amd.com>
+Reviewed-by: Anthony Koo <anthony.koo@amd.com>
+Signed-off-by: Dillon Varone <dillon.varone@amd.com>
+Signed-off-by: Alex Hung <alex.hung@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/dc/dc.h                                  |    1 
+ drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c |   27 +++++++++-
+ drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c       |    1 
+ drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c     |    1 
+ drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c     |    1 
+ drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c       |    1 
+ drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c     |    1 
+ drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c       |    1 
+ drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c     |    1 
+ drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c     |    1 
+ drivers/gpu/drm/amd/display/modules/freesync/freesync.c              |   13 ++++
+ 11 files changed, 46 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/amd/display/dc/dc.h
++++ b/drivers/gpu/drm/amd/display/dc/dc.h
+@@ -285,6 +285,7 @@ struct dc_caps {
+       uint16_t subvp_vertical_int_margin_us;
+       bool seamless_odm;
+       uint32_t max_v_total;
++      bool vtotal_limited_by_fp2;
+       uint32_t max_disp_clock_khz_at_vmin;
+       uint8_t subvp_drr_vblank_start_margin_us;
+       bool cursor_not_scaled;
+--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
++++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
+@@ -339,11 +339,22 @@ void dml21_apply_soc_bb_overrides(struct
+       // }
+ }
++static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stream)
++{
++      unsigned int max_hw_v_total = stream->ctx->dc->caps.max_v_total;
++
++      if (stream->ctx->dc->caps.vtotal_limited_by_fp2) {
++              max_hw_v_total -= stream->timing.v_front_porch + 1;
++      }
++
++      return max_hw_v_total;
++}
++
+ static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cfg *timing,
+               struct dc_stream_state *stream,
+               struct dml2_context *dml_ctx)
+ {
+-      unsigned int hblank_start, vblank_start;
++      unsigned int hblank_start, vblank_start, min_hardware_refresh_in_uhz;
+       timing->h_active = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right;
+       timing->v_active = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top;
+@@ -371,11 +382,23 @@ static void populate_dml21_timing_config
+               - stream->timing.v_border_top - stream->timing.v_border_bottom;
+       timing->drr_config.enabled = stream->ignore_msa_timing_param;
+-      timing->drr_config.min_refresh_uhz = stream->timing.min_refresh_in_uhz;
+       timing->drr_config.drr_active_variable = stream->vrr_active_variable;
+       timing->drr_config.drr_active_fixed = stream->vrr_active_fixed;
+       timing->drr_config.disallowed = !stream->allow_freesync;
++      /* limit min refresh rate to DC cap */
++      min_hardware_refresh_in_uhz = stream->timing.min_refresh_in_uhz;
++      if (stream->ctx->dc->caps.max_v_total != 0) {
++              min_hardware_refresh_in_uhz = div64_u64((stream->timing.pix_clk_100hz * 100000000ULL),
++                              (stream->timing.h_total * (long long)calc_max_hardware_v_total(stream)));
++      }
++
++      if (stream->timing.min_refresh_in_uhz > min_hardware_refresh_in_uhz) {
++              timing->drr_config.min_refresh_uhz = stream->timing.min_refresh_in_uhz;
++      } else {
++              timing->drr_config.min_refresh_uhz = min_hardware_refresh_in_uhz;
++      }
++
+       if (dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase &&
+                       stream->ctx->dc->config.enable_fpo_flicker_detection == 1)
+               timing->drr_config.max_instant_vtotal_delta = dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase(stream, false);
+--- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
+@@ -2354,6 +2354,7 @@ static bool dcn30_resource_construct(
+       dc->caps.dp_hdmi21_pcon_support = true;
+       dc->caps.max_v_total = (1 << 15) - 1;
++      dc->caps.vtotal_limited_by_fp2 = true;
+       /* read VBIOS LTTPR caps */
+       {
+--- a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
+@@ -1234,6 +1234,7 @@ static bool dcn302_resource_construct(
+       dc->caps.extended_aux_timeout_support = true;
+       dc->caps.dmcub_support = true;
+       dc->caps.max_v_total = (1 << 15) - 1;
++      dc->caps.vtotal_limited_by_fp2 = true;
+       /* Color pipeline capabilities */
+       dc->caps.color.dpp.dcn_arch = 1;
+--- a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
+@@ -1179,6 +1179,7 @@ static bool dcn303_resource_construct(
+       dc->caps.extended_aux_timeout_support = true;
+       dc->caps.dmcub_support = true;
+       dc->caps.max_v_total = (1 << 15) - 1;
++      dc->caps.vtotal_limited_by_fp2 = true;
+       /* Color pipeline capabilities */
+       dc->caps.color.dpp.dcn_arch = 1;
+--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
+@@ -2186,6 +2186,7 @@ static bool dcn32_resource_construct(
+       dc->caps.dmcub_support = true;
+       dc->caps.seamless_odm = true;
+       dc->caps.max_v_total = (1 << 15) - 1;
++      dc->caps.vtotal_limited_by_fp2 = true;
+       /* Color pipeline capabilities */
+       dc->caps.color.dpp.dcn_arch = 1;
+--- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
+@@ -1743,6 +1743,7 @@ static bool dcn321_resource_construct(
+       dc->caps.extended_aux_timeout_support = true;
+       dc->caps.dmcub_support = true;
+       dc->caps.max_v_total = (1 << 15) - 1;
++      dc->caps.vtotal_limited_by_fp2 = true;
+       /* Color pipeline capabilities */
+       dc->caps.color.dpp.dcn_arch = 1;
+--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+@@ -1850,6 +1850,7 @@ static bool dcn35_resource_construct(
+       dc->caps.zstate_support = true;
+       dc->caps.ips_support = true;
+       dc->caps.max_v_total = (1 << 15) - 1;
++      dc->caps.vtotal_limited_by_fp2 = true;
+       /* Color pipeline capabilities */
+       dc->caps.color.dpp.dcn_arch = 1;
+--- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
+@@ -1829,6 +1829,7 @@ static bool dcn351_resource_construct(
+       dc->caps.zstate_support = true;
+       dc->caps.ips_support = true;
+       dc->caps.max_v_total = (1 << 15) - 1;
++      dc->caps.vtotal_limited_by_fp2 = true;
+       /* Color pipeline capabilities */
+       dc->caps.color.dpp.dcn_arch = 1;
+--- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
+@@ -1826,6 +1826,7 @@ static bool dcn401_resource_construct(
+       dc->caps.extended_aux_timeout_support = true;
+       dc->caps.dmcub_support = true;
+       dc->caps.max_v_total = (1 << 15) - 1;
++      dc->caps.vtotal_limited_by_fp2 = true;
+       if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev))
+               dc->caps.dcc_plane_width_limit = 7680;
+--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
++++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+@@ -121,6 +121,17 @@ static unsigned int calc_duration_in_us_
+       return duration_in_us;
+ }
++static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stream)
++{
++      unsigned int max_hw_v_total = stream->ctx->dc->caps.max_v_total;
++
++      if (stream->ctx->dc->caps.vtotal_limited_by_fp2) {
++              max_hw_v_total -= stream->timing.v_front_porch + 1;
++      }
++
++      return max_hw_v_total;
++}
++
+ unsigned int mod_freesync_calc_v_total_from_refresh(
+               const struct dc_stream_state *stream,
+               unsigned int refresh_in_uhz)
+@@ -1002,7 +1013,7 @@ void mod_freesync_build_vrr_params(struc
+       if (stream->ctx->dc->caps.max_v_total != 0 && stream->timing.h_total != 0) {
+               min_hardware_refresh_in_uhz = div64_u64((stream->timing.pix_clk_100hz * 100000000ULL),
+-                      (stream->timing.h_total * (long long)stream->ctx->dc->caps.max_v_total));
++                      (stream->timing.h_total * (long long)calc_max_hardware_v_total(stream)));
+       }
+       /* Limit minimum refresh rate to what can be supported by hardware */
+       min_refresh_in_uhz = min_hardware_refresh_in_uhz > in_config->min_refresh_in_uhz ?
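Numerically, the clamp raises the minimum-refresh floor slightly: the floor is the pixel clock over h_total times the maximum usable VTotal, and the fp2 cap removes v_front_porch + 1 lines from the latter. A worked example with made-up but plausible 4K timing values, using the same formula as mod_freesync_build_vrr_params():

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            /* Hypothetical 4K timing: 594 MHz pixel clock, in 100 Hz units. */
            uint64_t pix_clk_100hz = 5940000;
            uint64_t h_total = 4400;
            uint64_t max_v_total = (1 << 15) - 1;  /* 32767, per the patch */
            uint64_t v_front_porch = 90;

            uint64_t floor_old = pix_clk_100hz * 100000000ULL /
                                 (h_total * max_v_total);
            uint64_t floor_new = pix_clk_100hz * 100000000ULL /
                                 (h_total * (max_v_total - (v_front_porch + 1)));

            printf("min refresh without fp2 cap: %llu uHz (~%.2f Hz)\n",
                   (unsigned long long)floor_old, floor_old / 1e6);
            printf("min refresh with    fp2 cap: %llu uHz (~%.2f Hz)\n",
                   (unsigned long long)floor_new, floor_new / 1e6);
            return 0;
    }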
diff --git a/queue-6.12/drm-amd-pm-fix-and-simplify-workload-handling.patch b/queue-6.12/drm-amd-pm-fix-and-simplify-workload-handling.patch
new file mode 100644 (file)
index 0000000..bb2e4a6
--- /dev/null
@@ -0,0 +1,1653 @@
+From 1443dd3c67f6d1a8bd1f810e598e2f0c6f19205c Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Sat, 16 Nov 2024 08:20:59 -0500
+Subject: drm/amd/pm: fix and simplify workload handling
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit 1443dd3c67f6d1a8bd1f810e598e2f0c6f19205c upstream.
+
+smu->workload_mask is IP specific and should not be messed with in
+the common code. The mask bits vary across SMU versions.
+
+Move all handling of smu->workload_mask into the backends and
+simplify the code.  Store the user's preference in smu->power_profile_mode,
+which will be reflected in sysfs.  For internal driver profile
+switches for KFD or VCN, just update the workload mask so that the
+user's preference is retained.  Remove all of the extra, now unused,
+workload-related elements in the smu structure.
+
+v2: use refcounts for workload profiles
+v3: rework based on feedback from Lijo
+v4: fix the refcount on failure, drop backend mask
+v5: rework custom handling
+v6: handle failure cleanup with custom profile
+v7: Update documentation
+
+Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: Kenneth Feng <kenneth.feng@amd.com>
+Cc: Lijo Lazar <lijo.lazar@amd.com>
+Cc: stable@vger.kernel.org # 6.11.x
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/pm/amdgpu_pm.c                      |    6 
+ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c               |  148 ++++++++-----
+ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h           |   15 -
+ drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c       |  166 ++++++++-------
+ drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c         |  167 +++++++++------
+ drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c |  168 +++++++++------
+ drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c        |   41 +--
+ drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c         |   43 +--
+ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c    |  167 ++++++++-------
+ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c    |  138 +++++++-----
+ drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c    |  176 +++++++++-------
+ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c                  |   25 ++
+ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h                  |    4 
+ 13 files changed, 744 insertions(+), 520 deletions(-)
+
+--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
++++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+@@ -1409,7 +1409,11 @@ static ssize_t amdgpu_set_pp_mclk_od(str
+  * create a custom set of heuristics, write a string of numbers to the file
+  * starting with the number of the custom profile along with a setting
+  * for each heuristic parameter.  Due to differences across asic families
+- * the heuristic parameters vary from family to family.
++ * the heuristic parameters vary from family to family. Additionally,
++ * you can apply the custom heuristics to different clock domains.  Each
++ * clock domain is considered a distinct operation so if you modify the
++ * gfxclk heuristics and then the memclk heuristics, the all of the
++ * custom heuristics will be retained until you switch to another profile.
+  *
+  */
+--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+@@ -72,6 +72,10 @@ static int smu_set_power_limit(void *han
+ static int smu_set_fan_speed_rpm(void *handle, uint32_t speed);
+ static int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled);
+ static int smu_set_mp1_state(void *handle, enum pp_mp1_state mp1_state);
++static void smu_power_profile_mode_get(struct smu_context *smu,
++                                     enum PP_SMC_POWER_PROFILE profile_mode);
++static void smu_power_profile_mode_put(struct smu_context *smu,
++                                     enum PP_SMC_POWER_PROFILE profile_mode);
+ static int smu_sys_get_pp_feature_mask(void *handle,
+                                      char *buf)
+@@ -1257,35 +1261,19 @@ static int smu_sw_init(void *handle)
+       INIT_WORK(&smu->interrupt_work, smu_interrupt_work_fn);
+       atomic64_set(&smu->throttle_int_counter, 0);
+       smu->watermarks_bitmap = 0;
+-      smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+-      smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+       atomic_set(&smu->smu_power.power_gate.vcn_gated, 1);
+       atomic_set(&smu->smu_power.power_gate.jpeg_gated, 1);
+       atomic_set(&smu->smu_power.power_gate.vpe_gated, 1);
+       atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1);
+-      smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0;
+-      smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1;
+-      smu->workload_prority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2;
+-      smu->workload_prority[PP_SMC_POWER_PROFILE_VIDEO] = 3;
+-      smu->workload_prority[PP_SMC_POWER_PROFILE_VR] = 4;
+-      smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5;
+-      smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6;
+-
+       if (smu->is_apu ||
+           !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D))
+-              smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT];
++              smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+       else
+-              smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D];
++              smu->power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
++      smu_power_profile_mode_get(smu, smu->power_profile_mode);
+-      smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+-      smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+-      smu->workload_setting[2] = PP_SMC_POWER_PROFILE_POWERSAVING;
+-      smu->workload_setting[3] = PP_SMC_POWER_PROFILE_VIDEO;
+-      smu->workload_setting[4] = PP_SMC_POWER_PROFILE_VR;
+-      smu->workload_setting[5] = PP_SMC_POWER_PROFILE_COMPUTE;
+-      smu->workload_setting[6] = PP_SMC_POWER_PROFILE_CUSTOM;
+       smu->display_config = &adev->pm.pm_display_cfg;
+       smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO;
+@@ -1338,6 +1326,11 @@ static int smu_sw_fini(void *handle)
+               return ret;
+       }
++      if (smu->custom_profile_params) {
++              kfree(smu->custom_profile_params);
++              smu->custom_profile_params = NULL;
++      }
++
+       smu_fini_microcode(smu);
+       return 0;
+@@ -2117,6 +2110,9 @@ static int smu_suspend(void *handle)
+       if (!ret)
+               adev->gfx.gfx_off_entrycount = count;
++      /* clear this on suspend so it will get reprogrammed on resume */
++      smu->workload_mask = 0;
++
+       return 0;
+ }
+@@ -2229,25 +2225,49 @@ static int smu_enable_umd_pstate(void *h
+ }
+ static int smu_bump_power_profile_mode(struct smu_context *smu,
+-                                         long *param,
+-                                         uint32_t param_size)
++                                     long *custom_params,
++                                     u32 custom_params_max_idx)
+ {
+-      int ret = 0;
++      u32 workload_mask = 0;
++      int i, ret = 0;
++
++      for (i = 0; i < PP_SMC_POWER_PROFILE_COUNT; i++) {
++              if (smu->workload_refcount[i])
++                      workload_mask |= 1 << i;
++      }
++
++      if (smu->workload_mask == workload_mask)
++              return 0;
+       if (smu->ppt_funcs->set_power_profile_mode)
+-              ret = smu->ppt_funcs->set_power_profile_mode(smu, param, param_size);
++              ret = smu->ppt_funcs->set_power_profile_mode(smu, workload_mask,
++                                                           custom_params,
++                                                           custom_params_max_idx);
++
++      if (!ret)
++              smu->workload_mask = workload_mask;
+       return ret;
+ }
++static void smu_power_profile_mode_get(struct smu_context *smu,
++                                     enum PP_SMC_POWER_PROFILE profile_mode)
++{
++      smu->workload_refcount[profile_mode]++;
++}
++
++static void smu_power_profile_mode_put(struct smu_context *smu,
++                                     enum PP_SMC_POWER_PROFILE profile_mode)
++{
++      if (smu->workload_refcount[profile_mode])
++              smu->workload_refcount[profile_mode]--;
++}
++
+ static int smu_adjust_power_state_dynamic(struct smu_context *smu,
+                                         enum amd_dpm_forced_level level,
+-                                        bool skip_display_settings,
+-                                        bool init)
++                                        bool skip_display_settings)
+ {
+       int ret = 0;
+-      int index = 0;
+-      long workload[1];
+       struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
+       if (!skip_display_settings) {
+@@ -2284,14 +2304,8 @@ static int smu_adjust_power_state_dynami
+       }
+       if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL &&
+-              smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) {
+-              index = fls(smu->workload_mask);
+-              index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
+-              workload[0] = smu->workload_setting[index];
+-
+-              if (init || smu->power_profile_mode != workload[0])
+-                      smu_bump_power_profile_mode(smu, workload, 0);
+-      }
++          smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)
++              smu_bump_power_profile_mode(smu, NULL, 0);
+       return ret;
+ }
+@@ -2310,13 +2324,13 @@ static int smu_handle_task(struct smu_co
+               ret = smu_pre_display_config_changed(smu);
+               if (ret)
+                       return ret;
+-              ret = smu_adjust_power_state_dynamic(smu, level, false, false);
++              ret = smu_adjust_power_state_dynamic(smu, level, false);
+               break;
+       case AMD_PP_TASK_COMPLETE_INIT:
+-              ret = smu_adjust_power_state_dynamic(smu, level, true, true);
++              ret = smu_adjust_power_state_dynamic(smu, level, true);
+               break;
+       case AMD_PP_TASK_READJUST_POWER_STATE:
+-              ret = smu_adjust_power_state_dynamic(smu, level, true, false);
++              ret = smu_adjust_power_state_dynamic(smu, level, true);
+               break;
+       default:
+               break;
+@@ -2338,12 +2352,11 @@ static int smu_handle_dpm_task(void *han
+ static int smu_switch_power_profile(void *handle,
+                                   enum PP_SMC_POWER_PROFILE type,
+-                                  bool en)
++                                  bool enable)
+ {
+       struct smu_context *smu = handle;
+       struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
+-      long workload[1];
+-      uint32_t index;
++      int ret;
+       if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+               return -EOPNOTSUPP;
+@@ -2351,21 +2364,21 @@ static int smu_switch_power_profile(void
+       if (!(type < PP_SMC_POWER_PROFILE_CUSTOM))
+               return -EINVAL;
+-      if (!en) {
+-              smu->workload_mask &= ~(1 << smu->workload_prority[type]);
+-              index = fls(smu->workload_mask);
+-              index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
+-              workload[0] = smu->workload_setting[index];
+-      } else {
+-              smu->workload_mask |= (1 << smu->workload_prority[type]);
+-              index = fls(smu->workload_mask);
+-              index = index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
+-              workload[0] = smu->workload_setting[index];
+-      }
+-
+       if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL &&
+-              smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)
+-              smu_bump_power_profile_mode(smu, workload, 0);
++          smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) {
++              if (enable)
++                      smu_power_profile_mode_get(smu, type);
++              else
++                      smu_power_profile_mode_put(smu, type);
++              ret = smu_bump_power_profile_mode(smu, NULL, 0);
++              if (ret) {
++                      if (enable)
++                              smu_power_profile_mode_put(smu, type);
++                      else
++                              smu_power_profile_mode_get(smu, type);
++                      return ret;
++              }
++      }
+       return 0;
+ }
+@@ -3053,12 +3066,35 @@ static int smu_set_power_profile_mode(vo
+                                     uint32_t param_size)
+ {
+       struct smu_context *smu = handle;
++      bool custom = false;
++      int ret = 0;
+       if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled ||
+           !smu->ppt_funcs->set_power_profile_mode)
+               return -EOPNOTSUPP;
+-      return smu_bump_power_profile_mode(smu, param, param_size);
++      if (param[param_size] == PP_SMC_POWER_PROFILE_CUSTOM) {
++              custom = true;
++              /* clear frontend mask so custom changes propagate */
++              smu->workload_mask = 0;
++      }
++
++      if ((param[param_size] != smu->power_profile_mode) || custom) {
++              /* clear the old user preference */
++              smu_power_profile_mode_put(smu, smu->power_profile_mode);
++              /* set the new user preference */
++              smu_power_profile_mode_get(smu, param[param_size]);
++              ret = smu_bump_power_profile_mode(smu,
++                                                custom ? param : NULL,
++                                                custom ? param_size : 0);
++              if (ret)
++                      smu_power_profile_mode_put(smu, param[param_size]);
++              else
++                      /* store the user's preference */
++                      smu->power_profile_mode = param[param_size];
++      }
++
++      return ret;
+ }
+ static int smu_get_fan_control_mode(void *handle, u32 *fan_mode)
+--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
++++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+@@ -556,11 +556,13 @@ struct smu_context {
+       uint32_t hard_min_uclk_req_from_dal;
+       bool disable_uclk_switch;
++      /* asic agnostic workload mask */
+       uint32_t workload_mask;
+-      uint32_t workload_prority[WORKLOAD_POLICY_MAX];
+-      uint32_t workload_setting[WORKLOAD_POLICY_MAX];
++      /* default/user workload preference */
+       uint32_t power_profile_mode;
+-      uint32_t default_power_profile_mode;
++      uint32_t workload_refcount[PP_SMC_POWER_PROFILE_COUNT];
++      /* backend specific custom workload settings */
++      long *custom_profile_params;
+       bool pm_enabled;
+       bool is_apu;
+@@ -731,9 +733,12 @@ struct pptable_funcs {
+        * @set_power_profile_mode: Set a power profile mode. Also used to
+        *                          create/set custom power profile modes.
+        * &input: Power profile mode parameters.
+-       * &size: Size of &input.
++       * &workload_mask: mask of workloads to enable
++       * &custom_params: custom profile parameters
++       * &custom_params_max_idx: max valid idx into custom_params
+        */
+-      int (*set_power_profile_mode)(struct smu_context *smu, long *input, uint32_t size);
++      int (*set_power_profile_mode)(struct smu_context *smu, u32 workload_mask,
++                                    long *custom_params, u32 custom_params_max_idx);
+       /**
+        * @dpm_set_vcn_enable: Enable/disable VCN engine dynamic power
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+@@ -1445,98 +1445,120 @@ static int arcturus_get_power_profile_mo
+       return size;
+ }
+-static int arcturus_set_power_profile_mode(struct smu_context *smu,
+-                                         long *input,
+-                                         uint32_t size)
++#define ARCTURUS_CUSTOM_PARAMS_COUNT 10
++#define ARCTURUS_CUSTOM_PARAMS_CLOCK_COUNT 2
++#define ARCTURUS_CUSTOM_PARAMS_SIZE (ARCTURUS_CUSTOM_PARAMS_CLOCK_COUNT * ARCTURUS_CUSTOM_PARAMS_COUNT * sizeof(long))
++
++static int arcturus_set_power_profile_mode_coeff(struct smu_context *smu,
++                                               long *input)
+ {
+       DpmActivityMonitorCoeffInt_t activity_monitor;
+-      int workload_type = 0;
+-      uint32_t profile_mode = input[size];
+-      int ret = 0;
++      int ret, idx;
+-      if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
+-              dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode);
+-              return -EINVAL;
++      ret = smu_cmn_update_table(smu,
++                                 SMU_TABLE_ACTIVITY_MONITOR_COEFF,
++                                 WORKLOAD_PPLIB_CUSTOM_BIT,
++                                 (void *)(&activity_monitor),
++                                 false);
++      if (ret) {
++              dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
++              return ret;
+       }
++      idx = 0 * ARCTURUS_CUSTOM_PARAMS_COUNT;
++      if (input[idx]) {
++              /* Gfxclk */
++              activity_monitor.Gfx_FPS = input[idx + 1];
++              activity_monitor.Gfx_UseRlcBusy = input[idx + 2];
++              activity_monitor.Gfx_MinActiveFreqType = input[idx + 3];
++              activity_monitor.Gfx_MinActiveFreq = input[idx + 4];
++              activity_monitor.Gfx_BoosterFreqType = input[idx + 5];
++              activity_monitor.Gfx_BoosterFreq = input[idx + 6];
++              activity_monitor.Gfx_PD_Data_limit_c = input[idx + 7];
++              activity_monitor.Gfx_PD_Data_error_coeff = input[idx + 8];
++              activity_monitor.Gfx_PD_Data_error_rate_coeff = input[idx + 9];
++      }
++      idx = 1 * ARCTURUS_CUSTOM_PARAMS_COUNT;
++      if (input[idx]) {
++              /* Uclk */
++              activity_monitor.Mem_FPS = input[idx + 1];
++              activity_monitor.Mem_UseRlcBusy = input[idx + 2];
++              activity_monitor.Mem_MinActiveFreqType = input[idx + 3];
++              activity_monitor.Mem_MinActiveFreq = input[idx + 4];
++              activity_monitor.Mem_BoosterFreqType = input[idx + 5];
++              activity_monitor.Mem_BoosterFreq = input[idx + 6];
++              activity_monitor.Mem_PD_Data_limit_c = input[idx + 7];
++              activity_monitor.Mem_PD_Data_error_coeff = input[idx + 8];
++              activity_monitor.Mem_PD_Data_error_rate_coeff = input[idx + 9];
++      }
+-      if ((profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) &&
+-           (smu->smc_fw_version >= 0x360d00)) {
+-              if (size != 10)
+-                      return -EINVAL;
++      ret = smu_cmn_update_table(smu,
++                                 SMU_TABLE_ACTIVITY_MONITOR_COEFF,
++                                 WORKLOAD_PPLIB_CUSTOM_BIT,
++                                 (void *)(&activity_monitor),
++                                 true);
++      if (ret) {
++              dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++              return ret;
++      }
+-              ret = smu_cmn_update_table(smu,
+-                                     SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+-                                     WORKLOAD_PPLIB_CUSTOM_BIT,
+-                                     (void *)(&activity_monitor),
+-                                     false);
+-              if (ret) {
+-                      dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
+-                      return ret;
+-              }
++      return ret;
++}
++
++static int arcturus_set_power_profile_mode(struct smu_context *smu,
++                                         u32 workload_mask,
++                                         long *custom_params,
++                                         u32 custom_params_max_idx)
++{
++      u32 backend_workload_mask = 0;
++      int ret, idx = -1, i;
++
++      smu_cmn_get_backend_workload_mask(smu, workload_mask,
++                                        &backend_workload_mask);
+-              switch (input[0]) {
+-              case 0: /* Gfxclk */
+-                      activity_monitor.Gfx_FPS = input[1];
+-                      activity_monitor.Gfx_UseRlcBusy = input[2];
+-                      activity_monitor.Gfx_MinActiveFreqType = input[3];
+-                      activity_monitor.Gfx_MinActiveFreq = input[4];
+-                      activity_monitor.Gfx_BoosterFreqType = input[5];
+-                      activity_monitor.Gfx_BoosterFreq = input[6];
+-                      activity_monitor.Gfx_PD_Data_limit_c = input[7];
+-                      activity_monitor.Gfx_PD_Data_error_coeff = input[8];
+-                      activity_monitor.Gfx_PD_Data_error_rate_coeff = input[9];
+-                      break;
+-              case 1: /* Uclk */
+-                      activity_monitor.Mem_FPS = input[1];
+-                      activity_monitor.Mem_UseRlcBusy = input[2];
+-                      activity_monitor.Mem_MinActiveFreqType = input[3];
+-                      activity_monitor.Mem_MinActiveFreq = input[4];
+-                      activity_monitor.Mem_BoosterFreqType = input[5];
+-                      activity_monitor.Mem_BoosterFreq = input[6];
+-                      activity_monitor.Mem_PD_Data_limit_c = input[7];
+-                      activity_monitor.Mem_PD_Data_error_coeff = input[8];
+-                      activity_monitor.Mem_PD_Data_error_rate_coeff = input[9];
+-                      break;
+-              default:
++      if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) {
++              if (smu->smc_fw_version < 0x360d00)
+                       return -EINVAL;
++              if (!smu->custom_profile_params) {
++                      smu->custom_profile_params =
++                              kzalloc(ARCTURUS_CUSTOM_PARAMS_SIZE, GFP_KERNEL);
++                      if (!smu->custom_profile_params)
++                              return -ENOMEM;
+               }
+-
+-              ret = smu_cmn_update_table(smu,
+-                                     SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+-                                     WORKLOAD_PPLIB_CUSTOM_BIT,
+-                                     (void *)(&activity_monitor),
+-                                     true);
++              if (custom_params && custom_params_max_idx) {
++                      if (custom_params_max_idx != ARCTURUS_CUSTOM_PARAMS_COUNT)
++                              return -EINVAL;
++                      if (custom_params[0] >= ARCTURUS_CUSTOM_PARAMS_CLOCK_COUNT)
++                              return -EINVAL;
++                      idx = custom_params[0] * ARCTURUS_CUSTOM_PARAMS_COUNT;
++                      smu->custom_profile_params[idx] = 1;
++                      for (i = 1; i < custom_params_max_idx; i++)
++                              smu->custom_profile_params[idx + i] = custom_params[i];
++              }
++              ret = arcturus_set_power_profile_mode_coeff(smu,
++                                                          smu->custom_profile_params);
+               if (ret) {
+-                      dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++                      if (idx != -1)
++                              smu->custom_profile_params[idx] = 0;
+                       return ret;
+               }
+-      }
+-
+-      /*
+-       * Conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT
+-       * Not all profile modes are supported on arcturus.
+-       */
+-      workload_type = smu_cmn_to_asic_specific_index(smu,
+-                                                     CMN2ASIC_MAPPING_WORKLOAD,
+-                                                     profile_mode);
+-      if (workload_type < 0) {
+-              dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on arcturus\n", profile_mode);
+-              return -EINVAL;
++      } else if (smu->custom_profile_params) {
++              memset(smu->custom_profile_params, 0, ARCTURUS_CUSTOM_PARAMS_SIZE);
+       }
+       ret = smu_cmn_send_smc_msg_with_param(smu,
+-                                        SMU_MSG_SetWorkloadMask,
+-                                        1 << workload_type,
+-                                        NULL);
++                                            SMU_MSG_SetWorkloadMask,
++                                            backend_workload_mask,
++                                            NULL);
+       if (ret) {
+-              dev_err(smu->adev->dev, "Fail to set workload type %d\n", workload_type);
++              dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
++                      workload_mask);
++              if (idx != -1)
++                      smu->custom_profile_params[idx] = 0;
+               return ret;
+       }
+-      smu->power_profile_mode = profile_mode;
+-
+-      return 0;
++      return ret;
+ }
+ static int arcturus_set_performance_level(struct smu_context *smu,
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+@@ -2004,87 +2004,122 @@ static int navi10_get_power_profile_mode
+       return size;
+ }
+-static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
++#define NAVI10_CUSTOM_PARAMS_COUNT 10
++#define NAVI10_CUSTOM_PARAMS_CLOCKS_COUNT 3
++#define NAVI10_CUSTOM_PARAMS_SIZE (NAVI10_CUSTOM_PARAMS_CLOCKS_COUNT * NAVI10_CUSTOM_PARAMS_COUNT * sizeof(long))
++
++static int navi10_set_power_profile_mode_coeff(struct smu_context *smu,
++                                             long *input)
+ {
+       DpmActivityMonitorCoeffInt_t activity_monitor;
+-      int workload_type, ret = 0;
++      int ret, idx;
+-      smu->power_profile_mode = input[size];
++      ret = smu_cmn_update_table(smu,
++                                 SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
++                                 (void *)(&activity_monitor), false);
++      if (ret) {
++              dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
++              return ret;
++      }
+-      if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
+-              dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
+-              return -EINVAL;
++      idx = 0 * NAVI10_CUSTOM_PARAMS_COUNT;
++      if (input[idx]) {
++              /* Gfxclk */
++              activity_monitor.Gfx_FPS = input[idx + 1];
++              activity_monitor.Gfx_MinFreqStep = input[idx + 2];
++              activity_monitor.Gfx_MinActiveFreqType = input[idx + 3];
++              activity_monitor.Gfx_MinActiveFreq = input[idx + 4];
++              activity_monitor.Gfx_BoosterFreqType = input[idx + 5];
++              activity_monitor.Gfx_BoosterFreq = input[idx + 6];
++              activity_monitor.Gfx_PD_Data_limit_c = input[idx + 7];
++              activity_monitor.Gfx_PD_Data_error_coeff = input[idx + 8];
++              activity_monitor.Gfx_PD_Data_error_rate_coeff = input[idx + 9];
++      }
++      idx = 1 * NAVI10_CUSTOM_PARAMS_COUNT;
++      if (input[idx]) {
++              /* Socclk */
++              activity_monitor.Soc_FPS = input[idx + 1];
++              activity_monitor.Soc_MinFreqStep = input[idx + 2];
++              activity_monitor.Soc_MinActiveFreqType = input[idx + 3];
++              activity_monitor.Soc_MinActiveFreq = input[idx + 4];
++              activity_monitor.Soc_BoosterFreqType = input[idx + 5];
++              activity_monitor.Soc_BoosterFreq = input[idx + 6];
++              activity_monitor.Soc_PD_Data_limit_c = input[idx + 7];
++              activity_monitor.Soc_PD_Data_error_coeff = input[idx + 8];
++              activity_monitor.Soc_PD_Data_error_rate_coeff = input[idx + 9];
++      }
++      idx = 2 * NAVI10_CUSTOM_PARAMS_COUNT;
++      if (input[idx]) {
++              /* Memclk */
++              activity_monitor.Mem_FPS = input[idx + 1];
++              activity_monitor.Mem_MinFreqStep = input[idx + 2];
++              activity_monitor.Mem_MinActiveFreqType = input[idx + 3];
++              activity_monitor.Mem_MinActiveFreq = input[idx + 4];
++              activity_monitor.Mem_BoosterFreqType = input[idx + 5];
++              activity_monitor.Mem_BoosterFreq = input[idx + 6];
++              activity_monitor.Mem_PD_Data_limit_c = input[idx + 7];
++              activity_monitor.Mem_PD_Data_error_coeff = input[idx + 8];
++              activity_monitor.Mem_PD_Data_error_rate_coeff = input[idx + 9];
+       }
+-      if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
+-              if (size != 10)
+-                      return -EINVAL;
++      ret = smu_cmn_update_table(smu,
++                                 SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
++                                 (void *)(&activity_monitor), true);
++      if (ret) {
++              dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++              return ret;
++      }
+-              ret = smu_cmn_update_table(smu,
+-                                     SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
+-                                     (void *)(&activity_monitor), false);
+-              if (ret) {
+-                      dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
+-                      return ret;
+-              }
++      return ret;
++}
+-              switch (input[0]) {
+-              case 0: /* Gfxclk */
+-                      activity_monitor.Gfx_FPS = input[1];
+-                      activity_monitor.Gfx_MinFreqStep = input[2];
+-                      activity_monitor.Gfx_MinActiveFreqType = input[3];
+-                      activity_monitor.Gfx_MinActiveFreq = input[4];
+-                      activity_monitor.Gfx_BoosterFreqType = input[5];
+-                      activity_monitor.Gfx_BoosterFreq = input[6];
+-                      activity_monitor.Gfx_PD_Data_limit_c = input[7];
+-                      activity_monitor.Gfx_PD_Data_error_coeff = input[8];
+-                      activity_monitor.Gfx_PD_Data_error_rate_coeff = input[9];
+-                      break;
+-              case 1: /* Socclk */
+-                      activity_monitor.Soc_FPS = input[1];
+-                      activity_monitor.Soc_MinFreqStep = input[2];
+-                      activity_monitor.Soc_MinActiveFreqType = input[3];
+-                      activity_monitor.Soc_MinActiveFreq = input[4];
+-                      activity_monitor.Soc_BoosterFreqType = input[5];
+-                      activity_monitor.Soc_BoosterFreq = input[6];
+-                      activity_monitor.Soc_PD_Data_limit_c = input[7];
+-                      activity_monitor.Soc_PD_Data_error_coeff = input[8];
+-                      activity_monitor.Soc_PD_Data_error_rate_coeff = input[9];
+-                      break;
+-              case 2: /* Memclk */
+-                      activity_monitor.Mem_FPS = input[1];
+-                      activity_monitor.Mem_MinFreqStep = input[2];
+-                      activity_monitor.Mem_MinActiveFreqType = input[3];
+-                      activity_monitor.Mem_MinActiveFreq = input[4];
+-                      activity_monitor.Mem_BoosterFreqType = input[5];
+-                      activity_monitor.Mem_BoosterFreq = input[6];
+-                      activity_monitor.Mem_PD_Data_limit_c = input[7];
+-                      activity_monitor.Mem_PD_Data_error_coeff = input[8];
+-                      activity_monitor.Mem_PD_Data_error_rate_coeff = input[9];
+-                      break;
+-              default:
+-                      return -EINVAL;
+-              }
++static int navi10_set_power_profile_mode(struct smu_context *smu,
++                                       u32 workload_mask,
++                                       long *custom_params,
++                                       u32 custom_params_max_idx)
++{
++      u32 backend_workload_mask = 0;
++      int ret, idx = -1, i;
+-              ret = smu_cmn_update_table(smu,
+-                                     SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
+-                                     (void *)(&activity_monitor), true);
++      smu_cmn_get_backend_workload_mask(smu, workload_mask,
++                                        &backend_workload_mask);
++
++      if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) {
++              if (!smu->custom_profile_params) {
++                      smu->custom_profile_params = kzalloc(NAVI10_CUSTOM_PARAMS_SIZE, GFP_KERNEL);
++                      if (!smu->custom_profile_params)
++                              return -ENOMEM;
++              }
++              if (custom_params && custom_params_max_idx) {
++                      if (custom_params_max_idx != NAVI10_CUSTOM_PARAMS_COUNT)
++                              return -EINVAL;
++                      if (custom_params[0] >= NAVI10_CUSTOM_PARAMS_CLOCKS_COUNT)
++                              return -EINVAL;
++                      idx = custom_params[0] * NAVI10_CUSTOM_PARAMS_COUNT;
++                      smu->custom_profile_params[idx] = 1;
++                      for (i = 1; i < custom_params_max_idx; i++)
++                              smu->custom_profile_params[idx + i] = custom_params[i];
++              }
++              ret = navi10_set_power_profile_mode_coeff(smu,
++                                                        smu->custom_profile_params);
+               if (ret) {
+-                      dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++                      if (idx != -1)
++                              smu->custom_profile_params[idx] = 0;
+                       return ret;
+               }
++      } else if (smu->custom_profile_params) {
++              memset(smu->custom_profile_params, 0, NAVI10_CUSTOM_PARAMS_SIZE);
+       }
+-      /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
+-      workload_type = smu_cmn_to_asic_specific_index(smu,
+-                                                     CMN2ASIC_MAPPING_WORKLOAD,
+-                                                     smu->power_profile_mode);
+-      if (workload_type < 0)
+-              return -EINVAL;
+       ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
+-                                  1 << workload_type, NULL);
+-      if (ret)
+-              dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__);
++                                            backend_workload_mask, NULL);
++      if (ret) {
++              dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
++                      workload_mask);
++              if (idx != -1)
++                      smu->custom_profile_params[idx] = 0;
++              return ret;
++      }
+       return ret;
+ }
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+@@ -1706,90 +1706,126 @@ static int sienna_cichlid_get_power_prof
+       return size;
+ }
+-static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
++#define SIENNA_CICHLID_CUSTOM_PARAMS_COUNT 10
++#define SIENNA_CICHLID_CUSTOM_PARAMS_CLOCK_COUNT 3
++#define SIENNA_CICHLID_CUSTOM_PARAMS_SIZE (SIENNA_CICHLID_CUSTOM_PARAMS_CLOCK_COUNT * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT * sizeof(long))
++
++static int sienna_cichlid_set_power_profile_mode_coeff(struct smu_context *smu,
++                                                     long *input)
+ {
+       DpmActivityMonitorCoeffIntExternal_t activity_monitor_external;
+       DpmActivityMonitorCoeffInt_t *activity_monitor =
+               &(activity_monitor_external.DpmActivityMonitorCoeffInt);
+-      int workload_type, ret = 0;
++      int ret, idx;
+-      smu->power_profile_mode = input[size];
++      ret = smu_cmn_update_table(smu,
++                                 SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
++                                 (void *)(&activity_monitor_external), false);
++      if (ret) {
++              dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
++              return ret;
++      }
+-      if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
+-              dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
+-              return -EINVAL;
++      idx = 0 * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT;
++      if (input[idx]) {
++              /* Gfxclk */
++              activity_monitor->Gfx_FPS = input[idx + 1];
++              activity_monitor->Gfx_MinFreqStep = input[idx + 2];
++              activity_monitor->Gfx_MinActiveFreqType = input[idx + 3];
++              activity_monitor->Gfx_MinActiveFreq = input[idx + 4];
++              activity_monitor->Gfx_BoosterFreqType = input[idx + 5];
++              activity_monitor->Gfx_BoosterFreq = input[idx + 6];
++              activity_monitor->Gfx_PD_Data_limit_c = input[idx + 7];
++              activity_monitor->Gfx_PD_Data_error_coeff = input[idx + 8];
++              activity_monitor->Gfx_PD_Data_error_rate_coeff = input[idx + 9];
++      }
++      idx = 1 * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT;
++      if (input[idx]) {
++              /* Socclk */
++              activity_monitor->Fclk_FPS = input[idx + 1];
++              activity_monitor->Fclk_MinFreqStep = input[idx + 2];
++              activity_monitor->Fclk_MinActiveFreqType = input[idx + 3];
++              activity_monitor->Fclk_MinActiveFreq = input[idx + 4];
++              activity_monitor->Fclk_BoosterFreqType = input[idx + 5];
++              activity_monitor->Fclk_BoosterFreq = input[idx + 6];
++              activity_monitor->Fclk_PD_Data_limit_c = input[idx + 7];
++              activity_monitor->Fclk_PD_Data_error_coeff = input[idx + 8];
++              activity_monitor->Fclk_PD_Data_error_rate_coeff = input[idx + 9];
++      }
++      idx = 2 * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT;
++      if (input[idx]) {
++              /* Memclk */
++              activity_monitor->Mem_FPS = input[idx + 1];
++              activity_monitor->Mem_MinFreqStep = input[idx + 2];
++              activity_monitor->Mem_MinActiveFreqType = input[idx + 3];
++              activity_monitor->Mem_MinActiveFreq = input[idx + 4];
++              activity_monitor->Mem_BoosterFreqType = input[idx + 5];
++              activity_monitor->Mem_BoosterFreq = input[idx + 6];
++              activity_monitor->Mem_PD_Data_limit_c = input[idx + 7];
++              activity_monitor->Mem_PD_Data_error_coeff = input[idx + 8];
++              activity_monitor->Mem_PD_Data_error_rate_coeff = input[idx + 9];
+       }
+-      if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
+-              if (size != 10)
+-                      return -EINVAL;
++      ret = smu_cmn_update_table(smu,
++                                 SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
++                                 (void *)(&activity_monitor_external), true);
++      if (ret) {
++              dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++              return ret;
++      }
+-              ret = smu_cmn_update_table(smu,
+-                                     SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
+-                                     (void *)(&activity_monitor_external), false);
+-              if (ret) {
+-                      dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
+-                      return ret;
+-              }
++      return ret;
++}
+-              switch (input[0]) {
+-              case 0: /* Gfxclk */
+-                      activity_monitor->Gfx_FPS = input[1];
+-                      activity_monitor->Gfx_MinFreqStep = input[2];
+-                      activity_monitor->Gfx_MinActiveFreqType = input[3];
+-                      activity_monitor->Gfx_MinActiveFreq = input[4];
+-                      activity_monitor->Gfx_BoosterFreqType = input[5];
+-                      activity_monitor->Gfx_BoosterFreq = input[6];
+-                      activity_monitor->Gfx_PD_Data_limit_c = input[7];
+-                      activity_monitor->Gfx_PD_Data_error_coeff = input[8];
+-                      activity_monitor->Gfx_PD_Data_error_rate_coeff = input[9];
+-                      break;
+-              case 1: /* Socclk */
+-                      activity_monitor->Fclk_FPS = input[1];
+-                      activity_monitor->Fclk_MinFreqStep = input[2];
+-                      activity_monitor->Fclk_MinActiveFreqType = input[3];
+-                      activity_monitor->Fclk_MinActiveFreq = input[4];
+-                      activity_monitor->Fclk_BoosterFreqType = input[5];
+-                      activity_monitor->Fclk_BoosterFreq = input[6];
+-                      activity_monitor->Fclk_PD_Data_limit_c = input[7];
+-                      activity_monitor->Fclk_PD_Data_error_coeff = input[8];
+-                      activity_monitor->Fclk_PD_Data_error_rate_coeff = input[9];
+-                      break;
+-              case 2: /* Memclk */
+-                      activity_monitor->Mem_FPS = input[1];
+-                      activity_monitor->Mem_MinFreqStep = input[2];
+-                      activity_monitor->Mem_MinActiveFreqType = input[3];
+-                      activity_monitor->Mem_MinActiveFreq = input[4];
+-                      activity_monitor->Mem_BoosterFreqType = input[5];
+-                      activity_monitor->Mem_BoosterFreq = input[6];
+-                      activity_monitor->Mem_PD_Data_limit_c = input[7];
+-                      activity_monitor->Mem_PD_Data_error_coeff = input[8];
+-                      activity_monitor->Mem_PD_Data_error_rate_coeff = input[9];
+-                      break;
+-              default:
+-                      return -EINVAL;
+-              }
++static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu,
++                                               u32 workload_mask,
++                                               long *custom_params,
++                                               u32 custom_params_max_idx)
++{
++      u32 backend_workload_mask = 0;
++      int ret, idx = -1, i;
+-              ret = smu_cmn_update_table(smu,
+-                                     SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
+-                                     (void *)(&activity_monitor_external), true);
++      smu_cmn_get_backend_workload_mask(smu, workload_mask,
++                                        &backend_workload_mask);
++
++      if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) {
++              if (!smu->custom_profile_params) {
++                      smu->custom_profile_params =
++                              kzalloc(SIENNA_CICHLID_CUSTOM_PARAMS_SIZE, GFP_KERNEL);
++                      if (!smu->custom_profile_params)
++                              return -ENOMEM;
++              }
++              if (custom_params && custom_params_max_idx) {
++                      if (custom_params_max_idx != SIENNA_CICHLID_CUSTOM_PARAMS_COUNT)
++                              return -EINVAL;
++                      if (custom_params[0] >= SIENNA_CICHLID_CUSTOM_PARAMS_CLOCK_COUNT)
++                              return -EINVAL;
++                      idx = custom_params[0] * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT;
++                      smu->custom_profile_params[idx] = 1;
++                      for (i = 1; i < custom_params_max_idx; i++)
++                              smu->custom_profile_params[idx + i] = custom_params[i];
++              }
++              ret = sienna_cichlid_set_power_profile_mode_coeff(smu,
++                                                                smu->custom_profile_params);
+               if (ret) {
+-                      dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++                      if (idx != -1)
++                              smu->custom_profile_params[idx] = 0;
+                       return ret;
+               }
++      } else if (smu->custom_profile_params) {
++              memset(smu->custom_profile_params, 0, SIENNA_CICHLID_CUSTOM_PARAMS_SIZE);
+       }
+-      /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
+-      workload_type = smu_cmn_to_asic_specific_index(smu,
+-                                                     CMN2ASIC_MAPPING_WORKLOAD,
+-                                                     smu->power_profile_mode);
+-      if (workload_type < 0)
+-              return -EINVAL;
+       ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
+-                                  1 << workload_type, NULL);
+-      if (ret)
+-              dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__);
++                                            backend_workload_mask, NULL);
++      if (ret) {
++              dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
++                      workload_mask);
++              if (idx != -1)
++                      smu->custom_profile_params[idx] = 0;
++              return ret;
++      }
+       return ret;
+ }
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+@@ -1054,42 +1054,27 @@ static int vangogh_get_power_profile_mod
+       return size;
+ }
+-static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
++static int vangogh_set_power_profile_mode(struct smu_context *smu,
++                                        u32 workload_mask,
++                                        long *custom_params,
++                                        u32 custom_params_max_idx)
+ {
+-      int workload_type, ret;
+-      uint32_t profile_mode = input[size];
++      u32 backend_workload_mask = 0;
++      int ret;
+-      if (profile_mode >= PP_SMC_POWER_PROFILE_COUNT) {
+-              dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode);
+-              return -EINVAL;
+-      }
+-
+-      if (profile_mode == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT ||
+-                      profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING)
+-              return 0;
+-
+-      /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
+-      workload_type = smu_cmn_to_asic_specific_index(smu,
+-                                                     CMN2ASIC_MAPPING_WORKLOAD,
+-                                                     profile_mode);
+-      if (workload_type < 0) {
+-              dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on VANGOGH\n",
+-                                      profile_mode);
+-              return -EINVAL;
+-      }
++      smu_cmn_get_backend_workload_mask(smu, workload_mask,
++                                        &backend_workload_mask);
+       ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify,
+-                                  1 << workload_type,
+-                                  NULL);
++                                            backend_workload_mask,
++                                            NULL);
+       if (ret) {
+-              dev_err_once(smu->adev->dev, "Fail to set workload type %d\n",
+-                                      workload_type);
++              dev_err_once(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
++                           workload_mask);
+               return ret;
+       }
+-      smu->power_profile_mode = profile_mode;
+-
+-      return 0;
++      return ret;
+ }
+ static int vangogh_set_soft_freq_limited_range(struct smu_context *smu,
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
+@@ -862,44 +862,27 @@ static int renoir_force_clk_levels(struc
+       return ret;
+ }
+-static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
++static int renoir_set_power_profile_mode(struct smu_context *smu,
++                                       u32 workload_mask,
++                                       long *custom_params,
++                                       u32 custom_params_max_idx)
+ {
+-      int workload_type, ret;
+-      uint32_t profile_mode = input[size];
++      int ret;
++      u32 backend_workload_mask = 0;
+-      if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
+-              dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode);
+-              return -EINVAL;
+-      }
+-
+-      if (profile_mode == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT ||
+-                      profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING)
+-              return 0;
+-
+-      /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
+-      workload_type = smu_cmn_to_asic_specific_index(smu,
+-                                                     CMN2ASIC_MAPPING_WORKLOAD,
+-                                                     profile_mode);
+-      if (workload_type < 0) {
+-              /*
+-               * TODO: If some case need switch to powersave/default power mode
+-               * then can consider enter WORKLOAD_COMPUTE/WORKLOAD_CUSTOM for power saving.
+-               */
+-              dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on RENOIR\n", profile_mode);
+-              return -EINVAL;
+-      }
++      smu_cmn_get_backend_workload_mask(smu, workload_mask,
++                                        &backend_workload_mask);
+       ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify,
+-                                  1 << workload_type,
+-                                  NULL);
++                                            backend_workload_mask,
++                                            NULL);
+       if (ret) {
+-              dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", workload_type);
++              dev_err_once(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
++                           workload_mask);
+               return ret;
+       }
+-      smu->power_profile_mode = profile_mode;
+-
+-      return 0;
++      return ret;
+ }
+ static int renoir_set_peak_clock_by_device(struct smu_context *smu)
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+@@ -2477,82 +2477,76 @@ static int smu_v13_0_0_get_power_profile
+       return size;
+ }
+-static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
+-                                            long *input,
+-                                            uint32_t size)
++#define SMU_13_0_0_CUSTOM_PARAMS_COUNT 9
++#define SMU_13_0_0_CUSTOM_PARAMS_CLOCK_COUNT 2
++#define SMU_13_0_0_CUSTOM_PARAMS_SIZE (SMU_13_0_0_CUSTOM_PARAMS_CLOCK_COUNT * SMU_13_0_0_CUSTOM_PARAMS_COUNT * sizeof(long))
++
++static int smu_v13_0_0_set_power_profile_mode_coeff(struct smu_context *smu,
++                                                  long *input)
+ {
+       DpmActivityMonitorCoeffIntExternal_t activity_monitor_external;
+       DpmActivityMonitorCoeffInt_t *activity_monitor =
+               &(activity_monitor_external.DpmActivityMonitorCoeffInt);
+-      int workload_type, ret = 0;
+-      u32 workload_mask, selected_workload_mask;
++      int ret, idx;
+-      smu->power_profile_mode = input[size];
+-
+-      if (smu->power_profile_mode >= PP_SMC_POWER_PROFILE_COUNT) {
+-              dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
+-              return -EINVAL;
++      ret = smu_cmn_update_table(smu,
++                                 SMU_TABLE_ACTIVITY_MONITOR_COEFF,
++                                 WORKLOAD_PPLIB_CUSTOM_BIT,
++                                 (void *)(&activity_monitor_external),
++                                 false);
++      if (ret) {
++              dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
++              return ret;
+       }
+-      if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
+-              if (size != 9)
+-                      return -EINVAL;
+-
+-              ret = smu_cmn_update_table(smu,
+-                                         SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+-                                         WORKLOAD_PPLIB_CUSTOM_BIT,
+-                                         (void *)(&activity_monitor_external),
+-                                         false);
+-              if (ret) {
+-                      dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
+-                      return ret;
+-              }
+-
+-              switch (input[0]) {
+-              case 0: /* Gfxclk */
+-                      activity_monitor->Gfx_FPS = input[1];
+-                      activity_monitor->Gfx_MinActiveFreqType = input[2];
+-                      activity_monitor->Gfx_MinActiveFreq = input[3];
+-                      activity_monitor->Gfx_BoosterFreqType = input[4];
+-                      activity_monitor->Gfx_BoosterFreq = input[5];
+-                      activity_monitor->Gfx_PD_Data_limit_c = input[6];
+-                      activity_monitor->Gfx_PD_Data_error_coeff = input[7];
+-                      activity_monitor->Gfx_PD_Data_error_rate_coeff = input[8];
+-                      break;
+-              case 1: /* Fclk */
+-                      activity_monitor->Fclk_FPS = input[1];
+-                      activity_monitor->Fclk_MinActiveFreqType = input[2];
+-                      activity_monitor->Fclk_MinActiveFreq = input[3];
+-                      activity_monitor->Fclk_BoosterFreqType = input[4];
+-                      activity_monitor->Fclk_BoosterFreq = input[5];
+-                      activity_monitor->Fclk_PD_Data_limit_c = input[6];
+-                      activity_monitor->Fclk_PD_Data_error_coeff = input[7];
+-                      activity_monitor->Fclk_PD_Data_error_rate_coeff = input[8];
+-                      break;
+-              default:
+-                      return -EINVAL;
+-              }
++      idx = 0 * SMU_13_0_0_CUSTOM_PARAMS_COUNT;
++      if (input[idx]) {
++              /* Gfxclk */
++              activity_monitor->Gfx_FPS = input[idx + 1];
++              activity_monitor->Gfx_MinActiveFreqType = input[idx + 2];
++              activity_monitor->Gfx_MinActiveFreq = input[idx + 3];
++              activity_monitor->Gfx_BoosterFreqType = input[idx + 4];
++              activity_monitor->Gfx_BoosterFreq = input[idx + 5];
++              activity_monitor->Gfx_PD_Data_limit_c = input[idx + 6];
++              activity_monitor->Gfx_PD_Data_error_coeff = input[idx + 7];
++              activity_monitor->Gfx_PD_Data_error_rate_coeff = input[idx + 8];
++      }
++      idx = 1 * SMU_13_0_0_CUSTOM_PARAMS_COUNT;
++      if (input[idx]) {
++              /* Fclk */
++              activity_monitor->Fclk_FPS = input[idx + 1];
++              activity_monitor->Fclk_MinActiveFreqType = input[idx + 2];
++              activity_monitor->Fclk_MinActiveFreq = input[idx + 3];
++              activity_monitor->Fclk_BoosterFreqType = input[idx + 4];
++              activity_monitor->Fclk_BoosterFreq = input[idx + 5];
++              activity_monitor->Fclk_PD_Data_limit_c = input[idx + 6];
++              activity_monitor->Fclk_PD_Data_error_coeff = input[idx + 7];
++              activity_monitor->Fclk_PD_Data_error_rate_coeff = input[idx + 8];
++      }
+-              ret = smu_cmn_update_table(smu,
+-                                         SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+-                                         WORKLOAD_PPLIB_CUSTOM_BIT,
+-                                         (void *)(&activity_monitor_external),
+-                                         true);
+-              if (ret) {
+-                      dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
+-                      return ret;
+-              }
++      ret = smu_cmn_update_table(smu,
++                                 SMU_TABLE_ACTIVITY_MONITOR_COEFF,
++                                 WORKLOAD_PPLIB_CUSTOM_BIT,
++                                 (void *)(&activity_monitor_external),
++                                 true);
++      if (ret) {
++              dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++              return ret;
+       }
+-      /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
+-      workload_type = smu_cmn_to_asic_specific_index(smu,
+-                                                     CMN2ASIC_MAPPING_WORKLOAD,
+-                                                     smu->power_profile_mode);
++      return ret;
++}
+-      if (workload_type < 0)
+-              return -EINVAL;
++static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
++                                            u32 workload_mask,
++                                            long *custom_params,
++                                            u32 custom_params_max_idx)
++{
++      u32 backend_workload_mask = 0;
++      int workload_type, ret, idx = -1, i;
+-      selected_workload_mask = workload_mask = 1 << workload_type;
++      smu_cmn_get_backend_workload_mask(smu, workload_mask,
++                                        &backend_workload_mask);
+       /* Add optimizations for SMU13.0.0/10.  Reuse the power saving profile */
+       if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) &&
+@@ -2564,15 +2558,48 @@ static int smu_v13_0_0_set_power_profile
+                                                              CMN2ASIC_MAPPING_WORKLOAD,
+                                                              PP_SMC_POWER_PROFILE_POWERSAVING);
+               if (workload_type >= 0)
+-                      workload_mask |= 1 << workload_type;
++                      backend_workload_mask |= 1 << workload_type;
++      }
++
++      if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) {
++              if (!smu->custom_profile_params) {
++                      smu->custom_profile_params =
++                              kzalloc(SMU_13_0_0_CUSTOM_PARAMS_SIZE, GFP_KERNEL);
++                      if (!smu->custom_profile_params)
++                              return -ENOMEM;
++              }
++              if (custom_params && custom_params_max_idx) {
++                      if (custom_params_max_idx != SMU_13_0_0_CUSTOM_PARAMS_COUNT)
++                              return -EINVAL;
++                      if (custom_params[0] >= SMU_13_0_0_CUSTOM_PARAMS_CLOCK_COUNT)
++                              return -EINVAL;
++                      idx = custom_params[0] * SMU_13_0_0_CUSTOM_PARAMS_COUNT;
++                      smu->custom_profile_params[idx] = 1;
++                      for (i = 1; i < custom_params_max_idx; i++)
++                              smu->custom_profile_params[idx + i] = custom_params[i];
++              }
++              ret = smu_v13_0_0_set_power_profile_mode_coeff(smu,
++                                                             smu->custom_profile_params);
++              if (ret) {
++                      if (idx != -1)
++                              smu->custom_profile_params[idx] = 0;
++                      return ret;
++              }
++      } else if (smu->custom_profile_params) {
++              memset(smu->custom_profile_params, 0, SMU_13_0_0_CUSTOM_PARAMS_SIZE);
+       }
+       ret = smu_cmn_send_smc_msg_with_param(smu,
+-                                             SMU_MSG_SetWorkloadMask,
+-                                             workload_mask,
+-                                             NULL);
+-      if (!ret)
+-              smu->workload_mask = selected_workload_mask;
++                                            SMU_MSG_SetWorkloadMask,
++                                            backend_workload_mask,
++                                            NULL);
++      if (ret) {
++              dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
++                      workload_mask);
++              if (idx != -1)
++                      smu->custom_profile_params[idx] = 0;
++              return ret;
++      }
+       return ret;
+ }
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+@@ -2436,78 +2436,110 @@ out:
+       return result;
+ }
+-static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
++#define SMU_13_0_7_CUSTOM_PARAMS_COUNT 8
++#define SMU_13_0_7_CUSTOM_PARAMS_CLOCK_COUNT 2
++#define SMU_13_0_7_CUSTOM_PARAMS_SIZE (SMU_13_0_7_CUSTOM_PARAMS_CLOCK_COUNT * SMU_13_0_7_CUSTOM_PARAMS_COUNT * sizeof(long))
++
++static int smu_v13_0_7_set_power_profile_mode_coeff(struct smu_context *smu,
++                                                  long *input)
+ {
+       DpmActivityMonitorCoeffIntExternal_t activity_monitor_external;
+       DpmActivityMonitorCoeffInt_t *activity_monitor =
+               &(activity_monitor_external.DpmActivityMonitorCoeffInt);
+-      int workload_type, ret = 0;
++      int ret, idx;
+-      smu->power_profile_mode = input[size];
++      ret = smu_cmn_update_table(smu,
++                                 SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
++                                 (void *)(&activity_monitor_external), false);
++      if (ret) {
++              dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
++              return ret;
++      }
+-      if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_WINDOW3D) {
+-              dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
+-              return -EINVAL;
++      idx = 0 * SMU_13_0_7_CUSTOM_PARAMS_COUNT;
++      if (input[idx]) {
++              /* Gfxclk */
++              activity_monitor->Gfx_ActiveHystLimit = input[idx + 1];
++              activity_monitor->Gfx_IdleHystLimit = input[idx + 2];
++              activity_monitor->Gfx_FPS = input[idx + 3];
++              activity_monitor->Gfx_MinActiveFreqType = input[idx + 4];
++              activity_monitor->Gfx_BoosterFreqType = input[idx + 5];
++              activity_monitor->Gfx_MinActiveFreq = input[idx + 6];
++              activity_monitor->Gfx_BoosterFreq = input[idx + 7];
++      }
++      idx = 1 * SMU_13_0_7_CUSTOM_PARAMS_COUNT;
++      if (input[idx]) {
++              /* Fclk */
++              activity_monitor->Fclk_ActiveHystLimit = input[idx + 1];
++              activity_monitor->Fclk_IdleHystLimit = input[idx + 2];
++              activity_monitor->Fclk_FPS = input[idx + 3];
++              activity_monitor->Fclk_MinActiveFreqType = input[idx + 4];
++              activity_monitor->Fclk_BoosterFreqType = input[idx + 5];
++              activity_monitor->Fclk_MinActiveFreq = input[idx + 6];
++              activity_monitor->Fclk_BoosterFreq = input[idx + 7];
+       }
+-      if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
+-              if (size != 8)
+-                      return -EINVAL;
++      ret = smu_cmn_update_table(smu,
++                                 SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
++                                 (void *)(&activity_monitor_external), true);
++      if (ret) {
++              dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++              return ret;
++      }
+-              ret = smu_cmn_update_table(smu,
+-                                     SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
+-                                     (void *)(&activity_monitor_external), false);
+-              if (ret) {
+-                      dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
+-                      return ret;
+-              }
++      return ret;
++}
+-              switch (input[0]) {
+-              case 0: /* Gfxclk */
+-                      activity_monitor->Gfx_ActiveHystLimit = input[1];
+-                      activity_monitor->Gfx_IdleHystLimit = input[2];
+-                      activity_monitor->Gfx_FPS = input[3];
+-                      activity_monitor->Gfx_MinActiveFreqType = input[4];
+-                      activity_monitor->Gfx_BoosterFreqType = input[5];
+-                      activity_monitor->Gfx_MinActiveFreq = input[6];
+-                      activity_monitor->Gfx_BoosterFreq = input[7];
+-                      break;
+-              case 1: /* Fclk */
+-                      activity_monitor->Fclk_ActiveHystLimit = input[1];
+-                      activity_monitor->Fclk_IdleHystLimit = input[2];
+-                      activity_monitor->Fclk_FPS = input[3];
+-                      activity_monitor->Fclk_MinActiveFreqType = input[4];
+-                      activity_monitor->Fclk_BoosterFreqType = input[5];
+-                      activity_monitor->Fclk_MinActiveFreq = input[6];
+-                      activity_monitor->Fclk_BoosterFreq = input[7];
+-                      break;
+-              default:
+-                      return -EINVAL;
+-              }
++static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu,
++                                            u32 workload_mask,
++                                            long *custom_params,
++                                            u32 custom_params_max_idx)
++{
++      u32 backend_workload_mask = 0;
++      int ret, idx = -1, i;
++
++      smu_cmn_get_backend_workload_mask(smu, workload_mask,
++                                        &backend_workload_mask);
+-              ret = smu_cmn_update_table(smu,
+-                                     SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
+-                                     (void *)(&activity_monitor_external), true);
++      if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) {
++              if (!smu->custom_profile_params) {
++                      smu->custom_profile_params =
++                              kzalloc(SMU_13_0_7_CUSTOM_PARAMS_SIZE, GFP_KERNEL);
++                      if (!smu->custom_profile_params)
++                              return -ENOMEM;
++              }
++              if (custom_params && custom_params_max_idx) {
++                      if (custom_params_max_idx != SMU_13_0_7_CUSTOM_PARAMS_COUNT)
++                              return -EINVAL;
++                      if (custom_params[0] >= SMU_13_0_7_CUSTOM_PARAMS_CLOCK_COUNT)
++                              return -EINVAL;
++                      idx = custom_params[0] * SMU_13_0_7_CUSTOM_PARAMS_COUNT;
++                      smu->custom_profile_params[idx] = 1;
++                      for (i = 1; i < custom_params_max_idx; i++)
++                              smu->custom_profile_params[idx + i] = custom_params[i];
++              }
++              ret = smu_v13_0_7_set_power_profile_mode_coeff(smu,
++                                                             smu->custom_profile_params);
+               if (ret) {
+-                      dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++                      if (idx != -1)
++                              smu->custom_profile_params[idx] = 0;
+                       return ret;
+               }
++      } else if (smu->custom_profile_params) {
++              memset(smu->custom_profile_params, 0, SMU_13_0_7_CUSTOM_PARAMS_SIZE);
+       }
+-      /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
+-      workload_type = smu_cmn_to_asic_specific_index(smu,
+-                                                     CMN2ASIC_MAPPING_WORKLOAD,
+-                                                     smu->power_profile_mode);
+-      if (workload_type < 0)
+-              return -EINVAL;
+       ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
+-                                  1 << workload_type, NULL);
++                                            backend_workload_mask, NULL);
+-      if (ret)
+-              dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__);
+-      else
+-              smu->workload_mask = (1 << workload_type);
++      if (ret) {
++              dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
++                      workload_mask);
++              if (idx != -1)
++                      smu->custom_profile_params[idx] = 0;
++              return ret;
++      }
+       return ret;
+ }
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
+@@ -1751,90 +1751,120 @@ static int smu_v14_0_2_get_power_profile
+       return size;
+ }
+-static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu,
+-                                            long *input,
+-                                            uint32_t size)
++#define SMU_14_0_2_CUSTOM_PARAMS_COUNT 9
++#define SMU_14_0_2_CUSTOM_PARAMS_CLOCK_COUNT 2
++#define SMU_14_0_2_CUSTOM_PARAMS_SIZE (SMU_14_0_2_CUSTOM_PARAMS_CLOCK_COUNT * SMU_14_0_2_CUSTOM_PARAMS_COUNT * sizeof(long))
++
++static int smu_v14_0_2_set_power_profile_mode_coeff(struct smu_context *smu,
++                                                  long *input)
+ {
+       DpmActivityMonitorCoeffIntExternal_t activity_monitor_external;
+       DpmActivityMonitorCoeffInt_t *activity_monitor =
+               &(activity_monitor_external.DpmActivityMonitorCoeffInt);
+-      int workload_type, ret = 0;
+-      uint32_t current_profile_mode = smu->power_profile_mode;
+-      smu->power_profile_mode = input[size];
+-
+-      if (smu->power_profile_mode >= PP_SMC_POWER_PROFILE_COUNT) {
+-              dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
+-              return -EINVAL;
+-      }
+-
+-      if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
+-              if (size != 9)
+-                      return -EINVAL;
+-
+-              ret = smu_cmn_update_table(smu,
+-                                         SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+-                                         WORKLOAD_PPLIB_CUSTOM_BIT,
+-                                         (void *)(&activity_monitor_external),
+-                                         false);
+-              if (ret) {
+-                      dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
+-                      return ret;
+-              }
++      int ret, idx;
+-              switch (input[0]) {
+-              case 0: /* Gfxclk */
+-                      activity_monitor->Gfx_FPS = input[1];
+-                      activity_monitor->Gfx_MinActiveFreqType = input[2];
+-                      activity_monitor->Gfx_MinActiveFreq = input[3];
+-                      activity_monitor->Gfx_BoosterFreqType = input[4];
+-                      activity_monitor->Gfx_BoosterFreq = input[5];
+-                      activity_monitor->Gfx_PD_Data_limit_c = input[6];
+-                      activity_monitor->Gfx_PD_Data_error_coeff = input[7];
+-                      activity_monitor->Gfx_PD_Data_error_rate_coeff = input[8];
+-                      break;
+-              case 1: /* Fclk */
+-                      activity_monitor->Fclk_FPS = input[1];
+-                      activity_monitor->Fclk_MinActiveFreqType = input[2];
+-                      activity_monitor->Fclk_MinActiveFreq = input[3];
+-                      activity_monitor->Fclk_BoosterFreqType = input[4];
+-                      activity_monitor->Fclk_BoosterFreq = input[5];
+-                      activity_monitor->Fclk_PD_Data_limit_c = input[6];
+-                      activity_monitor->Fclk_PD_Data_error_coeff = input[7];
+-                      activity_monitor->Fclk_PD_Data_error_rate_coeff = input[8];
+-                      break;
+-              default:
+-                      return -EINVAL;
+-              }
++      ret = smu_cmn_update_table(smu,
++                                 SMU_TABLE_ACTIVITY_MONITOR_COEFF,
++                                 WORKLOAD_PPLIB_CUSTOM_BIT,
++                                 (void *)(&activity_monitor_external),
++                                 false);
++      if (ret) {
++              dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
++              return ret;
++      }
+-              ret = smu_cmn_update_table(smu,
+-                                         SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+-                                         WORKLOAD_PPLIB_CUSTOM_BIT,
+-                                         (void *)(&activity_monitor_external),
+-                                         true);
+-              if (ret) {
+-                      dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
+-                      return ret;
+-              }
++      idx = 0 * SMU_14_0_2_CUSTOM_PARAMS_COUNT;
++      if (input[idx]) {
++              /* Gfxclk */
++              activity_monitor->Gfx_FPS = input[idx + 1];
++              activity_monitor->Gfx_MinActiveFreqType = input[idx + 2];
++              activity_monitor->Gfx_MinActiveFreq = input[idx + 3];
++              activity_monitor->Gfx_BoosterFreqType = input[idx + 4];
++              activity_monitor->Gfx_BoosterFreq = input[idx + 5];
++              activity_monitor->Gfx_PD_Data_limit_c = input[idx + 6];
++              activity_monitor->Gfx_PD_Data_error_coeff = input[idx + 7];
++              activity_monitor->Gfx_PD_Data_error_rate_coeff = input[idx + 8];
++      }
++      idx = 1 * SMU_14_0_2_CUSTOM_PARAMS_COUNT;
++      if (input[idx]) {
++              /* Fclk */
++              activity_monitor->Fclk_FPS = input[idx + 1];
++              activity_monitor->Fclk_MinActiveFreqType = input[idx + 2];
++              activity_monitor->Fclk_MinActiveFreq = input[idx + 3];
++              activity_monitor->Fclk_BoosterFreqType = input[idx + 4];
++              activity_monitor->Fclk_BoosterFreq = input[idx + 5];
++              activity_monitor->Fclk_PD_Data_limit_c = input[idx + 6];
++              activity_monitor->Fclk_PD_Data_error_coeff = input[idx + 7];
++              activity_monitor->Fclk_PD_Data_error_rate_coeff = input[idx + 8];
+       }
+-      if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE)
++      ret = smu_cmn_update_table(smu,
++                                 SMU_TABLE_ACTIVITY_MONITOR_COEFF,
++                                 WORKLOAD_PPLIB_CUSTOM_BIT,
++                                 (void *)(&activity_monitor_external),
++                                 true);
++      if (ret) {
++              dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++              return ret;
++      }
++
++      return ret;
++}
++
++static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu,
++                                            u32 workload_mask,
++                                            long *custom_params,
++                                            u32 custom_params_max_idx)
++{
++      u32 backend_workload_mask = 0;
++      int ret, idx = -1, i;
++
++      smu_cmn_get_backend_workload_mask(smu, workload_mask,
++                                        &backend_workload_mask);
++
++      /* disable deep sleep if compute is enabled */
++      if (workload_mask & (1 << PP_SMC_POWER_PROFILE_COMPUTE))
+               smu_v14_0_deep_sleep_control(smu, false);
+-      else if (current_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE)
++      else
+               smu_v14_0_deep_sleep_control(smu, true);
+-      /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
+-      workload_type = smu_cmn_to_asic_specific_index(smu,
+-                                                     CMN2ASIC_MAPPING_WORKLOAD,
+-                                                     smu->power_profile_mode);
+-      if (workload_type < 0)
+-              return -EINVAL;
+-
+-      ret = smu_cmn_send_smc_msg_with_param(smu,
+-                                             SMU_MSG_SetWorkloadMask,
+-                                             1 << workload_type,
+-                                             NULL);
+-      if (!ret)
+-              smu->workload_mask = 1 << workload_type;
++      if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) {
++              if (!smu->custom_profile_params) {
++                      smu->custom_profile_params =
++                              kzalloc(SMU_14_0_2_CUSTOM_PARAMS_SIZE, GFP_KERNEL);
++                      if (!smu->custom_profile_params)
++                              return -ENOMEM;
++              }
++              if (custom_params && custom_params_max_idx) {
++                      if (custom_params_max_idx != SMU_14_0_2_CUSTOM_PARAMS_COUNT)
++                              return -EINVAL;
++                      if (custom_params[0] >= SMU_14_0_2_CUSTOM_PARAMS_CLOCK_COUNT)
++                              return -EINVAL;
++                      idx = custom_params[0] * SMU_14_0_2_CUSTOM_PARAMS_COUNT;
++                      smu->custom_profile_params[idx] = 1;
++                      for (i = 1; i < custom_params_max_idx; i++)
++                              smu->custom_profile_params[idx + i] = custom_params[i];
++              }
++              ret = smu_v14_0_2_set_power_profile_mode_coeff(smu,
++                                                             smu->custom_profile_params);
++              if (ret) {
++                      if (idx != -1)
++                              smu->custom_profile_params[idx] = 0;
++                      return ret;
++              }
++      } else if (smu->custom_profile_params) {
++              memset(smu->custom_profile_params, 0, SMU_14_0_2_CUSTOM_PARAMS_SIZE);
++      }
++
++      ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
++                                            backend_workload_mask, NULL);
++      if (ret) {
++              dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
++                      workload_mask);
++              if (idx != -1)
++                      smu->custom_profile_params[idx] = 0;
++              return ret;
++      }
+       return ret;
+ }
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+@@ -1215,3 +1215,28 @@ void smu_cmn_generic_plpd_policy_desc(st
+ {
+       policy->desc = &xgmi_plpd_policy_desc;
+ }
++
++void smu_cmn_get_backend_workload_mask(struct smu_context *smu,
++                                     u32 workload_mask,
++                                     u32 *backend_workload_mask)
++{
++      int workload_type;
++      u32 profile_mode;
++
++      *backend_workload_mask = 0;
++
++      for (profile_mode = 0; profile_mode < PP_SMC_POWER_PROFILE_COUNT; profile_mode++) {
++              if (!(workload_mask & (1 << profile_mode)))
++                      continue;
++
++              /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
++              workload_type = smu_cmn_to_asic_specific_index(smu,
++                                                             CMN2ASIC_MAPPING_WORKLOAD,
++                                                             profile_mode);
++
++              if (workload_type < 0)
++                      continue;
++
++              *backend_workload_mask |= 1 << workload_type;
++      }
++}
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
+@@ -147,5 +147,9 @@ bool smu_cmn_is_audio_func_enabled(struc
+ void smu_cmn_generic_soc_policy_desc(struct smu_dpm_policy *policy);
+ void smu_cmn_generic_plpd_policy_desc(struct smu_dpm_policy *policy);
++void smu_cmn_get_backend_workload_mask(struct smu_context *smu,
++                                     u32 workload_mask,
++                                     u32 *backend_workload_mask);
++
+ #endif
+ #endif
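
For context, not part of the patch itself: the rework above replaces the old single-profile plumbing with a shared helper, smu_cmn_get_backend_workload_mask(), which converts a frontend mask of PP_SMC_POWER_PROFILE_* bits into the ASIC's WORKLOAD_PPLIB_*_BIT mask, plus a per-ASIC custom_profile_params buffer laid out as one row per clock domain, where slot 0 of each row is a validity flag. A minimal sketch of that row indexing, using the SMU 13.0.7 sizes visible above (names here are illustrative, not verbatim kernel code):

#define CUSTOM_PARAMS_COUNT       8  /* SMU_13_0_7_CUSTOM_PARAMS_COUNT */
#define CUSTOM_PARAMS_CLOCK_COUNT 2  /* row 0: Gfxclk, row 1: Fclk */

/* Cache a user-supplied custom profile row: input[0] picks the clock
 * row, input[1..n-1] are the coefficients for that clock. */
static void stash_custom_params(long *buf, const long *input, int n)
{
        int idx = input[0] * CUSTOM_PARAMS_COUNT;
        int i;

        buf[idx] = 1;                   /* slot 0: mark the row as set */
        for (i = 1; i < n; i++)
                buf[idx + i] = input[i];
}

This layout is why the error paths above only clear smu->custom_profile_params[idx]: zeroing the flag invalidates the whole row without touching the other clock's parameters.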
diff --git a/queue-6.12/drm-amdgpu-hdp4.0-do-a-posting-read-when-flushing-hdp.patch b/queue-6.12/drm-amdgpu-hdp4.0-do-a-posting-read-when-flushing-hdp.patch
new file mode 100644 (file)
index 0000000..f1d53ba
--- /dev/null
@@ -0,0 +1,53 @@
+From c9b8dcabb52afe88413ff135a0953e3cc4128483 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Fri, 22 Nov 2024 11:22:51 -0500
+Subject: drm/amdgpu/hdp4.0: do a posting read when flushing HDP
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit c9b8dcabb52afe88413ff135a0953e3cc4128483 upstream.
+
+Need to read back to make sure the write goes through.
+
+Cc: David Belanger <david.belanger@amd.com>
+Reviewed-by: Frank Min <frank.min@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c |   12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+@@ -40,10 +40,12 @@
+ static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev,
+                               struct amdgpu_ring *ring)
+ {
+-      if (!ring || !ring->funcs->emit_wreg)
++      if (!ring || !ring->funcs->emit_wreg) {
+               WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+-      else
++              RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
++      } else {
+               amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
++      }
+ }
+ static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
+@@ -54,11 +56,13 @@ static void hdp_v4_0_invalidate_hdp(stru
+           amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 5))
+               return;
+-      if (!ring || !ring->funcs->emit_wreg)
++      if (!ring || !ring->funcs->emit_wreg) {
+               WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
+-      else
++              RREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE);
++      } else {
+               amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
+                       HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
++      }
+ }
+ static void hdp_v4_0_query_ras_error_count(struct amdgpu_device *adev,
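
An aside on the pattern this and the following four HDP patches apply: MMIO writes over PCIe are posted, meaning the CPU can race ahead before the write actually reaches the device, and reading any register back from the same device forces all prior posted writes to complete. A minimal sketch of the idiom using the generic kernel accessors (the patches themselves use amdgpu's WREG32/RREG32 wrappers):

#include <linux/io.h>

/* Posting read: the readl() cannot complete until the preceding
 * posted writel() has landed in the device, so the flush is
 * guaranteed to have happened by the time this returns. */
static void flush_hdp_mmio(void __iomem *flush_cntl)
{
        writel(0, flush_cntl);
        (void)readl(flush_cntl);
}

The same two-line change is repeated for HDP 5.0, 5.2, 6.0 and 7.0 below; only the register helpers differ per IP version.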
diff --git a/queue-6.12/drm-amdgpu-hdp5.0-do-a-posting-read-when-flushing-hdp.patch b/queue-6.12/drm-amdgpu-hdp5.0-do-a-posting-read-when-flushing-hdp.patch
new file mode 100644 (file)
index 0000000..fa56499
--- /dev/null
@@ -0,0 +1,45 @@
+From cf424020e040be35df05b682b546b255e74a420f Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Fri, 22 Nov 2024 11:23:56 -0500
+Subject: drm/amdgpu/hdp5.0: do a posting read when flushing HDP
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit cf424020e040be35df05b682b546b255e74a420f upstream.
+
+Need to read back to make sure the write goes through.
+
+Cc: David Belanger <david.belanger@amd.com>
+Reviewed-by: Frank Min <frank.min@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c |    7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
+@@ -31,10 +31,12 @@
+ static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev,
+                               struct amdgpu_ring *ring)
+ {
+-      if (!ring || !ring->funcs->emit_wreg)
++      if (!ring || !ring->funcs->emit_wreg) {
+               WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+-      else
++              RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
++      } else {
+               amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
++      }
+ }
+ static void hdp_v5_0_invalidate_hdp(struct amdgpu_device *adev,
+@@ -42,6 +44,7 @@ static void hdp_v5_0_invalidate_hdp(stru
+ {
+       if (!ring || !ring->funcs->emit_wreg) {
+               WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
++              RREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE);
+       } else {
+               amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
+                                       HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
diff --git a/queue-6.12/drm-amdgpu-hdp5.2-do-a-posting-read-when-flushing-hdp.patch b/queue-6.12/drm-amdgpu-hdp5.2-do-a-posting-read-when-flushing-hdp.patch
new file mode 100644 (file)
index 0000000..16cd341
--- /dev/null
@@ -0,0 +1,40 @@
+From f756dbac1ce1d5f9a2b35e3b55fa429cf6336437 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Fri, 22 Nov 2024 11:24:13 -0500
+Subject: drm/amdgpu/hdp5.2: do a posting read when flushing HDP
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit f756dbac1ce1d5f9a2b35e3b55fa429cf6336437 upstream.
+
+Need to read back to make sure the write goes through.
+
+Cc: David Belanger <david.belanger@amd.com>
+Reviewed-by: Frank Min <frank.min@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
++++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
+@@ -31,13 +31,15 @@
+ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev,
+                               struct amdgpu_ring *ring)
+ {
+-      if (!ring || !ring->funcs->emit_wreg)
++      if (!ring || !ring->funcs->emit_wreg) {
+               WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
+                       0);
+-      else
++              RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
++      } else {
+               amdgpu_ring_emit_wreg(ring,
+                       (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
+                       0);
++      }
+ }
+ static void hdp_v5_2_update_mem_power_gating(struct amdgpu_device *adev,
diff --git a/queue-6.12/drm-amdgpu-hdp6.0-do-a-posting-read-when-flushing-hdp.patch b/queue-6.12/drm-amdgpu-hdp6.0-do-a-posting-read-when-flushing-hdp.patch
new file mode 100644 (file)
index 0000000..5efed87
--- /dev/null
@@ -0,0 +1,37 @@
+From abe1cbaec6cfe9fde609a15cd6a12c812282ce77 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Fri, 22 Nov 2024 11:24:38 -0500
+Subject: drm/amdgpu/hdp6.0: do a posting read when flushing HDP
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit abe1cbaec6cfe9fde609a15cd6a12c812282ce77 upstream.
+
+Need to read back to make sure the write goes through.
+
+Cc: David Belanger <david.belanger@amd.com>
+Reviewed-by: Frank Min <frank.min@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
+@@ -34,10 +34,12 @@
+ static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev,
+                               struct amdgpu_ring *ring)
+ {
+-      if (!ring || !ring->funcs->emit_wreg)
++      if (!ring || !ring->funcs->emit_wreg) {
+               WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+-      else
++              RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
++      } else {
+               amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
++      }
+ }
+ static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev,
diff --git a/queue-6.12/drm-amdgpu-hdp7.0-do-a-posting-read-when-flushing-hdp.patch b/queue-6.12/drm-amdgpu-hdp7.0-do-a-posting-read-when-flushing-hdp.patch
new file mode 100644 (file)
index 0000000..9ee6355
--- /dev/null
@@ -0,0 +1,37 @@
+From 689275140cb8e9f8ae59e545086fce51fb0b994a Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Thu, 28 Nov 2024 16:05:24 +0800
+Subject: drm/amdgpu/hdp7.0: do a posting read when flushing HDP
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit 689275140cb8e9f8ae59e545086fce51fb0b994a upstream.
+
+Need to read back to make sure the write goes through.
+
+Cc: David Belanger <david.belanger@amd.com>
+Reviewed-by: Frank Min <frank.min@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c |    6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c
+@@ -31,10 +31,12 @@
+ static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev,
+                               struct amdgpu_ring *ring)
+ {
+-      if (!ring || !ring->funcs->emit_wreg)
++      if (!ring || !ring->funcs->emit_wreg) {
+               WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+-      else
++              RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
++      } else {
+               amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
++      }
+ }
+ static void hdp_v7_0_update_clock_gating(struct amdgpu_device *adev,
diff --git a/queue-6.12/drm-amdgpu-rework-resume-handling-for-display-v2.patch b/queue-6.12/drm-amdgpu-rework-resume-handling-for-display-v2.patch
new file mode 100644 (file)
index 0000000..e19b49d
--- /dev/null
@@ -0,0 +1,116 @@
+From 73dae652dcac776296890da215ee7dec357a1032 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Mon, 25 Nov 2024 13:59:09 -0500
+Subject: drm/amdgpu: rework resume handling for display (v2)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit 73dae652dcac776296890da215ee7dec357a1032 upstream.
+
+Split resume into a 3rd step to handle displays when DCC is
+enabled on DCN 4.0.1.  Move display resume after the buffer funcs
+have been re-enabled, so that the GPU will do the buffer move and
+properly set the DCC metadata for DCN.
+
+v2: fix fence irq resume ordering
+
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org # 6.11.x
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |   45 +++++++++++++++++++++++++++--
+ 1 file changed, 43 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -3666,7 +3666,7 @@ static int amdgpu_device_ip_resume_phase
+  *
+  * @adev: amdgpu_device pointer
+  *
+- * First resume function for hardware IPs.  The list of all the hardware
++ * Second resume function for hardware IPs.  The list of all the hardware
+  * IPs that make up the asic is walked and the resume callbacks are run for
+  * all blocks except COMMON, GMC, and IH.  resume puts the hardware into a
+  * functional state after a suspend and updates the software state as
+@@ -3684,6 +3684,7 @@ static int amdgpu_device_ip_resume_phase
+               if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+                   adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+                   adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
++                  adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
+                   adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
+                       continue;
+               r = adev->ip_blocks[i].version->funcs->resume(adev);
+@@ -3699,6 +3700,36 @@ static int amdgpu_device_ip_resume_phase
+ }
+ /**
++ * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
++ *
++ * @adev: amdgpu_device pointer
++ *
++ * Third resume function for hardware IPs.  The list of all the hardware
++ * IPs that make up the asic is walked and the resume callbacks are run for
++ * all DCE.  resume puts the hardware into a functional state after a suspend
++ * and updates the software state as necessary.  This function is also used
++ * for restoring the GPU after a GPU reset.
++ *
++ * Returns 0 on success, negative error code on failure.
++ */
++static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
++{
++      int i, r;
++
++      for (i = 0; i < adev->num_ip_blocks; i++) {
++              if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
++                      continue;
++              if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
++                      r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
++                      if (r)
++                              return r;
++              }
++      }
++
++      return 0;
++}
++
++/**
+  * amdgpu_device_ip_resume - run resume for hardware IPs
+  *
+  * @adev: amdgpu_device pointer
+@@ -3727,6 +3758,13 @@ static int amdgpu_device_ip_resume(struc
+       if (adev->mman.buffer_funcs_ring->sched.ready)
+               amdgpu_ttm_set_buffer_funcs_status(adev, true);
++      if (r)
++              return r;
++
++      amdgpu_fence_driver_hw_init(adev);
++
++      r = amdgpu_device_ip_resume_phase3(adev);
++
+       return r;
+ }
+@@ -4809,7 +4847,6 @@ int amdgpu_device_resume(struct drm_devi
+               dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
+               goto exit;
+       }
+-      amdgpu_fence_driver_hw_init(adev);
+       if (!adev->in_s0ix) {
+               r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
+@@ -5431,6 +5468,10 @@ int amdgpu_do_asic_reset(struct list_hea
+                               if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
+                                       amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
++                              r = amdgpu_device_ip_resume_phase3(tmp_adev);
++                              if (r)
++                                      goto out;
++
+                               if (vram_lost)
+                                       amdgpu_device_fill_reset_magic(tmp_adev);
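
To summarize the resume flow this patch produces (an editorial paraphrase of the diff above, not verbatim kernel code):

/*
 * amdgpu_device_ip_resume(), after this patch:
 *
 *   phase1: COMMON, GMC, IH, PSP          (unchanged, elided above)
 *   phase2: all remaining IPs except DCE  (DCE newly skipped here)
 *   enable SDMA buffer funcs              (GPU can now do TTM moves)
 *   amdgpu_fence_driver_hw_init()         (moved here from amdgpu_device_resume)
 *   phase3: DCE only                      (display last, so its buffer
 *                                          moves run on the GPU and the
 *                                          DCC metadata gets written)
 */

The same phase3 call is added to the GPU-reset path (amdgpu_do_asic_reset) so that recovery follows the identical ordering.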
diff --git a/queue-6.12/drm-amdkfd-add-mec-version-that-supports-no-pcie-atomics-for-gfx12.patch b/queue-6.12/drm-amdkfd-add-mec-version-that-supports-no-pcie-atomics-for-gfx12.patch
new file mode 100644 (file)
index 0000000..2eaab51
--- /dev/null
@@ -0,0 +1,39 @@
+From 33114f1057ea5cf40e604021711a9711a060fcb6 Mon Sep 17 00:00:00 2001
+From: Sreekant Somasekharan <sreekant.somasekharan@amd.com>
+Date: Thu, 28 Nov 2024 12:05:56 -0500
+Subject: drm/amdkfd: add MEC version that supports no PCIe atomics for GFX12
+
+From: Sreekant Somasekharan <sreekant.somasekharan@amd.com>
+
+commit 33114f1057ea5cf40e604021711a9711a060fcb6 upstream.
+
+Add the MEC version from which alternate support for no PCIe atomics
+is provided, so that the device is not skipped during KFD device init on
+GFX1200/GFX1201.
+
+Signed-off-by: Sreekant Somasekharan <sreekant.somasekharan@amd.com>
+Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org # 6.11.x
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+index 956198da7859..9b51dd75fefc 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+@@ -235,6 +235,9 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
+                        */
+                       kfd->device_info.needs_pci_atomics = true;
+                       kfd->device_info.no_atomic_fw_version = kfd->adev->gfx.rs64_enable ? 509 : 0;
++              } else if (gc_version < IP_VERSION(13, 0, 0)) {
++                      kfd->device_info.needs_pci_atomics = true;
++                      kfd->device_info.no_atomic_fw_version = 2090;
+               } else {
+                       kfd->device_info.needs_pci_atomics = true;
+               }
+-- 
+2.47.1
+
diff --git a/queue-6.12/drm-amdkfd-hard-code-cacheline-for-gc943-gc944.patch b/queue-6.12/drm-amdkfd-hard-code-cacheline-for-gc943-gc944.patch
new file mode 100644 (file)
index 0000000..41480df
--- /dev/null
@@ -0,0 +1,63 @@
+From 55ed120dcfdde2478c3ebfa1c0ac4ed1e430053b Mon Sep 17 00:00:00 2001
+From: David Yat Sin <David.YatSin@amd.com>
+Date: Tue, 26 Nov 2024 15:18:47 -0500
+Subject: drm/amdkfd: hard-code cacheline for gc943,gc944
+
+From: David Yat Sin <David.YatSin@amd.com>
+
+commit 55ed120dcfdde2478c3ebfa1c0ac4ed1e430053b upstream.
+
+Cacheline size is not available in IP discovery for gc943,gc944.
+
+Signed-off-by: David Yat Sin <David.YatSin@amd.com>
+Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_crat.c |    6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+@@ -1509,6 +1509,8 @@ static int kfd_fill_gpu_cache_info_from_
+       if (adev->gfx.config.gc_tcp_size_per_cu) {
+               pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu;
+               pcache_info[i].cache_level = 1;
++              /* Cacheline size not available in IP discovery for gc943,gc944 */
++              pcache_info[i].cache_line_size = 128;
+               pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+                                       CRAT_CACHE_FLAGS_DATA_CACHE |
+                                       CRAT_CACHE_FLAGS_SIMD_CACHE);
+@@ -1520,6 +1522,7 @@ static int kfd_fill_gpu_cache_info_from_
+               pcache_info[i].cache_size =
+                       adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
+               pcache_info[i].cache_level = 1;
++              pcache_info[i].cache_line_size = 64;
+               pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+                                       CRAT_CACHE_FLAGS_INST_CACHE |
+                                       CRAT_CACHE_FLAGS_SIMD_CACHE);
+@@ -1530,6 +1533,7 @@ static int kfd_fill_gpu_cache_info_from_
+       if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
+               pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
+               pcache_info[i].cache_level = 1;
++              pcache_info[i].cache_line_size = 64;
+               pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+                                       CRAT_CACHE_FLAGS_DATA_CACHE |
+                                       CRAT_CACHE_FLAGS_SIMD_CACHE);
+@@ -1540,6 +1544,7 @@ static int kfd_fill_gpu_cache_info_from_
+       if (adev->gfx.config.gc_tcc_size) {
+               pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size;
+               pcache_info[i].cache_level = 2;
++              pcache_info[i].cache_line_size = 128;
+               pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+                                       CRAT_CACHE_FLAGS_DATA_CACHE |
+                                       CRAT_CACHE_FLAGS_SIMD_CACHE);
+@@ -1550,6 +1555,7 @@ static int kfd_fill_gpu_cache_info_from_
+       if (adev->gmc.mall_size) {
+               pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
+               pcache_info[i].cache_level = 3;
++              pcache_info[i].cache_line_size = 64;
+               pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+                                       CRAT_CACHE_FLAGS_DATA_CACHE |
+                                       CRAT_CACHE_FLAGS_SIMD_CACHE);
diff --git a/queue-6.12/drm-dp_mst-fix-mst-sideband-message-body-length-check.patch b/queue-6.12/drm-dp_mst-fix-mst-sideband-message-body-length-check.patch
new file mode 100644 (file)
index 0000000..c75f45f
--- /dev/null
@@ -0,0 +1,54 @@
+From bd2fccac61b40eaf08d9546acc9fef958bfe4763 Mon Sep 17 00:00:00 2001
+From: Imre Deak <imre.deak@intel.com>
+Date: Mon, 25 Nov 2024 22:53:14 +0200
+Subject: drm/dp_mst: Fix MST sideband message body length check
+
+From: Imre Deak <imre.deak@intel.com>
+
+commit bd2fccac61b40eaf08d9546acc9fef958bfe4763 upstream.
+
+Fix the MST sideband message body length check: the body length must be at
+least 1 byte, accounting for the message body CRC (aka message data CRC) at
+the end of the message.
+
+This fixes a case where an MST branch device returns a header with a
+correct header CRC (indicating a correctly received body length), with
+the body length being incorrectly set to 0. This will later lead to a
+memory corruption in drm_dp_sideband_append_payload() and the following
+errors in dmesg:
+
+   UBSAN: array-index-out-of-bounds in drivers/gpu/drm/display/drm_dp_mst_topology.c:786:25
+   index -1 is out of range for type 'u8 [48]'
+   Call Trace:
+    drm_dp_sideband_append_payload+0x33d/0x350 [drm_display_helper]
+    drm_dp_get_one_sb_msg+0x3ce/0x5f0 [drm_display_helper]
+    drm_dp_mst_hpd_irq_handle_event+0xc8/0x1580 [drm_display_helper]
+
+   memcpy: detected field-spanning write (size 18446744073709551615) of single field "&msg->msg[msg->curlen]" at drivers/gpu/drm/display/drm_dp_mst_topology.c:791 (size 256)
+   Call Trace:
+    drm_dp_sideband_append_payload+0x324/0x350 [drm_display_helper]
+    drm_dp_get_one_sb_msg+0x3ce/0x5f0 [drm_display_helper]
+    drm_dp_mst_hpd_irq_handle_event+0xc8/0x1580 [drm_display_helper]
+
+Cc: <stable@vger.kernel.org>
+Cc: Lyude Paul <lyude@redhat.com>
+Reviewed-by: Lyude Paul <lyude@redhat.com>
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241125205314.1725887-1-imre.deak@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/display/drm_dp_mst_topology.c |    3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
++++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
+@@ -320,6 +320,9 @@ static bool drm_dp_decode_sideband_msg_h
+       hdr->broadcast = (buf[idx] >> 7) & 0x1;
+       hdr->path_msg = (buf[idx] >> 6) & 0x1;
+       hdr->msg_len = buf[idx] & 0x3f;
++      if (hdr->msg_len < 1)           /* min space for body CRC */
++              return false;
++
+       idx++;
+       hdr->somt = (buf[idx] >> 7) & 0x1;
+       hdr->eomt = (buf[idx] >> 6) & 0x1;
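
Why the minimum body length is 1: msg_len counts the message body bytes including the trailing body CRC, so the actual payload is msg_len - 1 bytes. With msg_len forced to 0 by a misbehaving branch device, that subtraction underflows when carried out in an unsigned type, matching the "size 18446744073709551615" (i.e. (size_t)-1) splat quoted in the commit message. A simplified sketch, not the kernel's actual helper:

#include <stddef.h>

/* Body = payload + 1 CRC byte, so msg_len must be at least 1. */
static int sideband_payload_len(size_t msg_len)
{
        if (msg_len < 1)        /* the check this patch adds */
                return -1;
        return (int)(msg_len - 1);
}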
diff --git a/queue-6.12/drm-dp_mst-fix-resetting-msg-rx-state-after-topology-removal.patch b/queue-6.12/drm-dp_mst-fix-resetting-msg-rx-state-after-topology-removal.patch
new file mode 100644 (file)
index 0000000..f910319
--- /dev/null
@@ -0,0 +1,108 @@
+From a6fa67d26de385c3c7a23c1e109a0e23bfda4ec7 Mon Sep 17 00:00:00 2001
+From: Imre Deak <imre.deak@intel.com>
+Date: Tue, 3 Dec 2024 18:02:17 +0200
+Subject: drm/dp_mst: Fix resetting msg rx state after topology removal
+
+From: Imre Deak <imre.deak@intel.com>
+
+commit a6fa67d26de385c3c7a23c1e109a0e23bfda4ec7 upstream.
+
+If the MST topology is removed during the reception of an MST down reply
+or MST up request sideband message, the
+drm_dp_mst_topology_mgr::up_req_recv/down_rep_recv states could be reset
+from one thread via drm_dp_mst_topology_mgr_set_mst(false), racing with
+the reading/parsing of the message from another thread via
+drm_dp_mst_handle_down_rep() or drm_dp_mst_handle_up_req(). The race is
+possible since the reader/parser doesn't hold any lock while accessing
+the reception state. This in turn can lead to a memory corruption in the
+reader/parser as described by commit bd2fccac61b4 ("drm/dp_mst: Fix MST
+sideband message body length check").
+
+Fix the above by resetting the message reception state if needed before
+reading/parsing a message. Another solution would be to hold the
+drm_dp_mst_topology_mgr::lock for the whole duration of the message
+reception/parsing in drm_dp_mst_handle_down_rep() and
+drm_dp_mst_handle_up_req(), however this would require a bigger change.
+Since the fix is also needed for stable, this patch opts for the simpler
+solution.
+
+Cc: Lyude Paul <lyude@redhat.com>
+Cc: <stable@vger.kernel.org>
+Fixes: 1d082618bbf3 ("drm/display/dp_mst: Fix down/up message handling after sink disconnect")
+Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13056
+Reviewed-by: Lyude Paul <lyude@redhat.com>
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241203160223.2926014-2-imre.deak@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/display/drm_dp_mst_topology.c |   21 +++++++++++++++++++--
+ include/drm/display/drm_dp_mst_helper.h       |    7 +++++++
+ 2 files changed, 26 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
++++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
+@@ -3700,8 +3700,7 @@ int drm_dp_mst_topology_mgr_set_mst(stru
+               ret = 0;
+               mgr->payload_id_table_cleared = false;
+-              memset(&mgr->down_rep_recv, 0, sizeof(mgr->down_rep_recv));
+-              memset(&mgr->up_req_recv, 0, sizeof(mgr->up_req_recv));
++              mgr->reset_rx_state = true;
+       }
+ out_unlock:
+@@ -3859,6 +3858,11 @@ out_fail:
+ }
+ EXPORT_SYMBOL(drm_dp_mst_topology_mgr_resume);
++static void reset_msg_rx_state(struct drm_dp_sideband_msg_rx *msg)
++{
++      memset(msg, 0, sizeof(*msg));
++}
++
+ static bool
+ drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up,
+                     struct drm_dp_mst_branch **mstb)
+@@ -4172,6 +4176,17 @@ out:
+       return 0;
+ }
++static void update_msg_rx_state(struct drm_dp_mst_topology_mgr *mgr)
++{
++      mutex_lock(&mgr->lock);
++      if (mgr->reset_rx_state) {
++              mgr->reset_rx_state = false;
++              reset_msg_rx_state(&mgr->down_rep_recv);
++              reset_msg_rx_state(&mgr->up_req_recv);
++      }
++      mutex_unlock(&mgr->lock);
++}
++
+ /**
+  * drm_dp_mst_hpd_irq_handle_event() - MST hotplug IRQ handle MST event
+  * @mgr: manager to notify irq for.
+@@ -4206,6 +4221,8 @@ int drm_dp_mst_hpd_irq_handle_event(stru
+               *handled = true;
+       }
++      update_msg_rx_state(mgr);
++
+       if (esi[1] & DP_DOWN_REP_MSG_RDY) {
+               ret = drm_dp_mst_handle_down_rep(mgr);
+               *handled = true;
+--- a/include/drm/display/drm_dp_mst_helper.h
++++ b/include/drm/display/drm_dp_mst_helper.h
+@@ -700,6 +700,13 @@ struct drm_dp_mst_topology_mgr {
+       bool payload_id_table_cleared : 1;
+       /**
++       * @reset_rx_state: The down request's reply and up request message
++       * receiver state must be reset, after the topology manager got
++       * removed. Protected by @lock.
++       */
++      bool reset_rx_state : 1;
++
++      /**
+        * @payload_count: The number of currently active payloads in hardware. This value is only
+        * intended to be used internally by MST helpers for payload tracking, and is only safe to
+        * read/write from the atomic commit (not check) context.
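
The fix is an instance of a deferred-reset idiom (names below are illustrative rather than kernel code): the teardown path never touches the receive buffers directly; it only raises a flag under the lock, and the reader applies the reset itself before parsing, so the buffers are never wiped underneath a concurrent reader.

#include <linux/mutex.h>

struct rx_state {
        struct mutex lock;
        bool reset_pending;
        /* ... reception buffers ... */
};

/* Teardown: request a reset instead of performing one. */
static void rx_teardown(struct rx_state *s)
{
        mutex_lock(&s->lock);
        s->reset_pending = true;
        mutex_unlock(&s->lock);
}

/* Reader: apply any pending reset before using the buffers. */
static void rx_prepare(struct rx_state *s)
{
        mutex_lock(&s->lock);
        if (s->reset_pending) {
                s->reset_pending = false;
                /* memset() the reception buffers here */
        }
        mutex_unlock(&s->lock);
}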
diff --git a/queue-6.12/drm-dp_mst-verify-request-type-in-the-corresponding-down-message-reply.patch b/queue-6.12/drm-dp_mst-verify-request-type-in-the-corresponding-down-message-reply.patch
new file mode 100644 (file)
index 0000000..7faf242
--- /dev/null
@@ -0,0 +1,77 @@
+From 4d49e77a973d3b5d1881663c3f122906a0702940 Mon Sep 17 00:00:00 2001
+From: Imre Deak <imre.deak@intel.com>
+Date: Tue, 3 Dec 2024 18:02:18 +0200
+Subject: drm/dp_mst: Verify request type in the corresponding down message reply
+
+From: Imre Deak <imre.deak@intel.com>
+
+commit 4d49e77a973d3b5d1881663c3f122906a0702940 upstream.
+
+After receiving the response for an MST down request message, the
+response should be accepted/parsed only if the response type matches
+that of the request. Ensure this by checking if the request type code
+stored both in the request and the reply match, dropping the reply in
+case of a mismatch.
+
+This fixes the topology detection for an MST hub, as described in the
+Closes link below, where the hub sends an incorrect reply message after
+a CLEAR_PAYLOAD_TABLE -> LINK_ADDRESS down request message sequence.
+
+Cc: Lyude Paul <lyude@redhat.com>
+Cc: <stable@vger.kernel.org>
+Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12804
+Reviewed-by: Lyude Paul <lyude@redhat.com>
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241203160223.2926014-3-imre.deak@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/display/drm_dp_mst_topology.c |   31 ++++++++++++++++++++++++++
+ 1 file changed, 31 insertions(+)
+
+--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
++++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
+@@ -3937,6 +3937,34 @@ drm_dp_get_one_sb_msg(struct drm_dp_mst_
+       return true;
+ }
++static int get_msg_request_type(u8 data)
++{
++      return data & 0x7f;
++}
++
++static bool verify_rx_request_type(struct drm_dp_mst_topology_mgr *mgr,
++                                 const struct drm_dp_sideband_msg_tx *txmsg,
++                                 const struct drm_dp_sideband_msg_rx *rxmsg)
++{
++      const struct drm_dp_sideband_msg_hdr *hdr = &rxmsg->initial_hdr;
++      const struct drm_dp_mst_branch *mstb = txmsg->dst;
++      int tx_req_type = get_msg_request_type(txmsg->msg[0]);
++      int rx_req_type = get_msg_request_type(rxmsg->msg[0]);
++      char rad_str[64];
++
++      if (tx_req_type == rx_req_type)
++              return true;
++
++      drm_dp_mst_rad_to_str(mstb->rad, mstb->lct, rad_str, sizeof(rad_str));
++      drm_dbg_kms(mgr->dev,
++                  "Got unexpected MST reply, mstb: %p seqno: %d lct: %d rad: %s rx_req_type: %s (%02x) != tx_req_type: %s (%02x)\n",
++                  mstb, hdr->seqno, mstb->lct, rad_str,
++                  drm_dp_mst_req_type_str(rx_req_type), rx_req_type,
++                  drm_dp_mst_req_type_str(tx_req_type), tx_req_type);
++
++      return false;
++}
++
+ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr)
+ {
+       struct drm_dp_sideband_msg_tx *txmsg;
+@@ -3966,6 +3994,9 @@ static int drm_dp_mst_handle_down_rep(st
+               goto out_clear_reply;
+       }
++      if (!verify_rx_request_type(mgr, txmsg, msg))
++              goto out_clear_reply;
++
+       drm_dp_sideband_parse_reply(mgr, msg, &txmsg->reply);
+       if (txmsg->reply.reply_type == DP_SIDEBAND_REPLY_NAK) {
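
Background for the & 0x7f masking above: in MST sideband messages the low seven bits of the first body byte carry the request identifier in both a down request and its reply, while bit 7 of the reply byte is the ACK/NAK flag, so masking both sides with 0x7f yields comparable request ids. A trivial sketch (illustrative only; mirrors get_msg_request_type() in the patch):

static int sideband_req_id(unsigned char first_body_byte)
{
        return first_body_byte & 0x7f;  /* strip the reply's ACK/NAK bit */
}

static int reply_matches_request(unsigned char tx0, unsigned char rx0)
{
        return sideband_req_id(tx0) == sideband_req_id(rx0);
}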
diff --git a/queue-6.12/mmc-core-further-prevent-card-detect-during-shutdown.patch b/queue-6.12/mmc-core-further-prevent-card-detect-during-shutdown.patch
new file mode 100644 (file)
index 0000000..a094310
--- /dev/null
@@ -0,0 +1,56 @@
+From 87a0d90fcd31c0f36da0332428c9e1a1e0f97432 Mon Sep 17 00:00:00 2001
+From: Ulf Hansson <ulf.hansson@linaro.org>
+Date: Mon, 25 Nov 2024 13:24:46 +0100
+Subject: mmc: core: Further prevent card detect during shutdown
+
+From: Ulf Hansson <ulf.hansson@linaro.org>
+
+commit 87a0d90fcd31c0f36da0332428c9e1a1e0f97432 upstream.
+
+Disabling card detect from the host's ->shutdown_pre() callback turned out
+not to be the complete solution. More precisely, beyond the point when
+mmc_bus->shutdown() has been called to gracefully power off the card, we
+need to prevent card detect. Otherwise the mmc_rescan work may poll the
+card with a CMD13 to see if it's still alive, which will then fail and
+hang, as the card has already been powered off.
+
+To fix this problem, let's disable mmc_rescan prior to powering off the
+card during shutdown.
+
+Reported-by: Anthony Pighin <anthony.pighin@nokia.com>
+Fixes: 66c915d09b94 ("mmc: core: Disable card detect during shutdown")
+Cc: stable@vger.kernel.org
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
+Closes: https://lore.kernel.org/all/BN0PR08MB695133000AF116F04C3A9FFE83212@BN0PR08MB6951.namprd08.prod.outlook.com/
+Tested-by: Anthony Pighin <anthony.pighin@nokia.com>
+Message-ID: <20241125122446.18684-1-ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/core/bus.c  |    2 ++
+ drivers/mmc/core/core.c |    3 +++
+ 2 files changed, 5 insertions(+)
+
+--- a/drivers/mmc/core/bus.c
++++ b/drivers/mmc/core/bus.c
+@@ -149,6 +149,8 @@ static void mmc_bus_shutdown(struct devi
+       if (dev->driver && drv->shutdown)
+               drv->shutdown(card);
++      __mmc_stop_host(host);
++
+       if (host->bus_ops->shutdown) {
+               ret = host->bus_ops->shutdown(host);
+               if (ret)
+--- a/drivers/mmc/core/core.c
++++ b/drivers/mmc/core/core.c
+@@ -2296,6 +2296,9 @@ void mmc_start_host(struct mmc_host *hos
+ void __mmc_stop_host(struct mmc_host *host)
+ {
++      if (host->rescan_disable)
++              return;
++
+       if (host->slot.cd_irq >= 0) {
+               mmc_gpio_set_cd_wake(host, false);
+               disable_irq(host->slot.cd_irq);
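
The essence of the fix is ordering (an editorial paraphrase of the diff above, not verbatim code):

/*
 * mmc_bus_shutdown(), after this patch:
 *
 *   drv->shutdown(card)        - card driver quiesces
 *   __mmc_stop_host(host)      - CD IRQ disabled, mmc_rescan stopped,
 *                                so nothing can issue a CMD13 poll
 *   host->bus_ops->shutdown()  - card is powered off last
 */

The new rescan_disable early-return also makes __mmc_stop_host() safe to call a second time from the normal host-removal path, presumably because the first call sets that flag further down in the (elided) function body.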
diff --git a/queue-6.12/mmc-sdhci-pci-add-dmi-quirk-for-missing-cd-gpio-on-vexia-edu-atla-10-tablet.patch b/queue-6.12/mmc-sdhci-pci-add-dmi-quirk-for-missing-cd-gpio-on-vexia-edu-atla-10-tablet.patch
new file mode 100644 (file)
index 0000000..b23835c
--- /dev/null
@@ -0,0 +1,155 @@
+From 7f0fa47ceebcff0e3591bb7e32a71a2cd7846149 Mon Sep 17 00:00:00 2001
+From: Hans de Goede <hdegoede@redhat.com>
+Date: Mon, 18 Nov 2024 22:00:49 +0100
+Subject: mmc: sdhci-pci: Add DMI quirk for missing CD GPIO on Vexia Edu Atla 10 tablet
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+commit 7f0fa47ceebcff0e3591bb7e32a71a2cd7846149 upstream.
+
+The Vexia Edu Atla 10 tablet distributed to schools in the Spanish
+Andalucía region has no ACPI fwnode associated with the SDHCI controller
+for its microsd-slot and thus has no ACPI GPIO resource info.
+
+This causes the following error to be logged and the slot to not work:
+[   10.572113] sdhci-pci 0000:00:12.0: failed to setup card detect gpio
+
+Add a DMI quirk table for supplying gpiod_lookup_tables with manually
+specified CD GPIO info, and use it to provide the CD GPIO info on this
+tablet. This fixes the microsd-slot not working.
+
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Acked-by: Adrian Hunter <adrian.hunter@intel.com>
+Cc: stable@vger.kernel.org
+Message-ID: <20241118210049.311079-1-hdegoede@redhat.com>
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/host/sdhci-pci-core.c |   72 ++++++++++++++++++++++++++++++++++++++
+ drivers/mmc/host/sdhci-pci.h      |    1 
+ 2 files changed, 73 insertions(+)
+
+--- a/drivers/mmc/host/sdhci-pci-core.c
++++ b/drivers/mmc/host/sdhci-pci-core.c
+@@ -21,6 +21,7 @@
+ #include <linux/io.h>
+ #include <linux/iopoll.h>
+ #include <linux/gpio.h>
++#include <linux/gpio/machine.h>
+ #include <linux/pm_runtime.h>
+ #include <linux/pm_qos.h>
+ #include <linux/debugfs.h>
+@@ -1235,6 +1236,29 @@ static const struct sdhci_pci_fixes sdhc
+       .priv_size      = sizeof(struct intel_host),
+ };
++/* DMI quirks for devices with missing or broken CD GPIO info */
++static const struct gpiod_lookup_table vexia_edu_atla10_cd_gpios = {
++      .dev_id = "0000:00:12.0",
++      .table = {
++              GPIO_LOOKUP("INT33FC:00", 38, "cd", GPIO_ACTIVE_HIGH),
++              { }
++      },
++};
++
++static const struct dmi_system_id sdhci_intel_byt_cd_gpio_override[] = {
++      {
++              /* Vexia Edu Atla 10 tablet 9V version */
++              .matches = {
++                      DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
++                      DMI_MATCH(DMI_BOARD_NAME, "Aptio CRB"),
++                      /* Above strings are too generic, also match on BIOS date */
++                      DMI_MATCH(DMI_BIOS_DATE, "08/25/2014"),
++              },
++              .driver_data = (void *)&vexia_edu_atla10_cd_gpios,
++      },
++      { }
++};
++
+ static const struct sdhci_pci_fixes sdhci_intel_byt_sd = {
+ #ifdef CONFIG_PM_SLEEP
+       .resume         = byt_resume,
+@@ -1253,6 +1277,7 @@ static const struct sdhci_pci_fixes sdhc
+       .add_host       = byt_add_host,
+       .remove_slot    = byt_remove_slot,
+       .ops            = &sdhci_intel_byt_ops,
++      .cd_gpio_override = sdhci_intel_byt_cd_gpio_override,
+       .priv_size      = sizeof(struct intel_host),
+ };
+@@ -2054,6 +2079,42 @@ static const struct dev_pm_ops sdhci_pci
+  *                                                                           *
+ \*****************************************************************************/
++static struct gpiod_lookup_table *sdhci_pci_add_gpio_lookup_table(
++      struct sdhci_pci_chip *chip)
++{
++      struct gpiod_lookup_table *cd_gpio_lookup_table;
++      const struct dmi_system_id *dmi_id = NULL;
++      size_t count;
++
++      if (chip->fixes && chip->fixes->cd_gpio_override)
++              dmi_id = dmi_first_match(chip->fixes->cd_gpio_override);
++
++      if (!dmi_id)
++              return NULL;
++
++      cd_gpio_lookup_table = dmi_id->driver_data;
++      for (count = 0; cd_gpio_lookup_table->table[count].key; count++)
++              ;
++
++      cd_gpio_lookup_table = kmemdup(dmi_id->driver_data,
++                                     /* count + 1 terminating entry */
++                                     struct_size(cd_gpio_lookup_table, table, count + 1),
++                                     GFP_KERNEL);
++      if (!cd_gpio_lookup_table)
++              return ERR_PTR(-ENOMEM);
++
++      gpiod_add_lookup_table(cd_gpio_lookup_table);
++      return cd_gpio_lookup_table;
++}
++
++static void sdhci_pci_remove_gpio_lookup_table(struct gpiod_lookup_table *lookup_table)
++{
++      if (lookup_table) {
++              gpiod_remove_lookup_table(lookup_table);
++              kfree(lookup_table);
++      }
++}
++
+ static struct sdhci_pci_slot *sdhci_pci_probe_slot(
+       struct pci_dev *pdev, struct sdhci_pci_chip *chip, int first_bar,
+       int slotno)
+@@ -2129,8 +2190,19 @@ static struct sdhci_pci_slot *sdhci_pci_
+               device_init_wakeup(&pdev->dev, true);
+       if (slot->cd_idx >= 0) {
++              struct gpiod_lookup_table *cd_gpio_lookup_table;
++
++              cd_gpio_lookup_table = sdhci_pci_add_gpio_lookup_table(chip);
++              if (IS_ERR(cd_gpio_lookup_table)) {
++                      ret = PTR_ERR(cd_gpio_lookup_table);
++                      goto remove;
++              }
++
+               ret = mmc_gpiod_request_cd(host->mmc, "cd", slot->cd_idx,
+                                          slot->cd_override_level, 0);
++
++              sdhci_pci_remove_gpio_lookup_table(cd_gpio_lookup_table);
++
+               if (ret && ret != -EPROBE_DEFER)
+                       ret = mmc_gpiod_request_cd(host->mmc, NULL,
+                                                  slot->cd_idx,
+--- a/drivers/mmc/host/sdhci-pci.h
++++ b/drivers/mmc/host/sdhci-pci.h
+@@ -156,6 +156,7 @@ struct sdhci_pci_fixes {
+ #endif
+       const struct sdhci_ops  *ops;
++      const struct dmi_system_id *cd_gpio_override;
+       size_t                  priv_size;
+ };
diff --git a/queue-6.12/modpost-add-.irqentry.text-to-other_sections.patch b/queue-6.12/modpost-add-.irqentry.text-to-other_sections.patch
new file mode 100644 (file)
index 0000000..a00e388
--- /dev/null
@@ -0,0 +1,42 @@
+From 7912405643a14b527cd4a4f33c1d4392da900888 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 1 Dec 2024 12:17:30 +0100
+Subject: modpost: Add .irqentry.text to OTHER_SECTIONS
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 7912405643a14b527cd4a4f33c1d4392da900888 upstream.
+
+The compiler can fully inline the actual handler function of an interrupt
+entry into the .irqentry.text entry point. If such a function contains an
+access which has an exception table entry, modpost complains about a
+section mismatch:
+
+  WARNING: vmlinux.o(__ex_table+0x447c): Section mismatch in reference ...
+
+  The relocation at __ex_table+0x447c references section ".irqentry.text"
+  which is not in the list of authorized sections.
+
+Add .irqentry.text to OTHER_SECTIONS to cure the issue.
+
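+As a hedged illustration (the functions below are hypothetical, not from
+the kernel), the pattern being warned about looks roughly like this:
+
+ #include <linux/interrupt.h>	/* __irq_entry */
+ #include <linux/uaccess.h>	/* get_user() */
+
+ static void demo_handler(unsigned long __user *uptr)
+ {
+	unsigned long val;
+
+	/* get_user() emits an __ex_table fixup entry for this access */
+	if (get_user(val, uptr))
+		return;
+ }
+
+ /* __irq_entry places this in .irqentry.text; if demo_handler() is
+  * fully inlined here, the fixup entry references .irqentry.text and
+  * modpost warns about the section mismatch. */
+ __irq_entry void demo_entry(unsigned long __user *uptr)
+ {
+	demo_handler(uptr);
+ }
+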
+Reported-by: Sergey Senozhatsky <senozhatsky@chromium.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org # needed for linux-5.4-y
+Link: https://lore.kernel.org/all/20241128111844.GE10431@google.com/
+Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ scripts/mod/modpost.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/scripts/mod/modpost.c
++++ b/scripts/mod/modpost.c
+@@ -785,7 +785,7 @@ static void check_section(const char *mo
+               ".ltext", ".ltext.*"
+ #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \
+               ".fixup", ".entry.text", ".exception.text", \
+-              ".coldtext", ".softirqentry.text"
++              ".coldtext", ".softirqentry.text", ".irqentry.text"
+ #define ALL_TEXT_SECTIONS  ".init.text", ".exit.text", \
+               TEXT_SECTIONS, OTHER_TEXT_SECTIONS
diff --git a/queue-6.12/regmap-detach-regmap-from-dev-on-regmap_exit.patch b/queue-6.12/regmap-detach-regmap-from-dev-on-regmap_exit.patch
new file mode 100644 (file)
index 0000000..0c08692
--- /dev/null
@@ -0,0 +1,59 @@
+From 3061e170381af96d1e66799d34264e6414d428a7 Mon Sep 17 00:00:00 2001
+From: Cosmin Tanislav <demonsingur@gmail.com>
+Date: Thu, 28 Nov 2024 15:16:23 +0200
+Subject: regmap: detach regmap from dev on regmap_exit
+
+From: Cosmin Tanislav <demonsingur@gmail.com>
+
+commit 3061e170381af96d1e66799d34264e6414d428a7 upstream.
+
+At the end of __regmap_init(), if dev is not NULL, regmap_attach_dev()
+is called, which adds a devres reference to the regmap so that a dev's
+regmap can later be retrieved by name using dev_get_regmap().
+
+When regmap_exit() is called, the opposite does not happen: the devres
+reference is kept until the dev itself is detached.
+
+Add a regmap_detach_dev() function and call it in regmap_exit() to make
+sure that the devres reference is not kept.
+
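+A hedged sketch of the lifetime problem (the client and config names are
+hypothetical; error handling omitted):
+
+ struct regmap *map = regmap_init_i2c(client, &demo_config);
+ /* regmap_attach_dev() ran inside __regmap_init(): a devres entry now
+  * lets dev_get_regmap(&client->dev, NULL) return this map. */
+
+ regmap_exit(map);	/* frees the regmap... */
+
+ /* ...but without regmap_detach_dev() the devres entry survives, so a
+  * later dev_get_regmap() would hand back a dangling pointer. */
+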
+Cc: stable@vger.kernel.org
+Fixes: 72b39f6f2b5a ("regmap: Implement dev_get_regmap()")
+Signed-off-by: Cosmin Tanislav <demonsingur@gmail.com>
+Rule: add
+Link: https://lore.kernel.org/stable/20241128130554.362486-1-demonsingur%40gmail.com
+Link: https://patch.msgid.link/20241128131625.363835-1-demonsingur@gmail.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/base/regmap/regmap.c |   12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/drivers/base/regmap/regmap.c
++++ b/drivers/base/regmap/regmap.c
+@@ -598,6 +598,17 @@ int regmap_attach_dev(struct device *dev
+ }
+ EXPORT_SYMBOL_GPL(regmap_attach_dev);
++static int dev_get_regmap_match(struct device *dev, void *res, void *data);
++
++static int regmap_detach_dev(struct device *dev, struct regmap *map)
++{
++      if (!dev)
++              return 0;
++
++      return devres_release(dev, dev_get_regmap_release,
++                            dev_get_regmap_match, (void *)map->name);
++}
++
+ static enum regmap_endian regmap_get_reg_endian(const struct regmap_bus *bus,
+                                       const struct regmap_config *config)
+ {
+@@ -1444,6 +1455,7 @@ void regmap_exit(struct regmap *map)
+ {
+       struct regmap_async *async;
++      regmap_detach_dev(map->dev, map);
+       regcache_exit(map);
+       regmap_debugfs_exit(map);
diff --git a/queue-6.12/selftest-hugetlb_dio-fix-test-naming.patch b/queue-6.12/selftest-hugetlb_dio-fix-test-naming.patch
new file mode 100644 (file)
index 0000000..b59c255
--- /dev/null
@@ -0,0 +1,66 @@
+From 4ae132c693896b0713db572676c90ffd855a4246 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 27 Nov 2024 16:14:22 +0000
+Subject: selftest: hugetlb_dio: fix test naming
+
+From: Mark Brown <broonie@kernel.org>
+
+commit 4ae132c693896b0713db572676c90ffd855a4246 upstream.
+
+The string logged when a test passes or fails is used by the selftest
+framework to identify which test is being reported. The hugetlb_dio test
+not only uses the same strings for every test that is run, but also uses
+different strings for passes and failures, which means that test
+automation is unable to follow what the test is doing at all.
+
+Pull the existing duplicated logging of the number of free huge pages
+before and after the test out of the conditional, and replace it and the
+logging of the result with a single ksft_test_result() call that
+incorporates the parameters passed to the test into the output.
+
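+A hedged sketch of the resulting convention (values illustrative): one
+stable, parameterized description per test lets the harness match pass
+and fail reports to the same test case:
+
+ ksft_test_result(free_hpage_a == free_hpage_b,
+		  "free huge pages from %u-%u\n", start_off, end_off);
+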
+Link: https://lkml.kernel.org/r/20241127-kselftest-mm-hugetlb-dio-names-v1-1-22aab01bf550@kernel.org
+Fixes: fae1980347bf ("selftests: hugetlb_dio: fixup check for initial conditions to skip in the start")
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Reviewed-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Cc: Donet Tom <donettom@linux.ibm.com>
+Cc: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/mm/hugetlb_dio.c | 14 +++++---------
+ 1 file changed, 5 insertions(+), 9 deletions(-)
+
+diff --git a/tools/testing/selftests/mm/hugetlb_dio.c b/tools/testing/selftests/mm/hugetlb_dio.c
+index 432d5af15e66..db63abe5ee5e 100644
+--- a/tools/testing/selftests/mm/hugetlb_dio.c
++++ b/tools/testing/selftests/mm/hugetlb_dio.c
+@@ -76,19 +76,15 @@ void run_dio_using_hugetlb(unsigned int start_off, unsigned int end_off)
+       /* Get the free huge pages after unmap*/
+       free_hpage_a = get_free_hugepages();
++      ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b);
++      ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a);
++
+       /*
+        * If the no. of free hugepages before allocation and after unmap does
+        * not match - that means there could still be a page which is pinned.
+        */
+-      if (free_hpage_a != free_hpage_b) {
+-              ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b);
+-              ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a);
+-              ksft_test_result_fail(": Huge pages not freed!\n");
+-      } else {
+-              ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b);
+-              ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a);
+-              ksft_test_result_pass(": Huge pages freed successfully !\n");
+-      }
++      ksft_test_result(free_hpage_a == free_hpage_b,
++                       "free huge pages from %u-%u\n", start_off, end_off);
+ }
+ int main(void)
+-- 
+2.47.1
+
diff --git a/queue-6.12/selftests-damon-add-_damon_sysfs.py-to-test_files.patch b/queue-6.12/selftests-damon-add-_damon_sysfs.py-to-test_files.patch
new file mode 100644 (file)
index 0000000..ae3533f
--- /dev/null
@@ -0,0 +1,48 @@
+From 4a475c0a7eeb3368eca40fe7cb02d157eeddc77a Mon Sep 17 00:00:00 2001
+From: Maximilian Heyne <mheyne@amazon.de>
+Date: Wed, 27 Nov 2024 12:08:53 +0000
+Subject: selftests/damon: add _damon_sysfs.py to TEST_FILES
+
+From: Maximilian Heyne <mheyne@amazon.de>
+
+commit 4a475c0a7eeb3368eca40fe7cb02d157eeddc77a upstream.
+
+When running selftests I encountered the following error message with
+some damon tests:
+
+ # Traceback (most recent call last):
+ #   File "[...]/damon/./damos_quota.py", line 7, in <module>
+ #     import _damon_sysfs
+ # ModuleNotFoundError: No module named '_damon_sysfs'
+
+Fix this by adding the _damon_sysfs.py file to TEST_FILES so that it
+will be available when running the respective damon selftests.
+
+Link: https://lkml.kernel.org/r/20241127-picks-visitor-7416685b-mheyne@amazon.de
+Fixes: 306abb63a8ca ("selftests/damon: implement a python module for test-purpose DAMON sysfs controls")
+Signed-off-by: Maximilian Heyne <mheyne@amazon.de>
+Reviewed-by: SeongJae Park <sj@kernel.org>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/damon/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
+index 5b2a6a5dd1af..812f656260fb 100644
+--- a/tools/testing/selftests/damon/Makefile
++++ b/tools/testing/selftests/damon/Makefile
+@@ -6,7 +6,7 @@ TEST_GEN_FILES += debugfs_target_ids_read_before_terminate_race
+ TEST_GEN_FILES += debugfs_target_ids_pid_leak
+ TEST_GEN_FILES += access_memory access_memory_even
+-TEST_FILES = _chk_dependency.sh _debugfs_common.sh
++TEST_FILES = _chk_dependency.sh _debugfs_common.sh _damon_sysfs.py
+ # functionality tests
+ TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh
+-- 
+2.47.1
+
index 5747f258f0de904fce8902b5e9ad279f97a31f95..6769d92f6c1d79f1bb66be3cbbd08e7be95f9f89 100644 (file)
@@ -170,4 +170,35 @@ io_uring-change-res2-parameter-type-in-io_uring_cmd_done.patch
 bcache-revert-replacing-is_err_or_null-with-is_err-again.patch
 revert-readahead-properly-shorten-readahead-when-falling-back-to-do_page_cache_ra.patch
 pmdomain-imx-gpcv2-adjust-delay-after-power-up-handshake.patch
+selftests-damon-add-_damon_sysfs.py-to-test_files.patch
+selftest-hugetlb_dio-fix-test-naming.patch
+cacheinfo-allocate-memory-during-cpu-hotplug-if-not-done-from-the-primary-cpu.patch
+x86-cacheinfo-delete-global-num_cache_leaves.patch
+drm-amdkfd-hard-code-cacheline-for-gc943-gc944.patch
+drm-dp_mst-fix-mst-sideband-message-body-length-check.patch
+drm-amdkfd-add-mec-version-that-supports-no-pcie-atomics-for-gfx12.patch
+drm-amd-pm-fix-and-simplify-workload-handling.patch
+drm-dp_mst-verify-request-type-in-the-corresponding-down-message-reply.patch
+drm-dp_mst-fix-resetting-msg-rx-state-after-topology-removal.patch
+drm-amdgpu-rework-resume-handling-for-display-v2.patch
+drm-amd-display-correct-prefetch-calculation.patch
+drm-amd-display-limit-vtotal-range-to-max-hw-cap-minus-fp.patch
+drm-amd-display-add-a-left-edge-pixel-if-in-ycbcr422-or-ycbcr420-and-odm.patch
+drm-amdgpu-hdp6.0-do-a-posting-read-when-flushing-hdp.patch
+drm-amdgpu-hdp4.0-do-a-posting-read-when-flushing-hdp.patch
+drm-amdgpu-hdp5.0-do-a-posting-read-when-flushing-hdp.patch
+drm-amdgpu-hdp7.0-do-a-posting-read-when-flushing-hdp.patch
+drm-amdgpu-hdp5.2-do-a-posting-read-when-flushing-hdp.patch
+modpost-add-.irqentry.text-to-other_sections.patch
+x86-kexec-restore-gdt-on-return-from-preserve_context-kexec.patch
+bpf-fix-oob-devmap-writes-when-deleting-elements.patch
+dma-buf-fix-dma_fence_array_signaled-v4.patch
+dma-fence-fix-reference-leak-on-fence-merge-failure-path.patch
+dma-fence-use-kernel-s-sort-for-merging-fences.patch
+xsk-fix-oob-map-writes-when-deleting-elements.patch
+regmap-detach-regmap-from-dev-on-regmap_exit.patch
+arch_numa-restore-nid-checks-before-registering-a-memblock-with-a-node.patch
+mmc-sdhci-pci-add-dmi-quirk-for-missing-cd-gpio-on-vexia-edu-atla-10-tablet.patch
+mmc-core-further-prevent-card-detect-during-shutdown.patch
+x86-cpu-add-lunar-lake-to-list-of-cpus-with-a-broken-monitor-implementation.patch
 scsi-ufs-pltfrm-drop-pm-runtime-reference-count-after-ufshcd_remove.patch
diff --git a/queue-6.12/x86-cacheinfo-delete-global-num_cache_leaves.patch b/queue-6.12/x86-cacheinfo-delete-global-num_cache_leaves.patch
new file mode 100644 (file)
index 0000000..e13e415
--- /dev/null
@@ -0,0 +1,136 @@
+From 9677be09e5e4fbe48aeccb06ae3063c5eba331c3 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Wed, 27 Nov 2024 16:22:47 -0800
+Subject: x86/cacheinfo: Delete global num_cache_leaves
+
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+
+commit 9677be09e5e4fbe48aeccb06ae3063c5eba331c3 upstream.
+
+Linux remembers cpu_cacheinfo::num_leaves per CPU, but x86 initializes all
+CPUs from the same global "num_cache_leaves".
+
+This is erroneous on systems such as Meteor Lake, where each CPU has a
+distinct num_leaves value. Delete the global "num_cache_leaves" and
+initialize num_leaves on each CPU.
+
+init_cache_level() no longer needs to set num_leaves. It also never had to
+set num_levels, which is unnecessary on x86. Keep the check for zero cache
+leaves, since that condition indicates a bug.
+
+  [ bp: Cleanup. ]
+
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Cc: stable@vger.kernel.org # 6.3+
+Link: https://lore.kernel.org/r/20241128002247.26726-3-ricardo.neri-calderon@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/cacheinfo.c |   43 +++++++++++++++++++---------------------
+ 1 file changed, 21 insertions(+), 22 deletions(-)
+
+--- a/arch/x86/kernel/cpu/cacheinfo.c
++++ b/arch/x86/kernel/cpu/cacheinfo.c
+@@ -178,8 +178,6 @@ struct _cpuid4_info_regs {
+       struct amd_northbridge *nb;
+ };
+-static unsigned short num_cache_leaves;
+-
+ /* AMD doesn't have CPUID4. Emulate it here to report the same
+    information to the user.  This makes some assumptions about the machine:
+    L2 not shared, no SMT etc. that is currently true on AMD CPUs.
+@@ -717,20 +715,23 @@ void cacheinfo_hygon_init_llc_id(struct
+ void init_amd_cacheinfo(struct cpuinfo_x86 *c)
+ {
++      struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
+       if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+-              num_cache_leaves = find_num_cache_leaves(c);
++              ci->num_leaves = find_num_cache_leaves(c);
+       } else if (c->extended_cpuid_level >= 0x80000006) {
+               if (cpuid_edx(0x80000006) & 0xf000)
+-                      num_cache_leaves = 4;
++                      ci->num_leaves = 4;
+               else
+-                      num_cache_leaves = 3;
++                      ci->num_leaves = 3;
+       }
+ }
+ void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
+ {
+-      num_cache_leaves = find_num_cache_leaves(c);
++      struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
++
++      ci->num_leaves = find_num_cache_leaves(c);
+ }
+ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
+@@ -740,21 +741,21 @@ void init_intel_cacheinfo(struct cpuinfo
+       unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
+       unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
+       unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
++      struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
+       if (c->cpuid_level > 3) {
+-              static int is_initialized;
+-
+-              if (is_initialized == 0) {
+-                      /* Init num_cache_leaves from boot CPU */
+-                      num_cache_leaves = find_num_cache_leaves(c);
+-                      is_initialized++;
+-              }
++              /*
++               * There should be at least one leaf. A non-zero value means
++               * that the number of leaves has been initialized.
++               */
++              if (!ci->num_leaves)
++                      ci->num_leaves = find_num_cache_leaves(c);
+               /*
+                * Whenever possible use cpuid(4), deterministic cache
+                * parameters cpuid leaf to find the cache details
+                */
+-              for (i = 0; i < num_cache_leaves; i++) {
++              for (i = 0; i < ci->num_leaves; i++) {
+                       struct _cpuid4_info_regs this_leaf = {};
+                       int retval;
+@@ -790,14 +791,14 @@ void init_intel_cacheinfo(struct cpuinfo
+        * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
+        * trace cache
+        */
+-      if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
++      if ((!ci->num_leaves || c->x86 == 15) && c->cpuid_level > 1) {
+               /* supports eax=2  call */
+               int j, n;
+               unsigned int regs[4];
+               unsigned char *dp = (unsigned char *)regs;
+               int only_trace = 0;
+-              if (num_cache_leaves != 0 && c->x86 == 15)
++              if (ci->num_leaves && c->x86 == 15)
+                       only_trace = 1;
+               /* Number of times to iterate */
+@@ -991,14 +992,12 @@ static void ci_leaf_init(struct cacheinf
+ int init_cache_level(unsigned int cpu)
+ {
+-      struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
++      struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
+-      if (!num_cache_leaves)
++      /* There should be at least one leaf. */
++      if (!ci->num_leaves)
+               return -ENOENT;
+-      if (!this_cpu_ci)
+-              return -EINVAL;
+-      this_cpu_ci->num_levels = 3;
+-      this_cpu_ci->num_leaves = num_cache_leaves;
++
+       return 0;
+ }
diff --git a/queue-6.12/x86-cpu-add-lunar-lake-to-list-of-cpus-with-a-broken-monitor-implementation.patch b/queue-6.12/x86-cpu-add-lunar-lake-to-list-of-cpus-with-a-broken-monitor-implementation.patch
new file mode 100644 (file)
index 0000000..95c316e
--- /dev/null
@@ -0,0 +1,44 @@
+From c9a4b55431e5220347881e148725bed69c84e037 Mon Sep 17 00:00:00 2001
+From: Len Brown <len.brown@intel.com>
+Date: Tue, 12 Nov 2024 21:07:00 -0500
+Subject: x86/cpu: Add Lunar Lake to list of CPUs with a broken MONITOR implementation
+
+From: Len Brown <len.brown@intel.com>
+
+commit c9a4b55431e5220347881e148725bed69c84e037 upstream.
+
+Under some conditions, MONITOR wakeups on Lunar Lake processors
+can be lost, resulting in significant user-visible delays.
+
+Add Lunar Lake to X86_BUG_MONITOR so that wake_up_idle_cpu()
+always sends an IPI, avoiding this potential delay.
+
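+As a hedged illustration of how such a bug flag is consumed (this shows
+the general pattern, not the actual wake_up_idle_cpu() code):
+
+ if (boot_cpu_has_bug(X86_BUG_MONITOR)) {
+	/* MONITOR wakeups may be lost: always send a real IPI */
+	smp_send_reschedule(cpu);
+ }
+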
+Reported originally here:
+
+       https://bugzilla.kernel.org/show_bug.cgi?id=219364
+
+[ dhansen: tweak subject ]
+
+Signed-off-by: Len Brown <len.brown@intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/a4aa8842a3c3bfdb7fe9807710eef159cbf0e705.1731463305.git.len.brown%40intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/intel.c |    4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -586,7 +586,9 @@ static void init_intel(struct cpuinfo_x8
+            c->x86_vfm == INTEL_WESTMERE_EX))
+               set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
+-      if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT)
++      if (boot_cpu_has(X86_FEATURE_MWAIT) &&
++          (c->x86_vfm == INTEL_ATOM_GOLDMONT ||
++           c->x86_vfm == INTEL_LUNARLAKE_M))
+               set_cpu_bug(c, X86_BUG_MONITOR);
+ #ifdef CONFIG_X86_64
diff --git a/queue-6.12/x86-kexec-restore-gdt-on-return-from-preserve_context-kexec.patch b/queue-6.12/x86-kexec-restore-gdt-on-return-from-preserve_context-kexec.patch
new file mode 100644 (file)
index 0000000..9a9adcd
--- /dev/null
@@ -0,0 +1,82 @@
+From 07fa619f2a40c221ea27747a3323cabc59ab25eb Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 5 Dec 2024 15:05:07 +0000
+Subject: x86/kexec: Restore GDT on return from ::preserve_context kexec
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit 07fa619f2a40c221ea27747a3323cabc59ab25eb upstream.
+
+The restore_processor_state() function explicitly states that "the asm code
+that gets us here will have restored a usable GDT". That wasn't true in the
+case of returning from a ::preserve_context kexec. Make it so.
+
+Without this, the kernel was depending on the called function to reload
+a GDT appropriate for the kernel before returning.
+
+Test program:
+
+ #include <unistd.h>
+ #include <errno.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <linux/kexec.h>
+ #include <linux/reboot.h>
+ #include <sys/reboot.h>
+ #include <sys/syscall.h>
+
+ int main (void)
+ {
+        struct kexec_segment segment = {};
+       unsigned char purgatory[] = {
+               0x66, 0xba, 0xf8, 0x03, // mov $0x3f8, %dx
+               0xb0, 0x42,             // mov $0x42, %al
+               0xee,                   // outb %al, (%dx)
+               0xc3,                   // ret
+       };
+       int ret;
+
+       segment.buf = &purgatory;
+       segment.bufsz = sizeof(purgatory);
+       segment.mem = (void *)0x400000;
+       segment.memsz = 0x1000;
+       ret = syscall(__NR_kexec_load, 0x400000, 1, &segment, KEXEC_PRESERVE_CONTEXT);
+       if (ret) {
+               perror("kexec_load");
+               exit(1);
+       }
+
+       ret = syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC);
+       if (ret) {
+               perror("kexec reboot");
+               exit(1);
+       }
+       printf("Success\n");
+       return 0;
+ }
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20241205153343.3275139-2-dwmw2@infradead.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/relocate_kernel_64.S |    7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/arch/x86/kernel/relocate_kernel_64.S
++++ b/arch/x86/kernel/relocate_kernel_64.S
+@@ -242,6 +242,13 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_map
+       movq    CR0(%r8), %r8
+       movq    %rax, %cr3
+       movq    %r8, %cr0
++
++#ifdef CONFIG_KEXEC_JUMP
++      /* Saved in save_processor_state. */
++      movq    $saved_context, %rax
++      lgdt    saved_context_gdt_desc(%rax)
++#endif
++
+       movq    %rbp, %rax
+       popf
diff --git a/queue-6.12/xsk-fix-oob-map-writes-when-deleting-elements.patch b/queue-6.12/xsk-fix-oob-map-writes-when-deleting-elements.patch
new file mode 100644 (file)
index 0000000..8b3204a
--- /dev/null
@@ -0,0 +1,113 @@
+From 32cd3db7de97c0c7a018756ce66244342fd583f0 Mon Sep 17 00:00:00 2001
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Date: Fri, 22 Nov 2024 13:10:29 +0100
+Subject: xsk: fix OOB map writes when deleting elements
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+commit 32cd3db7de97c0c7a018756ce66244342fd583f0 upstream.
+
+Jordy says:
+
+"
+In the xsk_map_delete_elem function an unsigned integer
+(map->max_entries) is compared with a user-controlled signed integer
+(k). Due to implicit type conversion, a large unsigned value for
+map->max_entries can bypass the intended bounds check:
+
+       if (k >= map->max_entries)
+               return -EINVAL;
+
+This allows k to hold a negative value (between -2147483648 and -2),
+which is then used as an array index in m->xsk_map[k], which results
+in an out-of-bounds access.
+
+       spin_lock_bh(&m->lock);
+       map_entry = &m->xsk_map[k]; // Out-of-bounds map_entry
+       old_xs = unrcu_pointer(xchg(map_entry, NULL));  // Oob write
+       if (old_xs)
+               xsk_map_sock_delete(old_xs, map_entry);
+       spin_unlock_bh(&m->lock);
+
+The xchg operation can then be used to cause an out-of-bounds write.
+Moreover, the invalid map_entry passed to xsk_map_sock_delete can lead
+to further memory corruption.
+"
+
+It indeed results in the following splat:
+
+[76612.897343] BUG: unable to handle page fault for address: ffffc8fc2e461108
+[76612.904330] #PF: supervisor write access in kernel mode
+[76612.909639] #PF: error_code(0x0002) - not-present page
+[76612.914855] PGD 0 P4D 0
+[76612.917431] Oops: Oops: 0002 [#1] PREEMPT SMP
+[76612.921859] CPU: 11 UID: 0 PID: 10318 Comm: a.out Not tainted 6.12.0-rc1+ #470
+[76612.929189] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019
+[76612.939781] RIP: 0010:xsk_map_delete_elem+0x2d/0x60
+[76612.944738] Code: 00 00 41 54 55 53 48 63 2e 3b 6f 24 73 38 4c 8d a7 f8 00 00 00 48 89 fb 4c 89 e7 e8 2d bf 05 00 48 8d b4 eb 00 01 00 00 31 ff <48> 87 3e 48 85 ff 74 05 e8 16 ff ff ff 4c 89 e7 e8 3e bc 05 00 31
+[76612.963774] RSP: 0018:ffffc9002e407df8 EFLAGS: 00010246
+[76612.969079] RAX: 0000000000000000 RBX: ffffc9002e461000 RCX: 0000000000000000
+[76612.976323] RDX: 0000000000000001 RSI: ffffc8fc2e461108 RDI: 0000000000000000
+[76612.983569] RBP: ffffffff80000001 R08: 0000000000000000 R09: 0000000000000007
+[76612.990812] R10: ffffc9002e407e18 R11: ffff888108a38858 R12: ffffc9002e4610f8
+[76612.998060] R13: ffff888108a38858 R14: 00007ffd1ae0ac78 R15: ffffc9002e4610c0
+[76613.005303] FS:  00007f80b6f59740(0000) GS:ffff8897e0ec0000(0000) knlGS:0000000000000000
+[76613.013517] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[76613.019349] CR2: ffffc8fc2e461108 CR3: 000000011e3ef001 CR4: 00000000007726f0
+[76613.026595] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[76613.033841] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[76613.041086] PKRU: 55555554
+[76613.043842] Call Trace:
+[76613.046331]  <TASK>
+[76613.048468]  ? __die+0x20/0x60
+[76613.051581]  ? page_fault_oops+0x15a/0x450
+[76613.055747]  ? search_extable+0x22/0x30
+[76613.059649]  ? search_bpf_extables+0x5f/0x80
+[76613.063988]  ? exc_page_fault+0xa9/0x140
+[76613.067975]  ? asm_exc_page_fault+0x22/0x30
+[76613.072229]  ? xsk_map_delete_elem+0x2d/0x60
+[76613.076573]  ? xsk_map_delete_elem+0x23/0x60
+[76613.080914]  __sys_bpf+0x19b7/0x23c0
+[76613.084555]  __x64_sys_bpf+0x1a/0x20
+[76613.088194]  do_syscall_64+0x37/0xb0
+[76613.091832]  entry_SYSCALL_64_after_hwframe+0x4b/0x53
+[76613.096962] RIP: 0033:0x7f80b6d1e88d
+[76613.100592] Code: 5b 41 5c c3 66 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 b5 0f 00 f7 d8 64 89 01 48
+[76613.119631] RSP: 002b:00007ffd1ae0ac68 EFLAGS: 00000206 ORIG_RAX: 0000000000000141
+[76613.131330] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f80b6d1e88d
+[76613.142632] RDX: 0000000000000098 RSI: 00007ffd1ae0ad20 RDI: 0000000000000003
+[76613.153967] RBP: 00007ffd1ae0adc0 R08: 0000000000000000 R09: 0000000000000000
+[76613.166030] R10: 00007f80b6f77040 R11: 0000000000000206 R12: 00007ffd1ae0aed8
+[76613.177130] R13: 000055ddf42ce1e9 R14: 000055ddf42d0d98 R15: 00007f80b6fab040
+[76613.188129]  </TASK>
+
+Fix this by simply changing key type from int to u32.
+
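+A minimal standalone sketch of the conversion pitfall (userspace C, not
+kernel code; values illustrative):
+
+ #include <stdio.h>
+
+ int main(void)
+ {
+	unsigned int max_entries = 0xffffffffu;	/* huge map size */
+	int k = -2;				/* user-controlled key */
+
+	/* k converts to 0xfffffffe for the comparison, so the bounds
+	 * check passes even though k is negative... */
+	if (k >= max_entries)
+		puts("rejected");
+	else	/* ...and the negative index would be used: OOB access */
+		printf("accepted, would index element %d\n", k);
+	return 0;
+ }
+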
+Fixes: fbfc504a24f5 ("bpf: introduce new bpf AF_XDP map type BPF_MAP_TYPE_XSKMAP")
+CC: stable@vger.kernel.org
+Reported-by: Jordy Zomer <jordyzomer@google.com>
+Suggested-by: Jordy Zomer <jordyzomer@google.com>
+Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Acked-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Link: https://lore.kernel.org/r/20241122121030.716788-2-maciej.fijalkowski@intel.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/xdp/xskmap.c |    2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/xdp/xskmap.c
++++ b/net/xdp/xskmap.c
+@@ -224,7 +224,7 @@ static long xsk_map_delete_elem(struct b
+       struct xsk_map *m = container_of(map, struct xsk_map, map);
+       struct xdp_sock __rcu **map_entry;
+       struct xdp_sock *old_xs;
+-      int k = *(u32 *)key;
++      u32 k = *(u32 *)key;
+       if (k >= map->max_entries)
+               return -EINVAL;