--- /dev/null
+From 180bbad698641873120a48857bb3b9f3166bf684 Mon Sep 17 00:00:00 2001
+From: Marc Zyngier <maz@kernel.org>
+Date: Sun, 1 Dec 2024 09:27:02 +0000
+Subject: arch_numa: Restore nid checks before registering a memblock with a node
+
+From: Marc Zyngier <maz@kernel.org>
+
+commit 180bbad698641873120a48857bb3b9f3166bf684 upstream.
+
+Commit 767507654c22 ("arch_numa: switch over to numa_memblks")
+significantly cleaned up the NUMA registration code, but also
+dropped an important check that refused to configure a memblock
+with an invalid nid.
+
+On "quality hardware" such as my ThunderX machine, this results
+in a kernel that dies immediately:
+
+[ 0.000000] Booting Linux on physical CPU 0x0000000000 [0x431f0a10]
+[ 0.000000] Linux version 6.12.0-00013-g8920d74cf8db (maz@valley-girl) (gcc (Debian 12.2.0-14) 12.2.0, GNU ld (GNU Binutils for Debian) 2.40) #3872 SMP PREEMPT Wed Nov 27 15:25:49 GMT 2024
+[ 0.000000] KASLR disabled due to lack of seed
+[ 0.000000] Machine model: Cavium ThunderX CN88XX board
+[ 0.000000] efi: EFI v2.4 by American Megatrends
+[ 0.000000] efi: ESRT=0xffce0ff18 SMBIOS 3.0=0xfffb0000 ACPI 2.0=0xffec60000 MEMRESERVE=0xffc905d98
+[ 0.000000] esrt: Reserving ESRT space from 0x0000000ffce0ff18 to 0x0000000ffce0ff50.
+[ 0.000000] earlycon: pl11 at MMIO 0x000087e024000000 (options '115200n8')
+[ 0.000000] printk: legacy bootconsole [pl11] enabled
+[ 0.000000] NODE_DATA(0) allocated [mem 0xff6754580-0xff67566bf]
+[ 0.000000] Unable to handle kernel paging request at virtual address 0000000000001d40
+[ 0.000000] Mem abort info:
+[ 0.000000] ESR = 0x0000000096000004
+[ 0.000000] EC = 0x25: DABT (current EL), IL = 32 bits
+[ 0.000000] SET = 0, FnV = 0
+[ 0.000000] EA = 0, S1PTW = 0
+[ 0.000000] FSC = 0x04: level 0 translation fault
+[ 0.000000] Data abort info:
+[ 0.000000] ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
+[ 0.000000] CM = 0, WnR = 0, TnD = 0, TagAccess = 0
+[ 0.000000] GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
+[ 0.000000] [0000000000001d40] user address but active_mm is swapper
+[ 0.000000] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP
+[ 0.000000] Modules linked in:
+[ 0.000000] CPU: 0 UID: 0 PID: 0 Comm: swapper Not tainted 6.12.0-00013-g8920d74cf8db #3872
+[ 0.000000] Hardware name: Cavium ThunderX CN88XX board (DT)
+[ 0.000000] pstate: a00000c5 (NzCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+[ 0.000000] pc : sparse_init_nid+0x54/0x428
+[ 0.000000] lr : sparse_init+0x118/0x240
+[ 0.000000] sp : ffff800081da3cb0
+[ 0.000000] x29: ffff800081da3cb0 x28: 0000000fedbab10c x27: 0000000000000001
+[ 0.000000] x26: 0000000ffee250f8 x25: 0000000000000001 x24: ffff800082102cd0
+[ 0.000000] x23: 0000000000000001 x22: 0000000000000000 x21: 00000000001fffff
+[ 0.000000] x20: 0000000000000001 x19: 0000000000000000 x18: ffffffffffffffff
+[ 0.000000] x17: 0000000001b00000 x16: 0000000ffd130000 x15: 0000000000000000
+[ 0.000000] x14: 00000000003e0000 x13: 00000000000001c8 x12: 0000000000000014
+[ 0.000000] x11: ffff800081e82860 x10: ffff8000820fb2c8 x9 : ffff8000820fb490
+[ 0.000000] x8 : 0000000000ffed20 x7 : 0000000000000014 x6 : 00000000001fffff
+[ 0.000000] x5 : 00000000ffffffff x4 : 0000000000000000 x3 : 0000000000000000
+[ 0.000000] x2 : 0000000000000000 x1 : 0000000000000040 x0 : 0000000000000007
+[ 0.000000] Call trace:
+[ 0.000000] sparse_init_nid+0x54/0x428
+[ 0.000000] sparse_init+0x118/0x240
+[ 0.000000] bootmem_init+0x70/0x1c8
+[ 0.000000] setup_arch+0x184/0x270
+[ 0.000000] start_kernel+0x74/0x670
+[ 0.000000] __primary_switched+0x80/0x90
+[ 0.000000] Code: f865d804 d37df060 cb030000 d2800003 (b95d4084)
+[ 0.000000] ---[ end trace 0000000000000000 ]---
+[ 0.000000] Kernel panic - not syncing: Attempted to kill the idle task!
+[ 0.000000] ---[ end Kernel panic - not syncing: Attempted to kill the idle task! ]---
+
+while previous kernel versions were able to recognise how brain-damaged
+the machine is, and only build a fake node.
+
+Use the memblock_validate_numa_coverage() helper to restore some sanity
+and a "working" system.
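+
+A rough sketch of the restored flow (mirroring the hunk below; the
+assumption here is that memblock_validate_numa_coverage() takes the amount
+of memory, in bytes, that may be left without a node assignment, so passing
+0 rejects any configuration where a memblock ended up with an invalid nid):
+
+    /* illustrative only, simplified from the hunk below */
+    static int __init numa_register_nodes(void)
+    {
+            int nid;
+
+            /* bail out before handing an invalid nid to the core */
+            if (!memblock_validate_numa_coverage(0))
+                    return -EINVAL;
+
+            for_each_node_mask(nid, numa_nodes_parsed) {
+                    /* ... register each parsed node as before ... */
+            }
+            return 0;
+    }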
+
+Fixes: 767507654c22 ("arch_numa: switch over to numa_memblks")
+Suggested-by: Mike Rapoport <rppt@kernel.org>
+Signed-off-by: Marc Zyngier <maz@kernel.org>
+Cc: Catalin Marinas <catalin.marinas@arm.com>
+Cc: Will Deacon <will@kernel.org>
+Cc: Zi Yan <ziy@nvidia.com>
+Cc: Dan Williams <dan.j.williams@intel.com>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20241201092702.3792845-1-maz@kernel.org
+Signed-off-by: Mike Rapoport (Microsoft) <rppt@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/base/arch_numa.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/base/arch_numa.c b/drivers/base/arch_numa.c
+index e18701676426..c99f2ab105e5 100644
+--- a/drivers/base/arch_numa.c
++++ b/drivers/base/arch_numa.c
+@@ -208,6 +208,10 @@ static int __init numa_register_nodes(void)
+ {
+ int nid;
+
++ /* Check the validity of the memblock/node mapping */
++ if (!memblock_validate_numa_coverage(0))
++ return -EINVAL;
++
+ /* Finally register nodes. */
+ for_each_node_mask(nid, numa_nodes_parsed) {
+ unsigned long start_pfn, end_pfn;
+--
+2.47.1
+
--- /dev/null
+From ab244dd7cf4c291f82faacdc50b45cc0f55b674d Mon Sep 17 00:00:00 2001
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Date: Fri, 22 Nov 2024 13:10:30 +0100
+Subject: bpf: fix OOB devmap writes when deleting elements
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+commit ab244dd7cf4c291f82faacdc50b45cc0f55b674d upstream.
+
+Jordy reported an issue against XSKMAP which also applies to DEVMAP: the
+index used for accessing a map entry, being a signed integer, can cause
+OOB writes. The fix is as simple as changing the type from int to u32;
+however, compared to the XSKMAP case, one more thing needs to be
+addressed.
+
+When the map is released from the system via dev_map_free(), we iterate
+through all of the entries, and the iterator variable is also an int,
+which implies OOB accesses. Again, change it to u32.
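+
+To make the type issue concrete, here is a minimal userspace sketch (not
+the kernel code; the index value is purely hypothetical, and the unsigned
+to int conversion is implementation-defined, though it wraps to a negative
+value on the usual compilers) showing how a signed int index with the top
+bit set turns into a huge negative byte offset on a 64-bit machine, while
+a u32 index stays positive:
+
+    #include <stdio.h>
+    #include <stdint.h>
+
+    int main(void)
+    {
+            uint32_t key = 0x80000000u;   /* hypothetical large index   */
+            int k_old = (int)key;         /* old code: signed index     */
+            uint32_t k_new = key;         /* fixed code: unsigned index */
+
+            /*
+             * Byte offset of an 8-byte array slot at that index: the
+             * signed index sign-extends to a negative 64-bit offset.
+             */
+            printf("int index -> offset %lld\n", (long long)k_old * 8);
+            printf("u32 index -> offset %llu\n", (unsigned long long)k_new * 8);
+            return 0;
+    }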
+
+Example splat below:
+
+[ 160.724676] BUG: unable to handle page fault for address: ffffc8fc2c001000
+[ 160.731662] #PF: supervisor read access in kernel mode
+[ 160.736876] #PF: error_code(0x0000) - not-present page
+[ 160.742095] PGD 0 P4D 0
+[ 160.744678] Oops: Oops: 0000 [#1] PREEMPT SMP
+[ 160.749106] CPU: 1 UID: 0 PID: 520 Comm: kworker/u145:12 Not tainted 6.12.0-rc1+ #487
+[ 160.757050] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019
+[ 160.767642] Workqueue: events_unbound bpf_map_free_deferred
+[ 160.773308] RIP: 0010:dev_map_free+0x77/0x170
+[ 160.777735] Code: 00 e8 fd 91 ed ff e8 b8 73 ed ff 41 83 7d 18 19 74 6e 41 8b 45 24 49 8b bd f8 00 00 00 31 db 85 c0 74 48 48 63 c3 48 8d 04 c7 <48> 8b 28 48 85 ed 74 30 48 8b 7d 18 48 85 ff 74 05 e8 b3 52 fa ff
+[ 160.796777] RSP: 0018:ffffc9000ee1fe38 EFLAGS: 00010202
+[ 160.802086] RAX: ffffc8fc2c001000 RBX: 0000000080000000 RCX: 0000000000000024
+[ 160.809331] RDX: 0000000000000000 RSI: 0000000000000024 RDI: ffffc9002c001000
+[ 160.816576] RBP: 0000000000000000 R08: 0000000000000023 R09: 0000000000000001
+[ 160.823823] R10: 0000000000000001 R11: 00000000000ee6b2 R12: dead000000000122
+[ 160.831066] R13: ffff88810c928e00 R14: ffff8881002df405 R15: 0000000000000000
+[ 160.838310] FS: 0000000000000000(0000) GS:ffff8897e0c40000(0000) knlGS:0000000000000000
+[ 160.846528] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 160.852357] CR2: ffffc8fc2c001000 CR3: 0000000005c32006 CR4: 00000000007726f0
+[ 160.859604] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[ 160.866847] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[ 160.874092] PKRU: 55555554
+[ 160.876847] Call Trace:
+[ 160.879338] <TASK>
+[ 160.881477] ? __die+0x20/0x60
+[ 160.884586] ? page_fault_oops+0x15a/0x450
+[ 160.888746] ? search_extable+0x22/0x30
+[ 160.892647] ? search_bpf_extables+0x5f/0x80
+[ 160.896988] ? exc_page_fault+0xa9/0x140
+[ 160.900973] ? asm_exc_page_fault+0x22/0x30
+[ 160.905232] ? dev_map_free+0x77/0x170
+[ 160.909043] ? dev_map_free+0x58/0x170
+[ 160.912857] bpf_map_free_deferred+0x51/0x90
+[ 160.917196] process_one_work+0x142/0x370
+[ 160.921272] worker_thread+0x29e/0x3b0
+[ 160.925082] ? rescuer_thread+0x4b0/0x4b0
+[ 160.929157] kthread+0xd4/0x110
+[ 160.932355] ? kthread_park+0x80/0x80
+[ 160.936079] ret_from_fork+0x2d/0x50
+[ 160.943396] ? kthread_park+0x80/0x80
+[ 160.950803] ret_from_fork_asm+0x11/0x20
+[ 160.958482] </TASK>
+
+Fixes: 546ac1ffb70d ("bpf: add devmap, a map for storing net device references")
+CC: stable@vger.kernel.org
+Reported-by: Jordy Zomer <jordyzomer@google.com>
+Suggested-by: Jordy Zomer <jordyzomer@google.com>
+Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Acked-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Link: https://lore.kernel.org/r/20241122121030.716788-3-maciej.fijalkowski@intel.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ kernel/bpf/devmap.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/kernel/bpf/devmap.c
++++ b/kernel/bpf/devmap.c
+@@ -184,7 +184,7 @@ static struct bpf_map *dev_map_alloc(uni
+ static void dev_map_free(struct bpf_map *map)
+ {
+ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+- int i;
++ u32 i;
+
+ /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+ * so the programs (can be more than one that used this map) were
+@@ -821,7 +821,7 @@ static long dev_map_delete_elem(struct b
+ {
+ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+ struct bpf_dtab_netdev *old_dev;
+- int k = *(u32 *)key;
++ u32 k = *(u32 *)key;
+
+ if (k >= map->max_entries)
+ return -EINVAL;
+@@ -838,7 +838,7 @@ static long dev_map_hash_delete_elem(str
+ {
+ struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+ struct bpf_dtab_netdev *old_dev;
+- int k = *(u32 *)key;
++ u32 k = *(u32 *)key;
+ unsigned long flags;
+ int ret = -ENOENT;
+
--- /dev/null
+From b3fce429a1e030b50c1c91351d69b8667eef627b Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Wed, 27 Nov 2024 16:22:46 -0800
+Subject: cacheinfo: Allocate memory during CPU hotplug if not done from the primary CPU
+
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+
+commit b3fce429a1e030b50c1c91351d69b8667eef627b upstream.
+
+Commit
+
+ 5944ce092b97 ("arch_topology: Build cacheinfo from primary CPU")
+
+adds functionality that architectures can use to optionally allocate and
+build cacheinfo early during boot. Commit
+
+ 6539cffa9495 ("cacheinfo: Add arch specific early level initializer")
+
+lets secondary CPUs correct (and reallocate memory) cacheinfo data if
+needed.
+
+If the early build functionality is not used and cacheinfo does not need
+correction, memory for cacheinfo is never allocated. x86 does not use
+the early build functionality. Consequently, during the cacheinfo CPU
+hotplug callback, last_level_cache_is_valid() attempts to dereference
+a NULL pointer:
+
+ BUG: kernel NULL pointer dereference, address: 0000000000000100
+ #PF: supervisor read access in kernel mode
+ #PF: error_code(0x0000) - not present page
+ PGD 0 P4D 0
+ Oops: 0000 [#1] PREEPMT SMP NOPTI
+ CPU: 0 PID 19 Comm: cpuhp/0 Not tainted 6.4.0-rc2 #1
+ RIP: 0010: last_level_cache_is_valid+0x95/0xe0a
+
+Allocate memory for cacheinfo during the cacheinfo CPU hotplug callback
+if not done earlier.
+
+Moreover, before determining the validity of the last-level cache info,
+ensure that it has been allocated. Simply checking for non-zero
+cache_leaves() is not sufficient, as some architectures (e.g., Intel
+processors) have non-zero cache_leaves() before allocation.
+
+Dereferencing NULL cacheinfo can occur in update_per_cpu_data_slice_size().
+This function iterates over all online CPUs. However, a CPU may have come
+online recently, but its cacheinfo may not have been allocated yet.
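+
+A condensed sketch of the guards being added (simplified from the hunks
+below, which also handle reallocating and copying the early levels):
+
+    /* illustrative only */
+    bool last_level_cache_is_valid(unsigned int cpu)
+    {
+            /* nothing to inspect until the per-CPU array exists */
+            if (!cache_leaves(cpu) || !per_cpu_cacheinfo(cpu))
+                    return false;
+            /* ... check the last leaf as before ... */
+    }
+
+    static int init_level_allocate_ci(unsigned int cpu)
+    {
+            /* only skip allocation if the early path already did it */
+            if (cache_leaves(cpu) <= early_leaves && per_cpu_cacheinfo(cpu))
+                    return 0;
+            /* ... otherwise allocate from the hotplug callback ... */
+    }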
+
+While here, remove an unnecessary indentation in allocate_cache_info().
+
+ [ bp: Massage. ]
+
+Fixes: 6539cffa9495 ("cacheinfo: Add arch specific early level initializer")
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Reviewed-by: Radu Rendec <rrendec@redhat.com>
+Reviewed-by: Nikolay Borisov <nik.borisov@suse.com>
+Reviewed-by: Andreas Herrmann <aherrmann@suse.de>
+Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
+Cc: stable@vger.kernel.org # 6.3+
+Link: https://lore.kernel.org/r/20241128002247.26726-2-ricardo.neri-calderon@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/base/cacheinfo.c | 14 ++++++++------
+ 1 file changed, 8 insertions(+), 6 deletions(-)
+
+--- a/drivers/base/cacheinfo.c
++++ b/drivers/base/cacheinfo.c
+@@ -58,7 +58,7 @@ bool last_level_cache_is_valid(unsigned
+ {
+ struct cacheinfo *llc;
+
+- if (!cache_leaves(cpu))
++ if (!cache_leaves(cpu) || !per_cpu_cacheinfo(cpu))
+ return false;
+
+ llc = per_cpu_cacheinfo_idx(cpu, cache_leaves(cpu) - 1);
+@@ -463,11 +463,9 @@ int __weak populate_cache_leaves(unsigne
+ return -ENOENT;
+ }
+
+-static inline
+-int allocate_cache_info(int cpu)
++static inline int allocate_cache_info(int cpu)
+ {
+- per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu),
+- sizeof(struct cacheinfo), GFP_ATOMIC);
++ per_cpu_cacheinfo(cpu) = kcalloc(cache_leaves(cpu), sizeof(struct cacheinfo), GFP_ATOMIC);
+ if (!per_cpu_cacheinfo(cpu)) {
+ cache_leaves(cpu) = 0;
+ return -ENOMEM;
+@@ -539,7 +537,11 @@ static inline int init_level_allocate_ci
+ */
+ ci_cacheinfo(cpu)->early_ci_levels = false;
+
+- if (cache_leaves(cpu) <= early_leaves)
++ /*
++ * Some architectures (e.g., x86) do not use early initialization.
++ * Allocate memory now in such case.
++ */
++ if (cache_leaves(cpu) <= early_leaves && per_cpu_cacheinfo(cpu))
+ return 0;
+
+ kfree(per_cpu_cacheinfo(cpu));
--- /dev/null
+From 78ac1c3558810486d90aa533b0039aa70487a3da Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Christian=20K=C3=B6nig?= <christian.koenig@amd.com>
+Date: Fri, 8 Nov 2024 09:29:48 +0100
+Subject: dma-buf: fix dma_fence_array_signaled v4
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Christian König <christian.koenig@amd.com>
+
+commit 78ac1c3558810486d90aa533b0039aa70487a3da upstream.
+
+The function silently assumed that signaling was already enabled for the
+dma_fence_array. This meant that without enabling signaling first we would
+never see forward progress.
+
+Fix that by falling back to testing each individual fence when signaling
+isn't enabled yet.
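+
+A condensed sketch of the new logic (simplified from the hunk below; the
+real code also propagates pending errors before reporting the array as
+signaled):
+
+    num_pending = atomic_read_acquire(&array->num_pending);
+    if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &array->base.flags))
+            return num_pending <= 0;   /* counter is being maintained */
+
+    /* signaling not enabled yet: poll the member fences directly */
+    for (i = 0; i < array->num_fences; ++i)
+            if (dma_fence_is_signaled(array->fences[i]) && !--num_pending)
+                    return true;
+    return false;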
+
+v2: add the comment suggested by Boris why this is done this way
+v3: fix the underflow pointed out by Tvrtko
+v4: atomic_read_acquire() as suggested by Tvrtko
+
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Reviewed-by: Boris Brezillon <boris.brezillon@collabora.com>
+Tested-by: Chia-I Wu <olvaffe@gmail.com>
+Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/12094
+Cc: <stable@vger.kernel.org>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241112121925.18464-1-christian.koenig@amd.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dma-buf/dma-fence-array.c | 28 +++++++++++++++++++++++++++-
+ 1 file changed, 27 insertions(+), 1 deletion(-)
+
+--- a/drivers/dma-buf/dma-fence-array.c
++++ b/drivers/dma-buf/dma-fence-array.c
+@@ -103,10 +103,36 @@ static bool dma_fence_array_enable_signa
+ static bool dma_fence_array_signaled(struct dma_fence *fence)
+ {
+ struct dma_fence_array *array = to_dma_fence_array(fence);
++ int num_pending;
++ unsigned int i;
+
+- if (atomic_read(&array->num_pending) > 0)
++ /*
++ * We need to read num_pending before checking the enable_signal bit
++ * to avoid racing with the enable_signaling() implementation, which
++ * might decrement the counter, and cause a partial check.
++ * atomic_read_acquire() pairs with atomic_dec_and_test() in
++ * dma_fence_array_enable_signaling()
++ *
++ * The !--num_pending check is here to account for the any_signaled case
++ * if we race with enable_signaling(), that means the !num_pending check
++ * in the is_signalling_enabled branch might be outdated (num_pending
++ * might have been decremented), but that's fine. The user will get the
++ * right value when testing again later.
++ */
++ num_pending = atomic_read_acquire(&array->num_pending);
++ if (test_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT, &array->base.flags)) {
++ if (num_pending <= 0)
++ goto signal;
+ return false;
++ }
+
++ for (i = 0; i < array->num_fences; ++i) {
++ if (dma_fence_is_signaled(array->fences[i]) && !--num_pending)
++ goto signal;
++ }
++ return false;
++
++signal:
+ dma_fence_array_clear_pending_error(array);
+ return true;
+ }
--- /dev/null
+From 949291c5314009b4f6e252391edbb40fdd5d5414 Mon Sep 17 00:00:00 2001
+From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Date: Fri, 15 Nov 2024 10:21:49 +0000
+Subject: dma-fence: Fix reference leak on fence merge failure path
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+
+commit 949291c5314009b4f6e252391edbb40fdd5d5414 upstream.
+
+Release all fence references if the output dma-fence-array could not be
+allocated.
+
+Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Fixes: 245a4a7b531c ("dma-buf: generalize dma_fence unwrap & merging v3")
+Cc: Christian König <christian.koenig@amd.com>
+Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
+Cc: Sumit Semwal <sumit.semwal@linaro.org>
+Cc: Gustavo Padovan <gustavo@padovan.org>
+Cc: Friedrich Vock <friedrich.vock@gmx.de>
+Cc: linux-media@vger.kernel.org
+Cc: dri-devel@lists.freedesktop.org
+Cc: linaro-mm-sig@lists.linaro.org
+Cc: <stable@vger.kernel.org> # v6.0+
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241115102153.1980-2-tursulin@igalia.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dma-buf/dma-fence-unwrap.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/drivers/dma-buf/dma-fence-unwrap.c
++++ b/drivers/dma-buf/dma-fence-unwrap.c
+@@ -164,6 +164,8 @@ restart:
+ dma_fence_context_alloc(1),
+ 1, false);
+ if (!result) {
++ for (i = 0; i < count; i++)
++ dma_fence_put(array[i]);
+ tmp = NULL;
+ goto return_tmp;
+ }
--- /dev/null
+From fe52c649438b8489c9456681d93a9b3de3d38263 Mon Sep 17 00:00:00 2001
+From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Date: Fri, 15 Nov 2024 10:21:50 +0000
+Subject: dma-fence: Use kernel's sort for merging fences
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+
+commit fe52c649438b8489c9456681d93a9b3de3d38263 upstream.
+
+One alternative to the fix Christian proposed in
+https://lore.kernel.org/dri-devel/20241024124159.4519-3-christian.koenig@amd.com/
+is to replace the rather complex open coded sorting loops with the kernel
+standard sort followed by a context squashing pass.
+
+The proposed advantage of this would be readability, but one concern
+Christian raised was that there could be many fences, that they are
+typically mostly sorted, and that the kernel's heap sort could therefore
+fare worse than the existing open-coded approach.
+
+I had a look at running some games and vkcube to see what the typical
+numbers of input fences are. Tested scenarios:
+
+1) Hogwarts Legacy under Gamescope
+
+450 calls per second to __dma_fence_unwrap_merge.
+
+Percentages per number of fences buckets, before and after checking for
+signalled status, sorting and flattening:
+
+ N Before After
+ 0 0.91%
+ 1 69.40%
+ 2-3 28.72% 9.4% (90.6% resolved to one fence)
+ 4-5 0.93%
+ 6-9 0.03%
+ 10+
+
+2) Cyberpunk 2077 under Gamescope
+
+1050 calls per second, amounting to 0.01% CPU time according to perf top.
+
+ N Before After
+ 0 1.13%
+ 1 52.30%
+ 2-3 40.34% 55.57%
+ 4-5 1.46% 0.50%
+ 6-9 2.44%
+ 10+ 2.34%
+
+3) vkcube under Plasma
+
+90 calls per second.
+
+ N Before After
+ 0
+ 1
+ 2-3 100% 0% (Ie. all resolved to a single fence)
+ 4-5
+ 6-9
+ 10+
+
+In the case of vkcube all invocations in the 2-3 bucket were actually
+just two input fences.
+
+From these numbers it looks like the heap sort should not be a
+disadvantage, given how the dominant case is <= 2 input fences which heap
+sort solves with just one compare and swap. (And for the case of one input
+fence we have a fast path in the previous patch.)
+
+A complementary possibility is to implement a different sorting algorithm
+under the same API as the kernel's sort() and so keep the simplicity,
+potentially moving the new sort under lib/ if it would be found more
+widely useful.
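+
+To show the shape of the sort-plus-squash pass in isolation, here is a
+small self-contained userspace sketch (plain structs and qsort() standing
+in for struct dma_fence and the kernel's sort(); the values are made up for
+illustration, and dropped entries are simply discarded rather than put):
+
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <stdint.h>
+
+    struct fake_fence { uint64_t context, seqno; };
+
+    static int fence_cmp(const void *pa, const void *pb)
+    {
+            const struct fake_fence *a = pa, *b = pb;
+
+            if (a->context != b->context)
+                    return a->context < b->context ? -1 : 1;
+            /* newest (highest seqno) first within a context */
+            return a->seqno > b->seqno ? -1 : (a->seqno < b->seqno ? 1 : 0);
+    }
+
+    int main(void)
+    {
+            struct fake_fence f[] = {
+                    { .context = 2, .seqno = 7 }, { .context = 1, .seqno = 3 },
+                    { .context = 2, .seqno = 9 }, { .context = 1, .seqno = 5 },
+            };
+            size_t count = sizeof(f) / sizeof(f[0]), i, j = 0;
+
+            qsort(f, count, sizeof(f[0]), fence_cmp);
+
+            /* squash: keep only the first (newest) fence of each context */
+            for (i = 1; i < count; i++)
+                    if (f[i].context != f[j].context)
+                            f[++j] = f[i];
+            count = j + 1;
+
+            for (i = 0; i < count; i++)
+                    printf("context %llu seqno %llu\n",
+                           (unsigned long long)f[i].context,
+                           (unsigned long long)f[i].seqno);
+            return 0;  /* prints context 1/seqno 5 and context 2/seqno 9 */
+    }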
+
+v2:
+ * Hold on to fence references and reduce commentary. (Christian)
+ * Record and use latest signaled timestamp in the 2nd loop too.
+ * Consolidate zero or one fences fast paths.
+
+v3:
+ * Reverse the seqno sort order for a simpler squashing pass. (Christian)
+
+Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Fixes: 245a4a7b531c ("dma-buf: generalize dma_fence unwrap & merging v3")
+Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3617
+Cc: Christian König <christian.koenig@amd.com>
+Cc: Daniel Vetter <daniel.vetter@ffwll.ch>
+Cc: Sumit Semwal <sumit.semwal@linaro.org>
+Cc: Gustavo Padovan <gustavo@padovan.org>
+Cc: Friedrich Vock <friedrich.vock@gmx.de>
+Cc: linux-media@vger.kernel.org
+Cc: dri-devel@lists.freedesktop.org
+Cc: linaro-mm-sig@lists.linaro.org
+Cc: <stable@vger.kernel.org> # v6.0+
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Christian König <christian.koenig@amd.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241115102153.1980-3-tursulin@igalia.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/dma-buf/dma-fence-unwrap.c | 126 +++++++++++++++++--------------------
+ 1 file changed, 60 insertions(+), 66 deletions(-)
+
+--- a/drivers/dma-buf/dma-fence-unwrap.c
++++ b/drivers/dma-buf/dma-fence-unwrap.c
+@@ -12,6 +12,7 @@
+ #include <linux/dma-fence-chain.h>
+ #include <linux/dma-fence-unwrap.h>
+ #include <linux/slab.h>
++#include <linux/sort.h>
+
+ /* Internal helper to start new array iteration, don't use directly */
+ static struct dma_fence *
+@@ -59,6 +60,25 @@ struct dma_fence *dma_fence_unwrap_next(
+ }
+ EXPORT_SYMBOL_GPL(dma_fence_unwrap_next);
+
++
++static int fence_cmp(const void *_a, const void *_b)
++{
++ struct dma_fence *a = *(struct dma_fence **)_a;
++ struct dma_fence *b = *(struct dma_fence **)_b;
++
++ if (a->context < b->context)
++ return -1;
++ else if (a->context > b->context)
++ return 1;
++
++ if (dma_fence_is_later(b, a))
++ return 1;
++ else if (dma_fence_is_later(a, b))
++ return -1;
++
++ return 0;
++}
++
+ /* Implementation for the dma_fence_merge() marco, don't use directly */
+ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
+ struct dma_fence **fences,
+@@ -67,8 +87,7 @@ struct dma_fence *__dma_fence_unwrap_mer
+ struct dma_fence_array *result;
+ struct dma_fence *tmp, **array;
+ ktime_t timestamp;
+- unsigned int i;
+- size_t count;
++ int i, j, count;
+
+ count = 0;
+ timestamp = ns_to_ktime(0);
+@@ -96,80 +115,55 @@ struct dma_fence *__dma_fence_unwrap_mer
+ if (!array)
+ return NULL;
+
+- /*
+- * This trashes the input fence array and uses it as position for the
+- * following merge loop. This works because the dma_fence_merge()
+- * wrapper macro is creating this temporary array on the stack together
+- * with the iterators.
+- */
+- for (i = 0; i < num_fences; ++i)
+- fences[i] = dma_fence_unwrap_first(fences[i], &iter[i]);
+-
+ count = 0;
+- do {
+- unsigned int sel;
+-
+-restart:
+- tmp = NULL;
+- for (i = 0; i < num_fences; ++i) {
+- struct dma_fence *next;
+-
+- while (fences[i] && dma_fence_is_signaled(fences[i]))
+- fences[i] = dma_fence_unwrap_next(&iter[i]);
+-
+- next = fences[i];
+- if (!next)
+- continue;
+-
+- /*
+- * We can't guarantee that inpute fences are ordered by
+- * context, but it is still quite likely when this
+- * function is used multiple times. So attempt to order
+- * the fences by context as we pass over them and merge
+- * fences with the same context.
+- */
+- if (!tmp || tmp->context > next->context) {
+- tmp = next;
+- sel = i;
+-
+- } else if (tmp->context < next->context) {
+- continue;
+-
+- } else if (dma_fence_is_later(tmp, next)) {
+- fences[i] = dma_fence_unwrap_next(&iter[i]);
+- goto restart;
++ for (i = 0; i < num_fences; ++i) {
++ dma_fence_unwrap_for_each(tmp, &iter[i], fences[i]) {
++ if (!dma_fence_is_signaled(tmp)) {
++ array[count++] = dma_fence_get(tmp);
+ } else {
+- fences[sel] = dma_fence_unwrap_next(&iter[sel]);
+- goto restart;
++ ktime_t t = dma_fence_timestamp(tmp);
++
++ if (ktime_after(t, timestamp))
++ timestamp = t;
+ }
+ }
++ }
+
+- if (tmp) {
+- array[count++] = dma_fence_get(tmp);
+- fences[sel] = dma_fence_unwrap_next(&iter[sel]);
+- }
+- } while (tmp);
++ if (count == 0 || count == 1)
++ goto return_fastpath;
+
+- if (count == 0) {
+- tmp = dma_fence_allocate_private_stub(ktime_get());
+- goto return_tmp;
+- }
++ sort(array, count, sizeof(*array), fence_cmp, NULL);
+
+- if (count == 1) {
+- tmp = array[0];
+- goto return_tmp;
++ /*
++ * Only keep the most recent fence for each context.
++ */
++ j = 0;
++ for (i = 1; i < count; i++) {
++ if (array[i]->context == array[j]->context)
++ dma_fence_put(array[i]);
++ else
++ array[++j] = array[i];
+ }
++ count = ++j;
+
+- result = dma_fence_array_create(count, array,
+- dma_fence_context_alloc(1),
+- 1, false);
+- if (!result) {
+- for (i = 0; i < count; i++)
+- dma_fence_put(array[i]);
+- tmp = NULL;
+- goto return_tmp;
++ if (count > 1) {
++ result = dma_fence_array_create(count, array,
++ dma_fence_context_alloc(1),
++ 1, false);
++ if (!result) {
++ for (i = 0; i < count; i++)
++ dma_fence_put(array[i]);
++ tmp = NULL;
++ goto return_tmp;
++ }
++ return &result->base;
+ }
+- return &result->base;
++
++return_fastpath:
++ if (count == 0)
++ tmp = dma_fence_allocate_private_stub(timestamp);
++ else
++ tmp = array[0];
+
+ return_tmp:
+ kfree(array);
--- /dev/null
+From 63e7ee677c74e981257cedfdd8543510d09096ba Mon Sep 17 00:00:00 2001
+From: Peterson Guo <peterson.guo@amd.com>
+Date: Thu, 7 Nov 2024 19:20:02 -0500
+Subject: drm/amd/display: Add a left edge pixel if in YCbCr422 or YCbCr420 and odm
+
+From: Peterson Guo <peterson.guo@amd.com>
+
+commit 63e7ee677c74e981257cedfdd8543510d09096ba upstream.
+
+[WHY]
+On some cards, when ODM is used, the monitor is driven by 2 separate pipes
+split vertically. When compression is used in the YCbCr colour space, the
+second pipe needs to read a pixel from the end of the first pipe in order
+to display colours accurately. Hardware was programmed properly to account
+for this extra pixel, but it was not calculated properly in software,
+causing a split screen on some monitors.
+
+[HOW]
+The fix adjusts the second pipe's viewport and timings if the pixel
+encoding is YCbCr422 or YCbCr420.
+
+Cc: Mario Limonciello <mario.limonciello@amd.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: George Shen <george.shen@amd.com>
+Signed-off-by: Peterson Guo <peterson.guo@amd.com>
+Signed-off-by: Alex Hung <alex.hung@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c | 23 ++++++++++
+ 1 file changed, 23 insertions(+)
+
+--- a/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/resource/dcn20/dcn20_resource.c
+@@ -1511,6 +1511,7 @@ bool dcn20_split_stream_for_odm(
+
+ if (prev_odm_pipe->plane_state) {
+ struct scaler_data *sd = &prev_odm_pipe->plane_res.scl_data;
++ struct output_pixel_processor *opp = next_odm_pipe->stream_res.opp;
+ int new_width;
+
+ /* HACTIVE halved for odm combine */
+@@ -1544,7 +1545,28 @@ bool dcn20_split_stream_for_odm(
+ sd->viewport_c.x += dc_fixpt_floor(dc_fixpt_mul_int(
+ sd->ratios.horz_c, sd->h_active - sd->recout.x));
+ sd->recout.x = 0;
++
++ /*
++ * When odm is used in YcbCr422 or 420 colour space, a split screen
++ * will be seen with the previous calculations since the extra left
++ * edge pixel is accounted for in fmt but not in viewport.
++ *
++ * Below are calculations which fix the split by fixing the calculations
++ * if there is an extra left edge pixel.
++ */
++ if (opp && opp->funcs->opp_get_left_edge_extra_pixel_count
++ && opp->funcs->opp_get_left_edge_extra_pixel_count(
++ opp, next_odm_pipe->stream->timing.pixel_encoding,
++ resource_is_pipe_type(next_odm_pipe, OTG_MASTER)) == 1) {
++ sd->h_active += 1;
++ sd->recout.width += 1;
++ sd->viewport.x -= dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1));
++ sd->viewport_c.x -= dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1));
++ sd->viewport_c.width += dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1));
++ sd->viewport.width += dc_fixpt_ceil(dc_fixpt_mul_int(sd->ratios.horz, 1));
++ }
+ }
++
+ if (!next_odm_pipe->top_pipe)
+ next_odm_pipe->stream_res.opp = pool->opps[next_odm_pipe->pipe_idx];
+ else
+@@ -2133,6 +2155,7 @@ bool dcn20_fast_validate_bw(
+ ASSERT(0);
+ }
+ }
++
+ /* Actual dsc count per stream dsc validation*/
+ if (!dcn20_validate_dsc(dc, context)) {
+ context->bw_ctx.dml.vba.ValidationStatus[context->bw_ctx.dml.vba.soc.num_states] =
--- /dev/null
+From 24d3749c11d949972d8c22e75567dc90ff5482e7 Mon Sep 17 00:00:00 2001
+From: Lo-an Chen <lo-an.chen@amd.com>
+Date: Thu, 14 Nov 2024 17:53:41 +0800
+Subject: drm/amd/display: Correct prefetch calculation
+
+From: Lo-an Chen <lo-an.chen@amd.com>
+
+commit 24d3749c11d949972d8c22e75567dc90ff5482e7 upstream.
+
+[WHY]
+The minimum value of the dst_y_prefetch_equ was not correct
+in the prefetch calculation, which causes OPTC underflow.
+
+[HOW]
+Add a min operation on dst_y_prefetch_equ in the prefetch calculation.
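+
+For reference, the clamp value used in the hunk below follows from the
+DST_Y_PREFETCH register field being a U6.2 fixed-point value: 6 integer
+bits and 2 fractional bits give a maximum of 0b111111.11, i.e.
+63 + 3/4 = 63.75, which is why dst_y_prefetch_equ is limited with
+dml_min(dst_y_prefetch_equ, 63.75).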
+
+Cc: Mario Limonciello <mario.limonciello@amd.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Nicholas Kazlauskas <nicholas.kazlauskas@amd.com>
+Signed-off-by: Lo-an Chen <lo-an.chen@amd.com>
+Signed-off-by: Alex Hung <alex.hung@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
++++ b/drivers/gpu/drm/amd/display/dc/dml2/display_mode_core.c
+@@ -1222,6 +1222,7 @@ static dml_bool_t CalculatePrefetchSched
+ s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;
+
+ s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + dml_max(p->TWait + p->TCalc, *p->Tdmdl)) / s->LineTime - (*p->DSTYAfterScaler + (dml_float_t) *p->DSTXAfterScaler / (dml_float_t)p->myPipe->HTotal);
++ s->dst_y_prefetch_equ = dml_min(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH
+
+ #ifdef __DML_VBA_DEBUG__
+ dml_print("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal);
--- /dev/null
+From a29997b7ac1f5c816b543e0c56aa2b5b56baac24 Mon Sep 17 00:00:00 2001
+From: Dillon Varone <dillon.varone@amd.com>
+Date: Wed, 13 Nov 2024 16:44:15 -0500
+Subject: drm/amd/display: Limit VTotal range to max hw cap minus fp
+
+From: Dillon Varone <dillon.varone@amd.com>
+
+commit a29997b7ac1f5c816b543e0c56aa2b5b56baac24 upstream.
+
+[WHY & HOW]
+Hardware does not support VTotal being within fp2 lines of the maximum
+possible VTotal, so add a capability flag to track this limitation and
+apply it where necessary.
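+
+As a worked example of the new cap (the front porch value is illustrative):
+with max_v_total = (1 << 15) - 1 = 32767, as set by the resource
+constructors below, and a timing with a vertical front porch of 4 lines,
+the calc_max_hardware_v_total() helper added in this patch yields
+32767 - (4 + 1) = 32762 as the largest VTotal the hardware may be
+programmed with when vtotal_limited_by_fp2 is set.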
+
+Cc: Mario Limonciello <mario.limonciello@amd.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Jun Lei <jun.lei@amd.com>
+Reviewed-by: Anthony Koo <anthony.koo@amd.com>
+Signed-off-by: Dillon Varone <dillon.varone@amd.com>
+Signed-off-by: Alex Hung <alex.hung@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/display/dc/dc.h | 1
+ drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c | 27 +++++++++-
+ drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c | 1
+ drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c | 1
+ drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c | 1
+ drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c | 1
+ drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c | 1
+ drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c | 1
+ drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c | 1
+ drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c | 1
+ drivers/gpu/drm/amd/display/modules/freesync/freesync.c | 13 ++++
+ 11 files changed, 46 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/amd/display/dc/dc.h
++++ b/drivers/gpu/drm/amd/display/dc/dc.h
+@@ -285,6 +285,7 @@ struct dc_caps {
+ uint16_t subvp_vertical_int_margin_us;
+ bool seamless_odm;
+ uint32_t max_v_total;
++ bool vtotal_limited_by_fp2;
+ uint32_t max_disp_clock_khz_at_vmin;
+ uint8_t subvp_drr_vblank_start_margin_us;
+ bool cursor_not_scaled;
+--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
++++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
+@@ -339,11 +339,22 @@ void dml21_apply_soc_bb_overrides(struct
+ // }
+ }
+
++static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stream)
++{
++ unsigned int max_hw_v_total = stream->ctx->dc->caps.max_v_total;
++
++ if (stream->ctx->dc->caps.vtotal_limited_by_fp2) {
++ max_hw_v_total -= stream->timing.v_front_porch + 1;
++ }
++
++ return max_hw_v_total;
++}
++
+ static void populate_dml21_timing_config_from_stream_state(struct dml2_timing_cfg *timing,
+ struct dc_stream_state *stream,
+ struct dml2_context *dml_ctx)
+ {
+- unsigned int hblank_start, vblank_start;
++ unsigned int hblank_start, vblank_start, min_hardware_refresh_in_uhz;
+
+ timing->h_active = stream->timing.h_addressable + stream->timing.h_border_left + stream->timing.h_border_right;
+ timing->v_active = stream->timing.v_addressable + stream->timing.v_border_bottom + stream->timing.v_border_top;
+@@ -371,11 +382,23 @@ static void populate_dml21_timing_config
+ - stream->timing.v_border_top - stream->timing.v_border_bottom;
+
+ timing->drr_config.enabled = stream->ignore_msa_timing_param;
+- timing->drr_config.min_refresh_uhz = stream->timing.min_refresh_in_uhz;
+ timing->drr_config.drr_active_variable = stream->vrr_active_variable;
+ timing->drr_config.drr_active_fixed = stream->vrr_active_fixed;
+ timing->drr_config.disallowed = !stream->allow_freesync;
+
++ /* limit min refresh rate to DC cap */
++ min_hardware_refresh_in_uhz = stream->timing.min_refresh_in_uhz;
++ if (stream->ctx->dc->caps.max_v_total != 0) {
++ min_hardware_refresh_in_uhz = div64_u64((stream->timing.pix_clk_100hz * 100000000ULL),
++ (stream->timing.h_total * (long long)calc_max_hardware_v_total(stream)));
++ }
++
++ if (stream->timing.min_refresh_in_uhz > min_hardware_refresh_in_uhz) {
++ timing->drr_config.min_refresh_uhz = stream->timing.min_refresh_in_uhz;
++ } else {
++ timing->drr_config.min_refresh_uhz = min_hardware_refresh_in_uhz;
++ }
++
+ if (dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase &&
+ stream->ctx->dc->config.enable_fpo_flicker_detection == 1)
+ timing->drr_config.max_instant_vtotal_delta = dml_ctx->config.callbacks.get_max_flickerless_instant_vtotal_increase(stream, false);
+--- a/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/resource/dcn30/dcn30_resource.c
+@@ -2354,6 +2354,7 @@ static bool dcn30_resource_construct(
+
+ dc->caps.dp_hdmi21_pcon_support = true;
+ dc->caps.max_v_total = (1 << 15) - 1;
++ dc->caps.vtotal_limited_by_fp2 = true;
+
+ /* read VBIOS LTTPR caps */
+ {
+--- a/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/resource/dcn302/dcn302_resource.c
+@@ -1234,6 +1234,7 @@ static bool dcn302_resource_construct(
+ dc->caps.extended_aux_timeout_support = true;
+ dc->caps.dmcub_support = true;
+ dc->caps.max_v_total = (1 << 15) - 1;
++ dc->caps.vtotal_limited_by_fp2 = true;
+
+ /* Color pipeline capabilities */
+ dc->caps.color.dpp.dcn_arch = 1;
+--- a/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/resource/dcn303/dcn303_resource.c
+@@ -1179,6 +1179,7 @@ static bool dcn303_resource_construct(
+ dc->caps.extended_aux_timeout_support = true;
+ dc->caps.dmcub_support = true;
+ dc->caps.max_v_total = (1 << 15) - 1;
++ dc->caps.vtotal_limited_by_fp2 = true;
+
+ /* Color pipeline capabilities */
+ dc->caps.color.dpp.dcn_arch = 1;
+--- a/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/resource/dcn32/dcn32_resource.c
+@@ -2186,6 +2186,7 @@ static bool dcn32_resource_construct(
+ dc->caps.dmcub_support = true;
+ dc->caps.seamless_odm = true;
+ dc->caps.max_v_total = (1 << 15) - 1;
++ dc->caps.vtotal_limited_by_fp2 = true;
+
+ /* Color pipeline capabilities */
+ dc->caps.color.dpp.dcn_arch = 1;
+--- a/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/resource/dcn321/dcn321_resource.c
+@@ -1743,6 +1743,7 @@ static bool dcn321_resource_construct(
+ dc->caps.extended_aux_timeout_support = true;
+ dc->caps.dmcub_support = true;
+ dc->caps.max_v_total = (1 << 15) - 1;
++ dc->caps.vtotal_limited_by_fp2 = true;
+
+ /* Color pipeline capabilities */
+ dc->caps.color.dpp.dcn_arch = 1;
+--- a/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/resource/dcn35/dcn35_resource.c
+@@ -1850,6 +1850,7 @@ static bool dcn35_resource_construct(
+ dc->caps.zstate_support = true;
+ dc->caps.ips_support = true;
+ dc->caps.max_v_total = (1 << 15) - 1;
++ dc->caps.vtotal_limited_by_fp2 = true;
+
+ /* Color pipeline capabilities */
+ dc->caps.color.dpp.dcn_arch = 1;
+--- a/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/resource/dcn351/dcn351_resource.c
+@@ -1829,6 +1829,7 @@ static bool dcn351_resource_construct(
+ dc->caps.zstate_support = true;
+ dc->caps.ips_support = true;
+ dc->caps.max_v_total = (1 << 15) - 1;
++ dc->caps.vtotal_limited_by_fp2 = true;
+
+ /* Color pipeline capabilities */
+ dc->caps.color.dpp.dcn_arch = 1;
+--- a/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
++++ b/drivers/gpu/drm/amd/display/dc/resource/dcn401/dcn401_resource.c
+@@ -1826,6 +1826,7 @@ static bool dcn401_resource_construct(
+ dc->caps.extended_aux_timeout_support = true;
+ dc->caps.dmcub_support = true;
+ dc->caps.max_v_total = (1 << 15) - 1;
++ dc->caps.vtotal_limited_by_fp2 = true;
+
+ if (ASICREV_IS_GC_12_0_1_A0(dc->ctx->asic_id.hw_internal_rev))
+ dc->caps.dcc_plane_width_limit = 7680;
+--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
++++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+@@ -121,6 +121,17 @@ static unsigned int calc_duration_in_us_
+ return duration_in_us;
+ }
+
++static unsigned int calc_max_hardware_v_total(const struct dc_stream_state *stream)
++{
++ unsigned int max_hw_v_total = stream->ctx->dc->caps.max_v_total;
++
++ if (stream->ctx->dc->caps.vtotal_limited_by_fp2) {
++ max_hw_v_total -= stream->timing.v_front_porch + 1;
++ }
++
++ return max_hw_v_total;
++}
++
+ unsigned int mod_freesync_calc_v_total_from_refresh(
+ const struct dc_stream_state *stream,
+ unsigned int refresh_in_uhz)
+@@ -1002,7 +1013,7 @@ void mod_freesync_build_vrr_params(struc
+
+ if (stream->ctx->dc->caps.max_v_total != 0 && stream->timing.h_total != 0) {
+ min_hardware_refresh_in_uhz = div64_u64((stream->timing.pix_clk_100hz * 100000000ULL),
+- (stream->timing.h_total * (long long)stream->ctx->dc->caps.max_v_total));
++ (stream->timing.h_total * (long long)calc_max_hardware_v_total(stream)));
+ }
+ /* Limit minimum refresh rate to what can be supported by hardware */
+ min_refresh_in_uhz = min_hardware_refresh_in_uhz > in_config->min_refresh_in_uhz ?
--- /dev/null
+From 1443dd3c67f6d1a8bd1f810e598e2f0c6f19205c Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Sat, 16 Nov 2024 08:20:59 -0500
+Subject: drm/amd/pm: fix and simplify workload handling
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit 1443dd3c67f6d1a8bd1f810e598e2f0c6f19205c upstream.
+
+smu->workload_mask is IP specific and should not be messed with in
+the common code. The mask bits vary across SMU versions.
+
+Move all handling of smu->workload_mask into the backends and simplify
+the code. Store the user's preference in smu->power_profile_mode, which
+will be reflected in sysfs. For internal driver profile switches for KFD
+or VCN, just update the workload mask so that the user's preference is
+retained. Remove all of the now-unused workload-related elements from
+the smu structure.
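+
+A tiny self-contained sketch of the refcounting scheme described above
+(userspace C; the profile names and counts are placeholders, not the real
+SMU definitions): each profile holds a reference count, driver-internal
+get/put calls only touch the counts, and the effective workload mask sent
+to the SMU is simply the OR of every profile with a non-zero count:
+
+    #include <stdio.h>
+    #include <stdint.h>
+
+    enum { PROFILE_FULLSCREEN3D, PROFILE_VIDEO, PROFILE_COMPUTE, PROFILE_COUNT };
+
+    static unsigned int refcount[PROFILE_COUNT];
+
+    static void profile_get(int p) { refcount[p]++; }
+    static void profile_put(int p) { if (refcount[p]) refcount[p]--; }
+
+    static uint32_t workload_mask(void)
+    {
+            uint32_t mask = 0;
+            int i;
+
+            for (i = 0; i < PROFILE_COUNT; i++)
+                    if (refcount[i])
+                            mask |= 1u << i;
+            return mask;
+    }
+
+    int main(void)
+    {
+            profile_get(PROFILE_FULLSCREEN3D);  /* user preference       */
+            profile_get(PROFILE_COMPUTE);       /* e.g. KFD needs compute */
+            printf("mask 0x%x\n", (unsigned int)workload_mask()); /* both bits */
+            profile_put(PROFILE_COMPUTE);       /* KFD done              */
+            printf("mask 0x%x\n", (unsigned int)workload_mask()); /* user only */
+            return 0;
+    }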
+
+v2: use refcounts for workload profiles
+v3: rework based on feedback from Lijo
+v4: fix the refcount on failure, drop backend mask
+v5: rework custom handling
+v6: handle failure cleanup with custom profile
+v7: Update documentation
+
+Reviewed-by: Lijo Lazar <lijo.lazar@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: Kenneth Feng <kenneth.feng@amd.com>
+Cc: Lijo Lazar <lijo.lazar@amd.com>
+Cc: stable@vger.kernel.org # 6.11.x
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/pm/amdgpu_pm.c | 6
+ drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c | 148 ++++++++-----
+ drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h | 15 -
+ drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c | 166 ++++++++-------
+ drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c | 167 +++++++++------
+ drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c | 168 +++++++++------
+ drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 41 +--
+ drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c | 43 +--
+ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 167 ++++++++-------
+ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c | 138 +++++++-----
+ drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c | 176 +++++++++-------
+ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c | 25 ++
+ drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h | 4
+ 13 files changed, 744 insertions(+), 520 deletions(-)
+
+--- a/drivers/gpu/drm/amd/pm/amdgpu_pm.c
++++ b/drivers/gpu/drm/amd/pm/amdgpu_pm.c
+@@ -1409,7 +1409,11 @@ static ssize_t amdgpu_set_pp_mclk_od(str
+ * create a custom set of heuristics, write a string of numbers to the file
+ * starting with the number of the custom profile along with a setting
+ * for each heuristic parameter. Due to differences across asic families
+- * the heuristic parameters vary from family to family.
++ * the heuristic parameters vary from family to family. Additionally,
++ * you can apply the custom heuristics to different clock domains. Each
++ * clock domain is considered a distinct operation so if you modify the
++ * gfxclk heuristics and then the memclk heuristics, the all of the
++ * custom heuristics will be retained until you switch to another profile.
+ *
+ */
+
+--- a/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
+@@ -72,6 +72,10 @@ static int smu_set_power_limit(void *han
+ static int smu_set_fan_speed_rpm(void *handle, uint32_t speed);
+ static int smu_set_gfx_cgpg(struct smu_context *smu, bool enabled);
+ static int smu_set_mp1_state(void *handle, enum pp_mp1_state mp1_state);
++static void smu_power_profile_mode_get(struct smu_context *smu,
++ enum PP_SMC_POWER_PROFILE profile_mode);
++static void smu_power_profile_mode_put(struct smu_context *smu,
++ enum PP_SMC_POWER_PROFILE profile_mode);
+
+ static int smu_sys_get_pp_feature_mask(void *handle,
+ char *buf)
+@@ -1257,35 +1261,19 @@ static int smu_sw_init(void *handle)
+ INIT_WORK(&smu->interrupt_work, smu_interrupt_work_fn);
+ atomic64_set(&smu->throttle_int_counter, 0);
+ smu->watermarks_bitmap = 0;
+- smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+- smu->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+
+ atomic_set(&smu->smu_power.power_gate.vcn_gated, 1);
+ atomic_set(&smu->smu_power.power_gate.jpeg_gated, 1);
+ atomic_set(&smu->smu_power.power_gate.vpe_gated, 1);
+ atomic_set(&smu->smu_power.power_gate.umsch_mm_gated, 1);
+
+- smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0;
+- smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1;
+- smu->workload_prority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2;
+- smu->workload_prority[PP_SMC_POWER_PROFILE_VIDEO] = 3;
+- smu->workload_prority[PP_SMC_POWER_PROFILE_VR] = 4;
+- smu->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5;
+- smu->workload_prority[PP_SMC_POWER_PROFILE_CUSTOM] = 6;
+-
+ if (smu->is_apu ||
+ !smu_is_workload_profile_available(smu, PP_SMC_POWER_PROFILE_FULLSCREEN3D))
+- smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT];
++ smu->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+ else
+- smu->workload_mask = 1 << smu->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D];
++ smu->power_profile_mode = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
++ smu_power_profile_mode_get(smu, smu->power_profile_mode);
+
+- smu->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+- smu->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+- smu->workload_setting[2] = PP_SMC_POWER_PROFILE_POWERSAVING;
+- smu->workload_setting[3] = PP_SMC_POWER_PROFILE_VIDEO;
+- smu->workload_setting[4] = PP_SMC_POWER_PROFILE_VR;
+- smu->workload_setting[5] = PP_SMC_POWER_PROFILE_COMPUTE;
+- smu->workload_setting[6] = PP_SMC_POWER_PROFILE_CUSTOM;
+ smu->display_config = &adev->pm.pm_display_cfg;
+
+ smu->smu_dpm.dpm_level = AMD_DPM_FORCED_LEVEL_AUTO;
+@@ -1338,6 +1326,11 @@ static int smu_sw_fini(void *handle)
+ return ret;
+ }
+
++ if (smu->custom_profile_params) {
++ kfree(smu->custom_profile_params);
++ smu->custom_profile_params = NULL;
++ }
++
+ smu_fini_microcode(smu);
+
+ return 0;
+@@ -2117,6 +2110,9 @@ static int smu_suspend(void *handle)
+ if (!ret)
+ adev->gfx.gfx_off_entrycount = count;
+
++ /* clear this on suspend so it will get reprogrammed on resume */
++ smu->workload_mask = 0;
++
+ return 0;
+ }
+
+@@ -2229,25 +2225,49 @@ static int smu_enable_umd_pstate(void *h
+ }
+
+ static int smu_bump_power_profile_mode(struct smu_context *smu,
+- long *param,
+- uint32_t param_size)
++ long *custom_params,
++ u32 custom_params_max_idx)
+ {
+- int ret = 0;
++ u32 workload_mask = 0;
++ int i, ret = 0;
++
++ for (i = 0; i < PP_SMC_POWER_PROFILE_COUNT; i++) {
++ if (smu->workload_refcount[i])
++ workload_mask |= 1 << i;
++ }
++
++ if (smu->workload_mask == workload_mask)
++ return 0;
+
+ if (smu->ppt_funcs->set_power_profile_mode)
+- ret = smu->ppt_funcs->set_power_profile_mode(smu, param, param_size);
++ ret = smu->ppt_funcs->set_power_profile_mode(smu, workload_mask,
++ custom_params,
++ custom_params_max_idx);
++
++ if (!ret)
++ smu->workload_mask = workload_mask;
+
+ return ret;
+ }
+
++static void smu_power_profile_mode_get(struct smu_context *smu,
++ enum PP_SMC_POWER_PROFILE profile_mode)
++{
++ smu->workload_refcount[profile_mode]++;
++}
++
++static void smu_power_profile_mode_put(struct smu_context *smu,
++ enum PP_SMC_POWER_PROFILE profile_mode)
++{
++ if (smu->workload_refcount[profile_mode])
++ smu->workload_refcount[profile_mode]--;
++}
++
+ static int smu_adjust_power_state_dynamic(struct smu_context *smu,
+ enum amd_dpm_forced_level level,
+- bool skip_display_settings,
+- bool init)
++ bool skip_display_settings)
+ {
+ int ret = 0;
+- int index = 0;
+- long workload[1];
+ struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
+
+ if (!skip_display_settings) {
+@@ -2284,14 +2304,8 @@ static int smu_adjust_power_state_dynami
+ }
+
+ if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL &&
+- smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) {
+- index = fls(smu->workload_mask);
+- index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
+- workload[0] = smu->workload_setting[index];
+-
+- if (init || smu->power_profile_mode != workload[0])
+- smu_bump_power_profile_mode(smu, workload, 0);
+- }
++ smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)
++ smu_bump_power_profile_mode(smu, NULL, 0);
+
+ return ret;
+ }
+@@ -2310,13 +2324,13 @@ static int smu_handle_task(struct smu_co
+ ret = smu_pre_display_config_changed(smu);
+ if (ret)
+ return ret;
+- ret = smu_adjust_power_state_dynamic(smu, level, false, false);
++ ret = smu_adjust_power_state_dynamic(smu, level, false);
+ break;
+ case AMD_PP_TASK_COMPLETE_INIT:
+- ret = smu_adjust_power_state_dynamic(smu, level, true, true);
++ ret = smu_adjust_power_state_dynamic(smu, level, true);
+ break;
+ case AMD_PP_TASK_READJUST_POWER_STATE:
+- ret = smu_adjust_power_state_dynamic(smu, level, true, false);
++ ret = smu_adjust_power_state_dynamic(smu, level, true);
+ break;
+ default:
+ break;
+@@ -2338,12 +2352,11 @@ static int smu_handle_dpm_task(void *han
+
+ static int smu_switch_power_profile(void *handle,
+ enum PP_SMC_POWER_PROFILE type,
+- bool en)
++ bool enable)
+ {
+ struct smu_context *smu = handle;
+ struct smu_dpm_context *smu_dpm_ctx = &(smu->smu_dpm);
+- long workload[1];
+- uint32_t index;
++ int ret;
+
+ if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled)
+ return -EOPNOTSUPP;
+@@ -2351,21 +2364,21 @@ static int smu_switch_power_profile(void
+ if (!(type < PP_SMC_POWER_PROFILE_CUSTOM))
+ return -EINVAL;
+
+- if (!en) {
+- smu->workload_mask &= ~(1 << smu->workload_prority[type]);
+- index = fls(smu->workload_mask);
+- index = index > 0 && index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
+- workload[0] = smu->workload_setting[index];
+- } else {
+- smu->workload_mask |= (1 << smu->workload_prority[type]);
+- index = fls(smu->workload_mask);
+- index = index <= WORKLOAD_POLICY_MAX ? index - 1 : 0;
+- workload[0] = smu->workload_setting[index];
+- }
+-
+ if (smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL &&
+- smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM)
+- smu_bump_power_profile_mode(smu, workload, 0);
++ smu_dpm_ctx->dpm_level != AMD_DPM_FORCED_LEVEL_PERF_DETERMINISM) {
++ if (enable)
++ smu_power_profile_mode_get(smu, type);
++ else
++ smu_power_profile_mode_put(smu, type);
++ ret = smu_bump_power_profile_mode(smu, NULL, 0);
++ if (ret) {
++ if (enable)
++ smu_power_profile_mode_put(smu, type);
++ else
++ smu_power_profile_mode_get(smu, type);
++ return ret;
++ }
++ }
+
+ return 0;
+ }
+@@ -3053,12 +3066,35 @@ static int smu_set_power_profile_mode(vo
+ uint32_t param_size)
+ {
+ struct smu_context *smu = handle;
++ bool custom = false;
++ int ret = 0;
+
+ if (!smu->pm_enabled || !smu->adev->pm.dpm_enabled ||
+ !smu->ppt_funcs->set_power_profile_mode)
+ return -EOPNOTSUPP;
+
+- return smu_bump_power_profile_mode(smu, param, param_size);
++ if (param[param_size] == PP_SMC_POWER_PROFILE_CUSTOM) {
++ custom = true;
++ /* clear frontend mask so custom changes propogate */
++ smu->workload_mask = 0;
++ }
++
++ if ((param[param_size] != smu->power_profile_mode) || custom) {
++ /* clear the old user preference */
++ smu_power_profile_mode_put(smu, smu->power_profile_mode);
++ /* set the new user preference */
++ smu_power_profile_mode_get(smu, param[param_size]);
++ ret = smu_bump_power_profile_mode(smu,
++ custom ? param : NULL,
++ custom ? param_size : 0);
++ if (ret)
++ smu_power_profile_mode_put(smu, param[param_size]);
++ else
++ /* store the user's preference */
++ smu->power_profile_mode = param[param_size];
++ }
++
++ return ret;
+ }
+
+ static int smu_get_fan_control_mode(void *handle, u32 *fan_mode)
+--- a/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
++++ b/drivers/gpu/drm/amd/pm/swsmu/inc/amdgpu_smu.h
+@@ -556,11 +556,13 @@ struct smu_context {
+ uint32_t hard_min_uclk_req_from_dal;
+ bool disable_uclk_switch;
+
++ /* asic agnostic workload mask */
+ uint32_t workload_mask;
+- uint32_t workload_prority[WORKLOAD_POLICY_MAX];
+- uint32_t workload_setting[WORKLOAD_POLICY_MAX];
++ /* default/user workload preference */
+ uint32_t power_profile_mode;
+- uint32_t default_power_profile_mode;
++ uint32_t workload_refcount[PP_SMC_POWER_PROFILE_COUNT];
++ /* backend specific custom workload settings */
++ long *custom_profile_params;
+ bool pm_enabled;
+ bool is_apu;
+
+@@ -731,9 +733,12 @@ struct pptable_funcs {
+ * @set_power_profile_mode: Set a power profile mode. Also used to
+ * create/set custom power profile modes.
+ * &input: Power profile mode parameters.
+- * &size: Size of &input.
++ * &workload_mask: mask of workloads to enable
++ * &custom_params: custom profile parameters
++ * &custom_params_max_idx: max valid idx into custom_params
+ */
+- int (*set_power_profile_mode)(struct smu_context *smu, long *input, uint32_t size);
++ int (*set_power_profile_mode)(struct smu_context *smu, u32 workload_mask,
++ long *custom_params, u32 custom_params_max_idx);
+
+ /**
+ * @dpm_set_vcn_enable: Enable/disable VCN engine dynamic power
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
+@@ -1445,98 +1445,120 @@ static int arcturus_get_power_profile_mo
+ return size;
+ }
+
+-static int arcturus_set_power_profile_mode(struct smu_context *smu,
+- long *input,
+- uint32_t size)
++#define ARCTURUS_CUSTOM_PARAMS_COUNT 10
++#define ARCTURUS_CUSTOM_PARAMS_CLOCK_COUNT 2
++#define ARCTURUS_CUSTOM_PARAMS_SIZE (ARCTURUS_CUSTOM_PARAMS_CLOCK_COUNT * ARCTURUS_CUSTOM_PARAMS_COUNT * sizeof(long))
++
++static int arcturus_set_power_profile_mode_coeff(struct smu_context *smu,
++ long *input)
+ {
+ DpmActivityMonitorCoeffInt_t activity_monitor;
+- int workload_type = 0;
+- uint32_t profile_mode = input[size];
+- int ret = 0;
++ int ret, idx;
+
+- if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
+- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode);
+- return -EINVAL;
++ ret = smu_cmn_update_table(smu,
++ SMU_TABLE_ACTIVITY_MONITOR_COEFF,
++ WORKLOAD_PPLIB_CUSTOM_BIT,
++ (void *)(&activity_monitor),
++ false);
++ if (ret) {
++ dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
++ return ret;
+ }
+
++ idx = 0 * ARCTURUS_CUSTOM_PARAMS_COUNT;
++ if (input[idx]) {
++ /* Gfxclk */
++ activity_monitor.Gfx_FPS = input[idx + 1];
++ activity_monitor.Gfx_UseRlcBusy = input[idx + 2];
++ activity_monitor.Gfx_MinActiveFreqType = input[idx + 3];
++ activity_monitor.Gfx_MinActiveFreq = input[idx + 4];
++ activity_monitor.Gfx_BoosterFreqType = input[idx + 5];
++ activity_monitor.Gfx_BoosterFreq = input[idx + 6];
++ activity_monitor.Gfx_PD_Data_limit_c = input[idx + 7];
++ activity_monitor.Gfx_PD_Data_error_coeff = input[idx + 8];
++ activity_monitor.Gfx_PD_Data_error_rate_coeff = input[idx + 9];
++ }
++ idx = 1 * ARCTURUS_CUSTOM_PARAMS_COUNT;
++ if (input[idx]) {
++ /* Uclk */
++ activity_monitor.Mem_FPS = input[idx + 1];
++ activity_monitor.Mem_UseRlcBusy = input[idx + 2];
++ activity_monitor.Mem_MinActiveFreqType = input[idx + 3];
++ activity_monitor.Mem_MinActiveFreq = input[idx + 4];
++ activity_monitor.Mem_BoosterFreqType = input[idx + 5];
++ activity_monitor.Mem_BoosterFreq = input[idx + 6];
++ activity_monitor.Mem_PD_Data_limit_c = input[idx + 7];
++ activity_monitor.Mem_PD_Data_error_coeff = input[idx + 8];
++ activity_monitor.Mem_PD_Data_error_rate_coeff = input[idx + 9];
++ }
+
+- if ((profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) &&
+- (smu->smc_fw_version >= 0x360d00)) {
+- if (size != 10)
+- return -EINVAL;
++ ret = smu_cmn_update_table(smu,
++ SMU_TABLE_ACTIVITY_MONITOR_COEFF,
++ WORKLOAD_PPLIB_CUSTOM_BIT,
++ (void *)(&activity_monitor),
++ true);
++ if (ret) {
++ dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++ return ret;
++ }
+
+- ret = smu_cmn_update_table(smu,
+- SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+- WORKLOAD_PPLIB_CUSTOM_BIT,
+- (void *)(&activity_monitor),
+- false);
+- if (ret) {
+- dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
+- return ret;
+- }
++ return ret;
++}
++
++static int arcturus_set_power_profile_mode(struct smu_context *smu,
++ u32 workload_mask,
++ long *custom_params,
++ u32 custom_params_max_idx)
++{
++ u32 backend_workload_mask = 0;
++ int ret, idx = -1, i;
++
++ smu_cmn_get_backend_workload_mask(smu, workload_mask,
++ &backend_workload_mask);
+
+- switch (input[0]) {
+- case 0: /* Gfxclk */
+- activity_monitor.Gfx_FPS = input[1];
+- activity_monitor.Gfx_UseRlcBusy = input[2];
+- activity_monitor.Gfx_MinActiveFreqType = input[3];
+- activity_monitor.Gfx_MinActiveFreq = input[4];
+- activity_monitor.Gfx_BoosterFreqType = input[5];
+- activity_monitor.Gfx_BoosterFreq = input[6];
+- activity_monitor.Gfx_PD_Data_limit_c = input[7];
+- activity_monitor.Gfx_PD_Data_error_coeff = input[8];
+- activity_monitor.Gfx_PD_Data_error_rate_coeff = input[9];
+- break;
+- case 1: /* Uclk */
+- activity_monitor.Mem_FPS = input[1];
+- activity_monitor.Mem_UseRlcBusy = input[2];
+- activity_monitor.Mem_MinActiveFreqType = input[3];
+- activity_monitor.Mem_MinActiveFreq = input[4];
+- activity_monitor.Mem_BoosterFreqType = input[5];
+- activity_monitor.Mem_BoosterFreq = input[6];
+- activity_monitor.Mem_PD_Data_limit_c = input[7];
+- activity_monitor.Mem_PD_Data_error_coeff = input[8];
+- activity_monitor.Mem_PD_Data_error_rate_coeff = input[9];
+- break;
+- default:
++ if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) {
++ if (smu->smc_fw_version < 0x360d00)
+ return -EINVAL;
++ if (!smu->custom_profile_params) {
++ smu->custom_profile_params =
++ kzalloc(ARCTURUS_CUSTOM_PARAMS_SIZE, GFP_KERNEL);
++ if (!smu->custom_profile_params)
++ return -ENOMEM;
+ }
+-
+- ret = smu_cmn_update_table(smu,
+- SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+- WORKLOAD_PPLIB_CUSTOM_BIT,
+- (void *)(&activity_monitor),
+- true);
++ if (custom_params && custom_params_max_idx) {
++ if (custom_params_max_idx != ARCTURUS_CUSTOM_PARAMS_COUNT)
++ return -EINVAL;
++ if (custom_params[0] >= ARCTURUS_CUSTOM_PARAMS_CLOCK_COUNT)
++ return -EINVAL;
++ idx = custom_params[0] * ARCTURUS_CUSTOM_PARAMS_COUNT;
++ smu->custom_profile_params[idx] = 1;
++ for (i = 1; i < custom_params_max_idx; i++)
++ smu->custom_profile_params[idx + i] = custom_params[i];
++ }
++ ret = arcturus_set_power_profile_mode_coeff(smu,
++ smu->custom_profile_params);
+ if (ret) {
+- dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++ if (idx != -1)
++ smu->custom_profile_params[idx] = 0;
+ return ret;
+ }
+- }
+-
+- /*
+- * Conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT
+- * Not all profile modes are supported on arcturus.
+- */
+- workload_type = smu_cmn_to_asic_specific_index(smu,
+- CMN2ASIC_MAPPING_WORKLOAD,
+- profile_mode);
+- if (workload_type < 0) {
+- dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on arcturus\n", profile_mode);
+- return -EINVAL;
++ } else if (smu->custom_profile_params) {
++ memset(smu->custom_profile_params, 0, ARCTURUS_CUSTOM_PARAMS_SIZE);
+ }
+
+ ret = smu_cmn_send_smc_msg_with_param(smu,
+- SMU_MSG_SetWorkloadMask,
+- 1 << workload_type,
+- NULL);
++ SMU_MSG_SetWorkloadMask,
++ backend_workload_mask,
++ NULL);
+ if (ret) {
+- dev_err(smu->adev->dev, "Fail to set workload type %d\n", workload_type);
++ dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
++ workload_mask);
++ if (idx != -1)
++ smu->custom_profile_params[idx] = 0;
+ return ret;
+ }
+
+- smu->power_profile_mode = profile_mode;
+-
+- return 0;
++ return ret;
+ }
+
+ static int arcturus_set_performance_level(struct smu_context *smu,
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/navi10_ppt.c
+@@ -2004,87 +2004,122 @@ static int navi10_get_power_profile_mode
+ return size;
+ }
+
+-static int navi10_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
++#define NAVI10_CUSTOM_PARAMS_COUNT 10
++#define NAVI10_CUSTOM_PARAMS_CLOCKS_COUNT 3
++#define NAVI10_CUSTOM_PARAMS_SIZE (NAVI10_CUSTOM_PARAMS_CLOCKS_COUNT * NAVI10_CUSTOM_PARAMS_COUNT * sizeof(long))
++
++static int navi10_set_power_profile_mode_coeff(struct smu_context *smu,
++ long *input)
+ {
+ DpmActivityMonitorCoeffInt_t activity_monitor;
+- int workload_type, ret = 0;
++ int ret, idx;
+
+- smu->power_profile_mode = input[size];
++ ret = smu_cmn_update_table(smu,
++ SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
++ (void *)(&activity_monitor), false);
++ if (ret) {
++ dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
++ return ret;
++ }
+
+- if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
+- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
+- return -EINVAL;
++ idx = 0 * NAVI10_CUSTOM_PARAMS_COUNT;
++ if (input[idx]) {
++ /* Gfxclk */
++ activity_monitor.Gfx_FPS = input[idx + 1];
++ activity_monitor.Gfx_MinFreqStep = input[idx + 2];
++ activity_monitor.Gfx_MinActiveFreqType = input[idx + 3];
++ activity_monitor.Gfx_MinActiveFreq = input[idx + 4];
++ activity_monitor.Gfx_BoosterFreqType = input[idx + 5];
++ activity_monitor.Gfx_BoosterFreq = input[idx + 6];
++ activity_monitor.Gfx_PD_Data_limit_c = input[idx + 7];
++ activity_monitor.Gfx_PD_Data_error_coeff = input[idx + 8];
++ activity_monitor.Gfx_PD_Data_error_rate_coeff = input[idx + 9];
++ }
++ idx = 1 * NAVI10_CUSTOM_PARAMS_COUNT;
++ if (input[idx]) {
++ /* Socclk */
++ activity_monitor.Soc_FPS = input[idx + 1];
++ activity_monitor.Soc_MinFreqStep = input[idx + 2];
++ activity_monitor.Soc_MinActiveFreqType = input[idx + 3];
++ activity_monitor.Soc_MinActiveFreq = input[idx + 4];
++ activity_monitor.Soc_BoosterFreqType = input[idx + 5];
++ activity_monitor.Soc_BoosterFreq = input[idx + 6];
++ activity_monitor.Soc_PD_Data_limit_c = input[idx + 7];
++ activity_monitor.Soc_PD_Data_error_coeff = input[idx + 8];
++ activity_monitor.Soc_PD_Data_error_rate_coeff = input[idx + 9];
++ }
++ idx = 2 * NAVI10_CUSTOM_PARAMS_COUNT;
++ if (input[idx]) {
++ /* Memclk */
++ activity_monitor.Mem_FPS = input[idx + 1];
++ activity_monitor.Mem_MinFreqStep = input[idx + 2];
++ activity_monitor.Mem_MinActiveFreqType = input[idx + 3];
++ activity_monitor.Mem_MinActiveFreq = input[idx + 4];
++ activity_monitor.Mem_BoosterFreqType = input[idx + 5];
++ activity_monitor.Mem_BoosterFreq = input[idx + 6];
++ activity_monitor.Mem_PD_Data_limit_c = input[idx + 7];
++ activity_monitor.Mem_PD_Data_error_coeff = input[idx + 8];
++ activity_monitor.Mem_PD_Data_error_rate_coeff = input[idx + 9];
+ }
+
+- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
+- if (size != 10)
+- return -EINVAL;
++ ret = smu_cmn_update_table(smu,
++ SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
++ (void *)(&activity_monitor), true);
++ if (ret) {
++ dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++ return ret;
++ }
+
+- ret = smu_cmn_update_table(smu,
+- SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
+- (void *)(&activity_monitor), false);
+- if (ret) {
+- dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
+- return ret;
+- }
++ return ret;
++}
+
+- switch (input[0]) {
+- case 0: /* Gfxclk */
+- activity_monitor.Gfx_FPS = input[1];
+- activity_monitor.Gfx_MinFreqStep = input[2];
+- activity_monitor.Gfx_MinActiveFreqType = input[3];
+- activity_monitor.Gfx_MinActiveFreq = input[4];
+- activity_monitor.Gfx_BoosterFreqType = input[5];
+- activity_monitor.Gfx_BoosterFreq = input[6];
+- activity_monitor.Gfx_PD_Data_limit_c = input[7];
+- activity_monitor.Gfx_PD_Data_error_coeff = input[8];
+- activity_monitor.Gfx_PD_Data_error_rate_coeff = input[9];
+- break;
+- case 1: /* Socclk */
+- activity_monitor.Soc_FPS = input[1];
+- activity_monitor.Soc_MinFreqStep = input[2];
+- activity_monitor.Soc_MinActiveFreqType = input[3];
+- activity_monitor.Soc_MinActiveFreq = input[4];
+- activity_monitor.Soc_BoosterFreqType = input[5];
+- activity_monitor.Soc_BoosterFreq = input[6];
+- activity_monitor.Soc_PD_Data_limit_c = input[7];
+- activity_monitor.Soc_PD_Data_error_coeff = input[8];
+- activity_monitor.Soc_PD_Data_error_rate_coeff = input[9];
+- break;
+- case 2: /* Memclk */
+- activity_monitor.Mem_FPS = input[1];
+- activity_monitor.Mem_MinFreqStep = input[2];
+- activity_monitor.Mem_MinActiveFreqType = input[3];
+- activity_monitor.Mem_MinActiveFreq = input[4];
+- activity_monitor.Mem_BoosterFreqType = input[5];
+- activity_monitor.Mem_BoosterFreq = input[6];
+- activity_monitor.Mem_PD_Data_limit_c = input[7];
+- activity_monitor.Mem_PD_Data_error_coeff = input[8];
+- activity_monitor.Mem_PD_Data_error_rate_coeff = input[9];
+- break;
+- default:
+- return -EINVAL;
+- }
++static int navi10_set_power_profile_mode(struct smu_context *smu,
++ u32 workload_mask,
++ long *custom_params,
++ u32 custom_params_max_idx)
++{
++ u32 backend_workload_mask = 0;
++ int ret, idx = -1, i;
+
+- ret = smu_cmn_update_table(smu,
+- SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
+- (void *)(&activity_monitor), true);
++ smu_cmn_get_backend_workload_mask(smu, workload_mask,
++ &backend_workload_mask);
++
++ if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) {
++ if (!smu->custom_profile_params) {
++ smu->custom_profile_params = kzalloc(NAVI10_CUSTOM_PARAMS_SIZE, GFP_KERNEL);
++ if (!smu->custom_profile_params)
++ return -ENOMEM;
++ }
++ if (custom_params && custom_params_max_idx) {
++ if (custom_params_max_idx != NAVI10_CUSTOM_PARAMS_COUNT)
++ return -EINVAL;
++ if (custom_params[0] >= NAVI10_CUSTOM_PARAMS_CLOCKS_COUNT)
++ return -EINVAL;
++ idx = custom_params[0] * NAVI10_CUSTOM_PARAMS_COUNT;
++ smu->custom_profile_params[idx] = 1;
++ for (i = 1; i < custom_params_max_idx; i++)
++ smu->custom_profile_params[idx + i] = custom_params[i];
++ }
++ ret = navi10_set_power_profile_mode_coeff(smu,
++ smu->custom_profile_params);
+ if (ret) {
+- dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++ if (idx != -1)
++ smu->custom_profile_params[idx] = 0;
+ return ret;
+ }
++ } else if (smu->custom_profile_params) {
++ memset(smu->custom_profile_params, 0, NAVI10_CUSTOM_PARAMS_SIZE);
+ }
+
+- /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
+- workload_type = smu_cmn_to_asic_specific_index(smu,
+- CMN2ASIC_MAPPING_WORKLOAD,
+- smu->power_profile_mode);
+- if (workload_type < 0)
+- return -EINVAL;
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
+- 1 << workload_type, NULL);
+- if (ret)
+- dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__);
++ backend_workload_mask, NULL);
++ if (ret) {
++ dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
++ workload_mask);
++ if (idx != -1)
++ smu->custom_profile_params[idx] = 0;
++ return ret;
++ }
+
+ return ret;
+ }
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/sienna_cichlid_ppt.c
+@@ -1706,90 +1706,126 @@ static int sienna_cichlid_get_power_prof
+ return size;
+ }
+
+-static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
++#define SIENNA_CICHLID_CUSTOM_PARAMS_COUNT 10
++#define SIENNA_CICHLID_CUSTOM_PARAMS_CLOCK_COUNT 3
++#define SIENNA_CICHLID_CUSTOM_PARAMS_SIZE (SIENNA_CICHLID_CUSTOM_PARAMS_CLOCK_COUNT * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT * sizeof(long))
++
++static int sienna_cichlid_set_power_profile_mode_coeff(struct smu_context *smu,
++ long *input)
+ {
+
+ DpmActivityMonitorCoeffIntExternal_t activity_monitor_external;
+ DpmActivityMonitorCoeffInt_t *activity_monitor =
+ &(activity_monitor_external.DpmActivityMonitorCoeffInt);
+- int workload_type, ret = 0;
++ int ret, idx;
+
+- smu->power_profile_mode = input[size];
++ ret = smu_cmn_update_table(smu,
++ SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
++ (void *)(&activity_monitor_external), false);
++ if (ret) {
++ dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
++ return ret;
++ }
+
+- if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
+- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
+- return -EINVAL;
++ idx = 0 * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT;
++ if (input[idx]) {
++ /* Gfxclk */
++ activity_monitor->Gfx_FPS = input[idx + 1];
++ activity_monitor->Gfx_MinFreqStep = input[idx + 2];
++ activity_monitor->Gfx_MinActiveFreqType = input[idx + 3];
++ activity_monitor->Gfx_MinActiveFreq = input[idx + 4];
++ activity_monitor->Gfx_BoosterFreqType = input[idx + 5];
++ activity_monitor->Gfx_BoosterFreq = input[idx + 6];
++ activity_monitor->Gfx_PD_Data_limit_c = input[idx + 7];
++ activity_monitor->Gfx_PD_Data_error_coeff = input[idx + 8];
++ activity_monitor->Gfx_PD_Data_error_rate_coeff = input[idx + 9];
++ }
++ idx = 1 * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT;
++ if (input[idx]) {
++ /* Socclk */
++ activity_monitor->Fclk_FPS = input[idx + 1];
++ activity_monitor->Fclk_MinFreqStep = input[idx + 2];
++ activity_monitor->Fclk_MinActiveFreqType = input[idx + 3];
++ activity_monitor->Fclk_MinActiveFreq = input[idx + 4];
++ activity_monitor->Fclk_BoosterFreqType = input[idx + 5];
++ activity_monitor->Fclk_BoosterFreq = input[idx + 6];
++ activity_monitor->Fclk_PD_Data_limit_c = input[idx + 7];
++ activity_monitor->Fclk_PD_Data_error_coeff = input[idx + 8];
++ activity_monitor->Fclk_PD_Data_error_rate_coeff = input[idx + 9];
++ }
++ idx = 2 * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT;
++ if (input[idx]) {
++ /* Memclk */
++ activity_monitor->Mem_FPS = input[idx + 1];
++ activity_monitor->Mem_MinFreqStep = input[idx + 2];
++ activity_monitor->Mem_MinActiveFreqType = input[idx + 3];
++ activity_monitor->Mem_MinActiveFreq = input[idx + 4];
++ activity_monitor->Mem_BoosterFreqType = input[idx + 5];
++ activity_monitor->Mem_BoosterFreq = input[idx + 6];
++ activity_monitor->Mem_PD_Data_limit_c = input[idx + 7];
++ activity_monitor->Mem_PD_Data_error_coeff = input[idx + 8];
++ activity_monitor->Mem_PD_Data_error_rate_coeff = input[idx + 9];
+ }
+
+- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
+- if (size != 10)
+- return -EINVAL;
++ ret = smu_cmn_update_table(smu,
++ SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
++ (void *)(&activity_monitor_external), true);
++ if (ret) {
++ dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++ return ret;
++ }
+
+- ret = smu_cmn_update_table(smu,
+- SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
+- (void *)(&activity_monitor_external), false);
+- if (ret) {
+- dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
+- return ret;
+- }
++ return ret;
++}
+
+- switch (input[0]) {
+- case 0: /* Gfxclk */
+- activity_monitor->Gfx_FPS = input[1];
+- activity_monitor->Gfx_MinFreqStep = input[2];
+- activity_monitor->Gfx_MinActiveFreqType = input[3];
+- activity_monitor->Gfx_MinActiveFreq = input[4];
+- activity_monitor->Gfx_BoosterFreqType = input[5];
+- activity_monitor->Gfx_BoosterFreq = input[6];
+- activity_monitor->Gfx_PD_Data_limit_c = input[7];
+- activity_monitor->Gfx_PD_Data_error_coeff = input[8];
+- activity_monitor->Gfx_PD_Data_error_rate_coeff = input[9];
+- break;
+- case 1: /* Socclk */
+- activity_monitor->Fclk_FPS = input[1];
+- activity_monitor->Fclk_MinFreqStep = input[2];
+- activity_monitor->Fclk_MinActiveFreqType = input[3];
+- activity_monitor->Fclk_MinActiveFreq = input[4];
+- activity_monitor->Fclk_BoosterFreqType = input[5];
+- activity_monitor->Fclk_BoosterFreq = input[6];
+- activity_monitor->Fclk_PD_Data_limit_c = input[7];
+- activity_monitor->Fclk_PD_Data_error_coeff = input[8];
+- activity_monitor->Fclk_PD_Data_error_rate_coeff = input[9];
+- break;
+- case 2: /* Memclk */
+- activity_monitor->Mem_FPS = input[1];
+- activity_monitor->Mem_MinFreqStep = input[2];
+- activity_monitor->Mem_MinActiveFreqType = input[3];
+- activity_monitor->Mem_MinActiveFreq = input[4];
+- activity_monitor->Mem_BoosterFreqType = input[5];
+- activity_monitor->Mem_BoosterFreq = input[6];
+- activity_monitor->Mem_PD_Data_limit_c = input[7];
+- activity_monitor->Mem_PD_Data_error_coeff = input[8];
+- activity_monitor->Mem_PD_Data_error_rate_coeff = input[9];
+- break;
+- default:
+- return -EINVAL;
+- }
++static int sienna_cichlid_set_power_profile_mode(struct smu_context *smu,
++ u32 workload_mask,
++ long *custom_params,
++ u32 custom_params_max_idx)
++{
++ u32 backend_workload_mask = 0;
++ int ret, idx = -1, i;
+
+- ret = smu_cmn_update_table(smu,
+- SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
+- (void *)(&activity_monitor_external), true);
++ smu_cmn_get_backend_workload_mask(smu, workload_mask,
++ &backend_workload_mask);
++
++ if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) {
++ if (!smu->custom_profile_params) {
++ smu->custom_profile_params =
++ kzalloc(SIENNA_CICHLID_CUSTOM_PARAMS_SIZE, GFP_KERNEL);
++ if (!smu->custom_profile_params)
++ return -ENOMEM;
++ }
++ if (custom_params && custom_params_max_idx) {
++ if (custom_params_max_idx != SIENNA_CICHLID_CUSTOM_PARAMS_COUNT)
++ return -EINVAL;
++ if (custom_params[0] >= SIENNA_CICHLID_CUSTOM_PARAMS_CLOCK_COUNT)
++ return -EINVAL;
++ idx = custom_params[0] * SIENNA_CICHLID_CUSTOM_PARAMS_COUNT;
++ smu->custom_profile_params[idx] = 1;
++ for (i = 1; i < custom_params_max_idx; i++)
++ smu->custom_profile_params[idx + i] = custom_params[i];
++ }
++ ret = sienna_cichlid_set_power_profile_mode_coeff(smu,
++ smu->custom_profile_params);
+ if (ret) {
+- dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++ if (idx != -1)
++ smu->custom_profile_params[idx] = 0;
+ return ret;
+ }
++ } else if (smu->custom_profile_params) {
++ memset(smu->custom_profile_params, 0, SIENNA_CICHLID_CUSTOM_PARAMS_SIZE);
+ }
+
+- /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
+- workload_type = smu_cmn_to_asic_specific_index(smu,
+- CMN2ASIC_MAPPING_WORKLOAD,
+- smu->power_profile_mode);
+- if (workload_type < 0)
+- return -EINVAL;
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
+- 1 << workload_type, NULL);
+- if (ret)
+- dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__);
++ backend_workload_mask, NULL);
++ if (ret) {
++ dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
++ workload_mask);
++ if (idx != -1)
++ smu->custom_profile_params[idx] = 0;
++ return ret;
++ }
+
+ return ret;
+ }
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+@@ -1054,42 +1054,27 @@ static int vangogh_get_power_profile_mod
+ return size;
+ }
+
+-static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
++static int vangogh_set_power_profile_mode(struct smu_context *smu,
++ u32 workload_mask,
++ long *custom_params,
++ u32 custom_params_max_idx)
+ {
+- int workload_type, ret;
+- uint32_t profile_mode = input[size];
++ u32 backend_workload_mask = 0;
++ int ret;
+
+- if (profile_mode >= PP_SMC_POWER_PROFILE_COUNT) {
+- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode);
+- return -EINVAL;
+- }
+-
+- if (profile_mode == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT ||
+- profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING)
+- return 0;
+-
+- /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
+- workload_type = smu_cmn_to_asic_specific_index(smu,
+- CMN2ASIC_MAPPING_WORKLOAD,
+- profile_mode);
+- if (workload_type < 0) {
+- dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on VANGOGH\n",
+- profile_mode);
+- return -EINVAL;
+- }
++ smu_cmn_get_backend_workload_mask(smu, workload_mask,
++ &backend_workload_mask);
+
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify,
+- 1 << workload_type,
+- NULL);
++ backend_workload_mask,
++ NULL);
+ if (ret) {
+- dev_err_once(smu->adev->dev, "Fail to set workload type %d\n",
+- workload_type);
++ dev_err_once(smu->adev->dev, "Fail to set workload mask 0x%08x\n",
++ workload_mask);
+ return ret;
+ }
+
+- smu->power_profile_mode = profile_mode;
+-
+- return 0;
++ return ret;
+ }
+
+ static int vangogh_set_soft_freq_limited_range(struct smu_context *smu,
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
+@@ -862,44 +862,27 @@ static int renoir_force_clk_levels(struc
+ return ret;
+ }
+
+-static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
++static int renoir_set_power_profile_mode(struct smu_context *smu,
++ u32 workload_mask,
++ long *custom_params,
++ u32 custom_params_max_idx)
+ {
+- int workload_type, ret;
+- uint32_t profile_mode = input[size];
++ int ret;
++ u32 backend_workload_mask = 0;
+
+- if (profile_mode > PP_SMC_POWER_PROFILE_CUSTOM) {
+- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", profile_mode);
+- return -EINVAL;
+- }
+-
+- if (profile_mode == PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT ||
+- profile_mode == PP_SMC_POWER_PROFILE_POWERSAVING)
+- return 0;
+-
+- /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
+- workload_type = smu_cmn_to_asic_specific_index(smu,
+- CMN2ASIC_MAPPING_WORKLOAD,
+- profile_mode);
+- if (workload_type < 0) {
+- /*
+- * TODO: If some case need switch to powersave/default power mode
+- * then can consider enter WORKLOAD_COMPUTE/WORKLOAD_CUSTOM for power saving.
+- */
+- dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on RENOIR\n", profile_mode);
+- return -EINVAL;
+- }
++ smu_cmn_get_backend_workload_mask(smu, workload_mask,
++ &backend_workload_mask);
+
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_ActiveProcessNotify,
+- 1 << workload_type,
+- NULL);
++ backend_workload_mask,
++ NULL);
+ if (ret) {
+- dev_err_once(smu->adev->dev, "Fail to set workload type %d\n", workload_type);
++ dev_err_once(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
++ workload_mask);
+ return ret;
+ }
+
+- smu->power_profile_mode = profile_mode;
+-
+- return 0;
++ return ret;
+ }
+
+ static int renoir_set_peak_clock_by_device(struct smu_context *smu)
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+@@ -2477,82 +2477,76 @@ static int smu_v13_0_0_get_power_profile
+ return size;
+ }
+
+-static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
+- long *input,
+- uint32_t size)
++#define SMU_13_0_0_CUSTOM_PARAMS_COUNT 9
++#define SMU_13_0_0_CUSTOM_PARAMS_CLOCK_COUNT 2
++#define SMU_13_0_0_CUSTOM_PARAMS_SIZE (SMU_13_0_0_CUSTOM_PARAMS_CLOCK_COUNT * SMU_13_0_0_CUSTOM_PARAMS_COUNT * sizeof(long))
++
++static int smu_v13_0_0_set_power_profile_mode_coeff(struct smu_context *smu,
++ long *input)
+ {
+ DpmActivityMonitorCoeffIntExternal_t activity_monitor_external;
+ DpmActivityMonitorCoeffInt_t *activity_monitor =
+ &(activity_monitor_external.DpmActivityMonitorCoeffInt);
+- int workload_type, ret = 0;
+- u32 workload_mask, selected_workload_mask;
++ int ret, idx;
+
+- smu->power_profile_mode = input[size];
+-
+- if (smu->power_profile_mode >= PP_SMC_POWER_PROFILE_COUNT) {
+- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
+- return -EINVAL;
++ ret = smu_cmn_update_table(smu,
++ SMU_TABLE_ACTIVITY_MONITOR_COEFF,
++ WORKLOAD_PPLIB_CUSTOM_BIT,
++ (void *)(&activity_monitor_external),
++ false);
++ if (ret) {
++ dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
++ return ret;
+ }
+
+- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
+- if (size != 9)
+- return -EINVAL;
+-
+- ret = smu_cmn_update_table(smu,
+- SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+- WORKLOAD_PPLIB_CUSTOM_BIT,
+- (void *)(&activity_monitor_external),
+- false);
+- if (ret) {
+- dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
+- return ret;
+- }
+-
+- switch (input[0]) {
+- case 0: /* Gfxclk */
+- activity_monitor->Gfx_FPS = input[1];
+- activity_monitor->Gfx_MinActiveFreqType = input[2];
+- activity_monitor->Gfx_MinActiveFreq = input[3];
+- activity_monitor->Gfx_BoosterFreqType = input[4];
+- activity_monitor->Gfx_BoosterFreq = input[5];
+- activity_monitor->Gfx_PD_Data_limit_c = input[6];
+- activity_monitor->Gfx_PD_Data_error_coeff = input[7];
+- activity_monitor->Gfx_PD_Data_error_rate_coeff = input[8];
+- break;
+- case 1: /* Fclk */
+- activity_monitor->Fclk_FPS = input[1];
+- activity_monitor->Fclk_MinActiveFreqType = input[2];
+- activity_monitor->Fclk_MinActiveFreq = input[3];
+- activity_monitor->Fclk_BoosterFreqType = input[4];
+- activity_monitor->Fclk_BoosterFreq = input[5];
+- activity_monitor->Fclk_PD_Data_limit_c = input[6];
+- activity_monitor->Fclk_PD_Data_error_coeff = input[7];
+- activity_monitor->Fclk_PD_Data_error_rate_coeff = input[8];
+- break;
+- default:
+- return -EINVAL;
+- }
++ idx = 0 * SMU_13_0_0_CUSTOM_PARAMS_COUNT;
++ if (input[idx]) {
++ /* Gfxclk */
++ activity_monitor->Gfx_FPS = input[idx + 1];
++ activity_monitor->Gfx_MinActiveFreqType = input[idx + 2];
++ activity_monitor->Gfx_MinActiveFreq = input[idx + 3];
++ activity_monitor->Gfx_BoosterFreqType = input[idx + 4];
++ activity_monitor->Gfx_BoosterFreq = input[idx + 5];
++ activity_monitor->Gfx_PD_Data_limit_c = input[idx + 6];
++ activity_monitor->Gfx_PD_Data_error_coeff = input[idx + 7];
++ activity_monitor->Gfx_PD_Data_error_rate_coeff = input[idx + 8];
++ }
++ idx = 1 * SMU_13_0_0_CUSTOM_PARAMS_COUNT;
++ if (input[idx]) {
++ /* Fclk */
++ activity_monitor->Fclk_FPS = input[idx + 1];
++ activity_monitor->Fclk_MinActiveFreqType = input[idx + 2];
++ activity_monitor->Fclk_MinActiveFreq = input[idx + 3];
++ activity_monitor->Fclk_BoosterFreqType = input[idx + 4];
++ activity_monitor->Fclk_BoosterFreq = input[idx + 5];
++ activity_monitor->Fclk_PD_Data_limit_c = input[idx + 6];
++ activity_monitor->Fclk_PD_Data_error_coeff = input[idx + 7];
++ activity_monitor->Fclk_PD_Data_error_rate_coeff = input[idx + 8];
++ }
+
+- ret = smu_cmn_update_table(smu,
+- SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+- WORKLOAD_PPLIB_CUSTOM_BIT,
+- (void *)(&activity_monitor_external),
+- true);
+- if (ret) {
+- dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
+- return ret;
+- }
++ ret = smu_cmn_update_table(smu,
++ SMU_TABLE_ACTIVITY_MONITOR_COEFF,
++ WORKLOAD_PPLIB_CUSTOM_BIT,
++ (void *)(&activity_monitor_external),
++ true);
++ if (ret) {
++ dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++ return ret;
+ }
+
+- /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
+- workload_type = smu_cmn_to_asic_specific_index(smu,
+- CMN2ASIC_MAPPING_WORKLOAD,
+- smu->power_profile_mode);
++ return ret;
++}
+
+- if (workload_type < 0)
+- return -EINVAL;
++static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
++ u32 workload_mask,
++ long *custom_params,
++ u32 custom_params_max_idx)
++{
++ u32 backend_workload_mask = 0;
++ int workload_type, ret, idx = -1, i;
+
+- selected_workload_mask = workload_mask = 1 << workload_type;
++ smu_cmn_get_backend_workload_mask(smu, workload_mask,
++ &backend_workload_mask);
+
+ /* Add optimizations for SMU13.0.0/10. Reuse the power saving profile */
+ if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) &&
+@@ -2564,15 +2558,48 @@ static int smu_v13_0_0_set_power_profile
+ CMN2ASIC_MAPPING_WORKLOAD,
+ PP_SMC_POWER_PROFILE_POWERSAVING);
+ if (workload_type >= 0)
+- workload_mask |= 1 << workload_type;
++ backend_workload_mask |= 1 << workload_type;
++ }
++
++ if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) {
++ if (!smu->custom_profile_params) {
++ smu->custom_profile_params =
++ kzalloc(SMU_13_0_0_CUSTOM_PARAMS_SIZE, GFP_KERNEL);
++ if (!smu->custom_profile_params)
++ return -ENOMEM;
++ }
++ if (custom_params && custom_params_max_idx) {
++ if (custom_params_max_idx != SMU_13_0_0_CUSTOM_PARAMS_COUNT)
++ return -EINVAL;
++ if (custom_params[0] >= SMU_13_0_0_CUSTOM_PARAMS_CLOCK_COUNT)
++ return -EINVAL;
++ idx = custom_params[0] * SMU_13_0_0_CUSTOM_PARAMS_COUNT;
++ smu->custom_profile_params[idx] = 1;
++ for (i = 1; i < custom_params_max_idx; i++)
++ smu->custom_profile_params[idx + i] = custom_params[i];
++ }
++ ret = smu_v13_0_0_set_power_profile_mode_coeff(smu,
++ smu->custom_profile_params);
++ if (ret) {
++ if (idx != -1)
++ smu->custom_profile_params[idx] = 0;
++ return ret;
++ }
++ } else if (smu->custom_profile_params) {
++ memset(smu->custom_profile_params, 0, SMU_13_0_0_CUSTOM_PARAMS_SIZE);
+ }
+
+ ret = smu_cmn_send_smc_msg_with_param(smu,
+- SMU_MSG_SetWorkloadMask,
+- workload_mask,
+- NULL);
+- if (!ret)
+- smu->workload_mask = selected_workload_mask;
++ SMU_MSG_SetWorkloadMask,
++ backend_workload_mask,
++ NULL);
++ if (ret) {
++ dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
++ workload_mask);
++ if (idx != -1)
++ smu->custom_profile_params[idx] = 0;
++ return ret;
++ }
+
+ return ret;
+ }
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_7_ppt.c
+@@ -2436,78 +2436,110 @@ out:
+ return result;
+ }
+
+-static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu, long *input, uint32_t size)
++#define SMU_13_0_7_CUSTOM_PARAMS_COUNT 8
++#define SMU_13_0_7_CUSTOM_PARAMS_CLOCK_COUNT 2
++#define SMU_13_0_7_CUSTOM_PARAMS_SIZE (SMU_13_0_7_CUSTOM_PARAMS_CLOCK_COUNT * SMU_13_0_7_CUSTOM_PARAMS_COUNT * sizeof(long))
++
++static int smu_v13_0_7_set_power_profile_mode_coeff(struct smu_context *smu,
++ long *input)
+ {
+
+ DpmActivityMonitorCoeffIntExternal_t activity_monitor_external;
+ DpmActivityMonitorCoeffInt_t *activity_monitor =
+ &(activity_monitor_external.DpmActivityMonitorCoeffInt);
+- int workload_type, ret = 0;
++ int ret, idx;
+
+- smu->power_profile_mode = input[size];
++ ret = smu_cmn_update_table(smu,
++ SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
++ (void *)(&activity_monitor_external), false);
++ if (ret) {
++ dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
++ return ret;
++ }
+
+- if (smu->power_profile_mode > PP_SMC_POWER_PROFILE_WINDOW3D) {
+- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
+- return -EINVAL;
++ idx = 0 * SMU_13_0_7_CUSTOM_PARAMS_COUNT;
++ if (input[idx]) {
++ /* Gfxclk */
++ activity_monitor->Gfx_ActiveHystLimit = input[idx + 1];
++ activity_monitor->Gfx_IdleHystLimit = input[idx + 2];
++ activity_monitor->Gfx_FPS = input[idx + 3];
++ activity_monitor->Gfx_MinActiveFreqType = input[idx + 4];
++ activity_monitor->Gfx_BoosterFreqType = input[idx + 5];
++ activity_monitor->Gfx_MinActiveFreq = input[idx + 6];
++ activity_monitor->Gfx_BoosterFreq = input[idx + 7];
++ }
++ idx = 1 * SMU_13_0_7_CUSTOM_PARAMS_COUNT;
++ if (input[idx]) {
++ /* Fclk */
++ activity_monitor->Fclk_ActiveHystLimit = input[idx + 1];
++ activity_monitor->Fclk_IdleHystLimit = input[idx + 2];
++ activity_monitor->Fclk_FPS = input[idx + 3];
++ activity_monitor->Fclk_MinActiveFreqType = input[idx + 4];
++ activity_monitor->Fclk_BoosterFreqType = input[idx + 5];
++ activity_monitor->Fclk_MinActiveFreq = input[idx + 6];
++ activity_monitor->Fclk_BoosterFreq = input[idx + 7];
+ }
+
+- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
+- if (size != 8)
+- return -EINVAL;
++ ret = smu_cmn_update_table(smu,
++ SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
++ (void *)(&activity_monitor_external), true);
++ if (ret) {
++ dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++ return ret;
++ }
+
+- ret = smu_cmn_update_table(smu,
+- SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
+- (void *)(&activity_monitor_external), false);
+- if (ret) {
+- dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
+- return ret;
+- }
++ return ret;
++}
+
+- switch (input[0]) {
+- case 0: /* Gfxclk */
+- activity_monitor->Gfx_ActiveHystLimit = input[1];
+- activity_monitor->Gfx_IdleHystLimit = input[2];
+- activity_monitor->Gfx_FPS = input[3];
+- activity_monitor->Gfx_MinActiveFreqType = input[4];
+- activity_monitor->Gfx_BoosterFreqType = input[5];
+- activity_monitor->Gfx_MinActiveFreq = input[6];
+- activity_monitor->Gfx_BoosterFreq = input[7];
+- break;
+- case 1: /* Fclk */
+- activity_monitor->Fclk_ActiveHystLimit = input[1];
+- activity_monitor->Fclk_IdleHystLimit = input[2];
+- activity_monitor->Fclk_FPS = input[3];
+- activity_monitor->Fclk_MinActiveFreqType = input[4];
+- activity_monitor->Fclk_BoosterFreqType = input[5];
+- activity_monitor->Fclk_MinActiveFreq = input[6];
+- activity_monitor->Fclk_BoosterFreq = input[7];
+- break;
+- default:
+- return -EINVAL;
+- }
++static int smu_v13_0_7_set_power_profile_mode(struct smu_context *smu,
++ u32 workload_mask,
++ long *custom_params,
++ u32 custom_params_max_idx)
++{
++ u32 backend_workload_mask = 0;
++ int ret, idx = -1, i;
++
++ smu_cmn_get_backend_workload_mask(smu, workload_mask,
++ &backend_workload_mask);
+
+- ret = smu_cmn_update_table(smu,
+- SMU_TABLE_ACTIVITY_MONITOR_COEFF, WORKLOAD_PPLIB_CUSTOM_BIT,
+- (void *)(&activity_monitor_external), true);
++ if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) {
++ if (!smu->custom_profile_params) {
++ smu->custom_profile_params =
++ kzalloc(SMU_13_0_7_CUSTOM_PARAMS_SIZE, GFP_KERNEL);
++ if (!smu->custom_profile_params)
++ return -ENOMEM;
++ }
++ if (custom_params && custom_params_max_idx) {
++ if (custom_params_max_idx != SMU_13_0_7_CUSTOM_PARAMS_COUNT)
++ return -EINVAL;
++ if (custom_params[0] >= SMU_13_0_7_CUSTOM_PARAMS_CLOCK_COUNT)
++ return -EINVAL;
++ idx = custom_params[0] * SMU_13_0_7_CUSTOM_PARAMS_COUNT;
++ smu->custom_profile_params[idx] = 1;
++ for (i = 1; i < custom_params_max_idx; i++)
++ smu->custom_profile_params[idx + i] = custom_params[i];
++ }
++ ret = smu_v13_0_7_set_power_profile_mode_coeff(smu,
++ smu->custom_profile_params);
+ if (ret) {
+- dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++ if (idx != -1)
++ smu->custom_profile_params[idx] = 0;
+ return ret;
+ }
++ } else if (smu->custom_profile_params) {
++ memset(smu->custom_profile_params, 0, SMU_13_0_7_CUSTOM_PARAMS_SIZE);
+ }
+
+- /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
+- workload_type = smu_cmn_to_asic_specific_index(smu,
+- CMN2ASIC_MAPPING_WORKLOAD,
+- smu->power_profile_mode);
+- if (workload_type < 0)
+- return -EINVAL;
+ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
+- 1 << workload_type, NULL);
++ backend_workload_mask, NULL);
+
+- if (ret)
+- dev_err(smu->adev->dev, "[%s] Failed to set work load mask!", __func__);
+- else
+- smu->workload_mask = (1 << workload_type);
++ if (ret) {
++ dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
++ workload_mask);
++ if (idx != -1)
++ smu->custom_profile_params[idx] = 0;
++ return ret;
++ }
+
+ return ret;
+ }
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu14/smu_v14_0_2_ppt.c
+@@ -1751,90 +1751,120 @@ static int smu_v14_0_2_get_power_profile
+ return size;
+ }
+
+-static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu,
+- long *input,
+- uint32_t size)
++#define SMU_14_0_2_CUSTOM_PARAMS_COUNT 9
++#define SMU_14_0_2_CUSTOM_PARAMS_CLOCK_COUNT 2
++#define SMU_14_0_2_CUSTOM_PARAMS_SIZE (SMU_14_0_2_CUSTOM_PARAMS_CLOCK_COUNT * SMU_14_0_2_CUSTOM_PARAMS_COUNT * sizeof(long))
++
++static int smu_v14_0_2_set_power_profile_mode_coeff(struct smu_context *smu,
++ long *input)
+ {
+ DpmActivityMonitorCoeffIntExternal_t activity_monitor_external;
+ DpmActivityMonitorCoeffInt_t *activity_monitor =
+ &(activity_monitor_external.DpmActivityMonitorCoeffInt);
+- int workload_type, ret = 0;
+- uint32_t current_profile_mode = smu->power_profile_mode;
+- smu->power_profile_mode = input[size];
+-
+- if (smu->power_profile_mode >= PP_SMC_POWER_PROFILE_COUNT) {
+- dev_err(smu->adev->dev, "Invalid power profile mode %d\n", smu->power_profile_mode);
+- return -EINVAL;
+- }
+-
+- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_CUSTOM) {
+- if (size != 9)
+- return -EINVAL;
+-
+- ret = smu_cmn_update_table(smu,
+- SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+- WORKLOAD_PPLIB_CUSTOM_BIT,
+- (void *)(&activity_monitor_external),
+- false);
+- if (ret) {
+- dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
+- return ret;
+- }
++ int ret, idx;
+
+- switch (input[0]) {
+- case 0: /* Gfxclk */
+- activity_monitor->Gfx_FPS = input[1];
+- activity_monitor->Gfx_MinActiveFreqType = input[2];
+- activity_monitor->Gfx_MinActiveFreq = input[3];
+- activity_monitor->Gfx_BoosterFreqType = input[4];
+- activity_monitor->Gfx_BoosterFreq = input[5];
+- activity_monitor->Gfx_PD_Data_limit_c = input[6];
+- activity_monitor->Gfx_PD_Data_error_coeff = input[7];
+- activity_monitor->Gfx_PD_Data_error_rate_coeff = input[8];
+- break;
+- case 1: /* Fclk */
+- activity_monitor->Fclk_FPS = input[1];
+- activity_monitor->Fclk_MinActiveFreqType = input[2];
+- activity_monitor->Fclk_MinActiveFreq = input[3];
+- activity_monitor->Fclk_BoosterFreqType = input[4];
+- activity_monitor->Fclk_BoosterFreq = input[5];
+- activity_monitor->Fclk_PD_Data_limit_c = input[6];
+- activity_monitor->Fclk_PD_Data_error_coeff = input[7];
+- activity_monitor->Fclk_PD_Data_error_rate_coeff = input[8];
+- break;
+- default:
+- return -EINVAL;
+- }
++ ret = smu_cmn_update_table(smu,
++ SMU_TABLE_ACTIVITY_MONITOR_COEFF,
++ WORKLOAD_PPLIB_CUSTOM_BIT,
++ (void *)(&activity_monitor_external),
++ false);
++ if (ret) {
++ dev_err(smu->adev->dev, "[%s] Failed to get activity monitor!", __func__);
++ return ret;
++ }
+
+- ret = smu_cmn_update_table(smu,
+- SMU_TABLE_ACTIVITY_MONITOR_COEFF,
+- WORKLOAD_PPLIB_CUSTOM_BIT,
+- (void *)(&activity_monitor_external),
+- true);
+- if (ret) {
+- dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
+- return ret;
+- }
++ idx = 0 * SMU_14_0_2_CUSTOM_PARAMS_COUNT;
++ if (input[idx]) {
++ /* Gfxclk */
++ activity_monitor->Gfx_FPS = input[idx + 1];
++ activity_monitor->Gfx_MinActiveFreqType = input[idx + 2];
++ activity_monitor->Gfx_MinActiveFreq = input[idx + 3];
++ activity_monitor->Gfx_BoosterFreqType = input[idx + 4];
++ activity_monitor->Gfx_BoosterFreq = input[idx + 5];
++ activity_monitor->Gfx_PD_Data_limit_c = input[idx + 6];
++ activity_monitor->Gfx_PD_Data_error_coeff = input[idx + 7];
++ activity_monitor->Gfx_PD_Data_error_rate_coeff = input[idx + 8];
++ }
++ idx = 1 * SMU_14_0_2_CUSTOM_PARAMS_COUNT;
++ if (input[idx]) {
++ /* Fclk */
++ activity_monitor->Fclk_FPS = input[idx + 1];
++ activity_monitor->Fclk_MinActiveFreqType = input[idx + 2];
++ activity_monitor->Fclk_MinActiveFreq = input[idx + 3];
++ activity_monitor->Fclk_BoosterFreqType = input[idx + 4];
++ activity_monitor->Fclk_BoosterFreq = input[idx + 5];
++ activity_monitor->Fclk_PD_Data_limit_c = input[idx + 6];
++ activity_monitor->Fclk_PD_Data_error_coeff = input[idx + 7];
++ activity_monitor->Fclk_PD_Data_error_rate_coeff = input[idx + 8];
+ }
+
+- if (smu->power_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE)
++ ret = smu_cmn_update_table(smu,
++ SMU_TABLE_ACTIVITY_MONITOR_COEFF,
++ WORKLOAD_PPLIB_CUSTOM_BIT,
++ (void *)(&activity_monitor_external),
++ true);
++ if (ret) {
++ dev_err(smu->adev->dev, "[%s] Failed to set activity monitor!", __func__);
++ return ret;
++ }
++
++ return ret;
++}
++
++static int smu_v14_0_2_set_power_profile_mode(struct smu_context *smu,
++ u32 workload_mask,
++ long *custom_params,
++ u32 custom_params_max_idx)
++{
++ u32 backend_workload_mask = 0;
++ int ret, idx = -1, i;
++
++ smu_cmn_get_backend_workload_mask(smu, workload_mask,
++ &backend_workload_mask);
++
++ /* disable deep sleep if compute is enabled */
++ if (workload_mask & (1 << PP_SMC_POWER_PROFILE_COMPUTE))
+ smu_v14_0_deep_sleep_control(smu, false);
+- else if (current_profile_mode == PP_SMC_POWER_PROFILE_COMPUTE)
++ else
+ smu_v14_0_deep_sleep_control(smu, true);
+
+- /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
+- workload_type = smu_cmn_to_asic_specific_index(smu,
+- CMN2ASIC_MAPPING_WORKLOAD,
+- smu->power_profile_mode);
+- if (workload_type < 0)
+- return -EINVAL;
+-
+- ret = smu_cmn_send_smc_msg_with_param(smu,
+- SMU_MSG_SetWorkloadMask,
+- 1 << workload_type,
+- NULL);
+- if (!ret)
+- smu->workload_mask = 1 << workload_type;
++ if (workload_mask & (1 << PP_SMC_POWER_PROFILE_CUSTOM)) {
++ if (!smu->custom_profile_params) {
++ smu->custom_profile_params =
++ kzalloc(SMU_14_0_2_CUSTOM_PARAMS_SIZE, GFP_KERNEL);
++ if (!smu->custom_profile_params)
++ return -ENOMEM;
++ }
++ if (custom_params && custom_params_max_idx) {
++ if (custom_params_max_idx != SMU_14_0_2_CUSTOM_PARAMS_COUNT)
++ return -EINVAL;
++ if (custom_params[0] >= SMU_14_0_2_CUSTOM_PARAMS_CLOCK_COUNT)
++ return -EINVAL;
++ idx = custom_params[0] * SMU_14_0_2_CUSTOM_PARAMS_COUNT;
++ smu->custom_profile_params[idx] = 1;
++ for (i = 1; i < custom_params_max_idx; i++)
++ smu->custom_profile_params[idx + i] = custom_params[i];
++ }
++ ret = smu_v14_0_2_set_power_profile_mode_coeff(smu,
++ smu->custom_profile_params);
++ if (ret) {
++ if (idx != -1)
++ smu->custom_profile_params[idx] = 0;
++ return ret;
++ }
++ } else if (smu->custom_profile_params) {
++ memset(smu->custom_profile_params, 0, SMU_14_0_2_CUSTOM_PARAMS_SIZE);
++ }
++
++ ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_SetWorkloadMask,
++ backend_workload_mask, NULL);
++ if (ret) {
++ dev_err(smu->adev->dev, "Failed to set workload mask 0x%08x\n",
++ workload_mask);
++ if (idx != -1)
++ smu->custom_profile_params[idx] = 0;
++ return ret;
++ }
+
+ return ret;
+ }
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
+@@ -1215,3 +1215,28 @@ void smu_cmn_generic_plpd_policy_desc(st
+ {
+ policy->desc = &xgmi_plpd_policy_desc;
+ }
++
++void smu_cmn_get_backend_workload_mask(struct smu_context *smu,
++ u32 workload_mask,
++ u32 *backend_workload_mask)
++{
++ int workload_type;
++ u32 profile_mode;
++
++ *backend_workload_mask = 0;
++
++ for (profile_mode = 0; profile_mode < PP_SMC_POWER_PROFILE_COUNT; profile_mode++) {
++ if (!(workload_mask & (1 << profile_mode)))
++ continue;
++
++ /* conv PP_SMC_POWER_PROFILE* to WORKLOAD_PPLIB_*_BIT */
++ workload_type = smu_cmn_to_asic_specific_index(smu,
++ CMN2ASIC_MAPPING_WORKLOAD,
++ profile_mode);
++
++ if (workload_type < 0)
++ continue;
++
++ *backend_workload_mask |= 1 << workload_type;
++ }
++}
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu_cmn.h
+@@ -147,5 +147,9 @@ bool smu_cmn_is_audio_func_enabled(struc
+ void smu_cmn_generic_soc_policy_desc(struct smu_dpm_policy *policy);
+ void smu_cmn_generic_plpd_policy_desc(struct smu_dpm_policy *policy);
+
++void smu_cmn_get_backend_workload_mask(struct smu_context *smu,
++ u32 workload_mask,
++ u32 *backend_workload_mask);
++
+ #endif
+ #endif
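
A note on the custom-profile plumbing introduced above: every backend caches the
user-supplied coefficients in one flat array, smu->custom_profile_params, sized
CLOCK_COUNT * PARAMS_COUNT longs (the exact constants differ per ASIC). Roughly,
using the arcturus values as an example (sketch only; it mirrors what each
set_power_profile_mode() implementation above does):

  idx = custom_params[0] * ARCTURUS_CUSTOM_PARAMS_COUNT;  /* custom_params[0] selects the clock */
  smu->custom_profile_params[idx] = 1;                    /* mark this clock as customised */
  for (i = 1; i < custom_params_max_idx; i++)
          smu->custom_profile_params[idx + i] = custom_params[i];

On failure the flag at [idx] is cleared again, and selecting a non-custom profile
zeroes the whole array.
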
--- /dev/null
+From c9b8dcabb52afe88413ff135a0953e3cc4128483 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Fri, 22 Nov 2024 11:22:51 -0500
+Subject: drm/amdgpu/hdp4.0: do a posting read when flushing HDP
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit c9b8dcabb52afe88413ff135a0953e3cc4128483 upstream.
+
+Need to read back to make sure the write goes through.
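+
+The pattern, roughly (a sketch only; the real register offsets are in the
+hunks below and "hdp_flush_reg" is just a placeholder), is a write followed
+by a read-back of the same register so the posted MMIO write is forced out
+before the caller continues:
+
+  WREG32(hdp_flush_reg, 0);   /* kick the HDP flush */
+  RREG32(hdp_flush_reg);      /* posting read: the write has landed */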
+
+Cc: David Belanger <david.belanger@amd.com>
+Reviewed-by: Frank Min <frank.min@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/hdp_v4_0.c
+@@ -40,10 +40,12 @@
+ static void hdp_v4_0_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+ {
+- if (!ring || !ring->funcs->emit_wreg)
++ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+- else
++ RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
++ } else {
+ amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
++ }
+ }
+
+ static void hdp_v4_0_invalidate_hdp(struct amdgpu_device *adev,
+@@ -54,11 +56,13 @@ static void hdp_v4_0_invalidate_hdp(stru
+ amdgpu_ip_version(adev, HDP_HWIP, 0) == IP_VERSION(4, 4, 5))
+ return;
+
+- if (!ring || !ring->funcs->emit_wreg)
++ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
+- else
++ RREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE);
++ } else {
+ amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
+ HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
++ }
+ }
+
+ static void hdp_v4_0_query_ras_error_count(struct amdgpu_device *adev,
--- /dev/null
+From cf424020e040be35df05b682b546b255e74a420f Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Fri, 22 Nov 2024 11:23:56 -0500
+Subject: drm/amdgpu/hdp5.0: do a posting read when flushing HDP
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit cf424020e040be35df05b682b546b255e74a420f upstream.
+
+Need to read back to make sure the write goes through.
+
+Cc: David Belanger <david.belanger@amd.com>
+Reviewed-by: Frank Min <frank.min@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_0.c
+@@ -31,10 +31,12 @@
+ static void hdp_v5_0_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+ {
+- if (!ring || !ring->funcs->emit_wreg)
++ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+- else
++ RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
++ } else {
+ amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
++ }
+ }
+
+ static void hdp_v5_0_invalidate_hdp(struct amdgpu_device *adev,
+@@ -42,6 +44,7 @@ static void hdp_v5_0_invalidate_hdp(stru
+ {
+ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE, 1);
++ RREG32_SOC15_NO_KIQ(HDP, 0, mmHDP_READ_CACHE_INVALIDATE);
+ } else {
+ amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET(
+ HDP, 0, mmHDP_READ_CACHE_INVALIDATE), 1);
--- /dev/null
+From f756dbac1ce1d5f9a2b35e3b55fa429cf6336437 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Fri, 22 Nov 2024 11:24:13 -0500
+Subject: drm/amdgpu/hdp5.2: do a posting read when flushing HDP
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit f756dbac1ce1d5f9a2b35e3b55fa429cf6336437 upstream.
+
+Need to read back to make sure the write goes through.
+
+Cc: David Belanger <david.belanger@amd.com>
+Reviewed-by: Frank Min <frank.min@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
++++ b/drivers/gpu/drm/amd/amdgpu/hdp_v5_2.c
+@@ -31,13 +31,15 @@
+ static void hdp_v5_2_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+ {
+- if (!ring || !ring->funcs->emit_wreg)
++ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
+ 0);
+- else
++ RREG32_NO_KIQ((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
++ } else {
+ amdgpu_ring_emit_wreg(ring,
+ (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2,
+ 0);
++ }
+ }
+
+ static void hdp_v5_2_update_mem_power_gating(struct amdgpu_device *adev,
--- /dev/null
+From abe1cbaec6cfe9fde609a15cd6a12c812282ce77 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Fri, 22 Nov 2024 11:24:38 -0500
+Subject: drm/amdgpu/hdp6.0: do a posting read when flushing HDP
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit abe1cbaec6cfe9fde609a15cd6a12c812282ce77 upstream.
+
+Need to read back to make sure the write goes through.
+
+Cc: David Belanger <david.belanger@amd.com>
+Reviewed-by: Frank Min <frank.min@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/hdp_v6_0.c
+@@ -34,10 +34,12 @@
+ static void hdp_v6_0_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+ {
+- if (!ring || !ring->funcs->emit_wreg)
++ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+- else
++ RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
++ } else {
+ amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
++ }
+ }
+
+ static void hdp_v6_0_update_clock_gating(struct amdgpu_device *adev,
--- /dev/null
+From 689275140cb8e9f8ae59e545086fce51fb0b994a Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Thu, 28 Nov 2024 16:05:24 +0800
+Subject: drm/amdgpu/hdp7.0: do a posting read when flushing HDP
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit 689275140cb8e9f8ae59e545086fce51fb0b994a upstream.
+
+Need to read back to make sure the write goes through.
+
+Cc: David Belanger <david.belanger@amd.com>
+Reviewed-by: Frank Min <frank.min@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c
++++ b/drivers/gpu/drm/amd/amdgpu/hdp_v7_0.c
+@@ -31,10 +31,12 @@
+ static void hdp_v7_0_flush_hdp(struct amdgpu_device *adev,
+ struct amdgpu_ring *ring)
+ {
+- if (!ring || !ring->funcs->emit_wreg)
++ if (!ring || !ring->funcs->emit_wreg) {
+ WREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
+- else
++ RREG32((adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2);
++ } else {
+ amdgpu_ring_emit_wreg(ring, (adev->rmmio_remap.reg_offset + KFD_MMIO_REMAP_HDP_MEM_FLUSH_CNTL) >> 2, 0);
++ }
+ }
+
+ static void hdp_v7_0_update_clock_gating(struct amdgpu_device *adev,
--- /dev/null
+From 73dae652dcac776296890da215ee7dec357a1032 Mon Sep 17 00:00:00 2001
+From: Alex Deucher <alexander.deucher@amd.com>
+Date: Mon, 25 Nov 2024 13:59:09 -0500
+Subject: drm/amdgpu: rework resume handling for display (v2)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+commit 73dae652dcac776296890da215ee7dec357a1032 upstream.
+
+Split resume into a 3rd step to handle displays when DCC is
+enabled on DCN 4.0.1. Move display after the buffer funcs
+have been re-enabled so that the GPU will do the move and
+properly set the DCC metadata for DCN.
+
+v2: fix fence irq resume ordering
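+
+The resulting resume order in amdgpu_device_ip_resume() is roughly (abridged
+from the hunks below):
+
+  r = amdgpu_device_ip_resume_phase2(adev);        /* all IPs except COMMON/GMC/IH/PSP/DCE */
+  ...
+  amdgpu_ttm_set_buffer_funcs_status(adev, true);  /* SDMA buffer moves usable again */
+  amdgpu_fence_driver_hw_init(adev);
+  r = amdgpu_device_ip_resume_phase3(adev);        /* DCE last, so DCC moves go through the GPU */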
+
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org # 6.11.x
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 45 +++++++++++++++++++++++++++--
+ 1 file changed, 43 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+@@ -3666,7 +3666,7 @@ static int amdgpu_device_ip_resume_phase
+ *
+ * @adev: amdgpu_device pointer
+ *
+- * First resume function for hardware IPs. The list of all the hardware
++ * Second resume function for hardware IPs. The list of all the hardware
+ * IPs that make up the asic is walked and the resume callbacks are run for
+ * all blocks except COMMON, GMC, and IH. resume puts the hardware into a
+ * functional state after a suspend and updates the software state as
+@@ -3684,6 +3684,7 @@ static int amdgpu_device_ip_resume_phase
+ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH ||
++ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE ||
+ adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)
+ continue;
+ r = adev->ip_blocks[i].version->funcs->resume(adev);
+@@ -3699,6 +3700,36 @@ static int amdgpu_device_ip_resume_phase
+ }
+
+ /**
++ * amdgpu_device_ip_resume_phase3 - run resume for hardware IPs
++ *
++ * @adev: amdgpu_device pointer
++ *
++ * Third resume function for hardware IPs. The list of all the hardware
++ * IPs that make up the asic is walked and the resume callbacks are run for
++ * all DCE. resume puts the hardware into a functional state after a suspend
++ * and updates the software state as necessary. This function is also used
++ * for restoring the GPU after a GPU reset.
++ *
++ * Returns 0 on success, negative error code on failure.
++ */
++static int amdgpu_device_ip_resume_phase3(struct amdgpu_device *adev)
++{
++ int i, r;
++
++ for (i = 0; i < adev->num_ip_blocks; i++) {
++ if (!adev->ip_blocks[i].status.valid || adev->ip_blocks[i].status.hw)
++ continue;
++ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
++ r = amdgpu_ip_block_resume(&adev->ip_blocks[i]);
++ if (r)
++ return r;
++ }
++ }
++
++ return 0;
++}
++
++/**
+ * amdgpu_device_ip_resume - run resume for hardware IPs
+ *
+ * @adev: amdgpu_device pointer
+@@ -3727,6 +3758,13 @@ static int amdgpu_device_ip_resume(struc
+ if (adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(adev, true);
+
++ if (r)
++ return r;
++
++ amdgpu_fence_driver_hw_init(adev);
++
++ r = amdgpu_device_ip_resume_phase3(adev);
++
+ return r;
+ }
+
+@@ -4809,7 +4847,6 @@ int amdgpu_device_resume(struct drm_devi
+ dev_err(adev->dev, "amdgpu_device_ip_resume failed (%d).\n", r);
+ goto exit;
+ }
+- amdgpu_fence_driver_hw_init(adev);
+
+ if (!adev->in_s0ix) {
+ r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
+@@ -5431,6 +5468,10 @@ int amdgpu_do_asic_reset(struct list_hea
+ if (tmp_adev->mman.buffer_funcs_ring->sched.ready)
+ amdgpu_ttm_set_buffer_funcs_status(tmp_adev, true);
+
++ r = amdgpu_device_ip_resume_phase3(tmp_adev);
++ if (r)
++ goto out;
++
+ if (vram_lost)
+ amdgpu_device_fill_reset_magic(tmp_adev);
+
--- /dev/null
+From 33114f1057ea5cf40e604021711a9711a060fcb6 Mon Sep 17 00:00:00 2001
+From: Sreekant Somasekharan <sreekant.somasekharan@amd.com>
+Date: Thu, 28 Nov 2024 12:05:56 -0500
+Subject: drm/amdkfd: add MEC version that supports no PCIe atomics for GFX12
+
+From: Sreekant Somasekharan <sreekant.somasekharan@amd.com>
+
+commit 33114f1057ea5cf40e604021711a9711a060fcb6 upstream.
+
+Add MEC version from which alternate support for no PCIe atomics
+is provided so that the device is not skipped during KFD device init in
+GFX1200/GFX1201.
+
+Signed-off-by: Sreekant Somasekharan <sreekant.somasekharan@amd.com>
+Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org # 6.11.x
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_device.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+index 956198da7859..9b51dd75fefc 100644
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+@@ -235,6 +235,9 @@ static void kfd_device_info_init(struct kfd_dev *kfd,
+ */
+ kfd->device_info.needs_pci_atomics = true;
+ kfd->device_info.no_atomic_fw_version = kfd->adev->gfx.rs64_enable ? 509 : 0;
++ } else if (gc_version < IP_VERSION(13, 0, 0)) {
++ kfd->device_info.needs_pci_atomics = true;
++ kfd->device_info.no_atomic_fw_version = 2090;
+ } else {
+ kfd->device_info.needs_pci_atomics = true;
+ }
+--
+2.47.1
+
--- /dev/null
+From 55ed120dcfdde2478c3ebfa1c0ac4ed1e430053b Mon Sep 17 00:00:00 2001
+From: David Yat Sin <David.YatSin@amd.com>
+Date: Tue, 26 Nov 2024 15:18:47 -0500
+Subject: drm/amdkfd: hard-code cacheline for gc943,gc944
+
+From: David Yat Sin <David.YatSin@amd.com>
+
+commit 55ed120dcfdde2478c3ebfa1c0ac4ed1e430053b upstream.
+
+Cacheline size is not available in IP discovery for gc943,gc944.
+
+Signed-off-by: David Yat Sin <David.YatSin@amd.com>
+Reviewed-by: Harish Kasiviswanathan <Harish.Kasiviswanathan@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
++++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+@@ -1509,6 +1509,8 @@ static int kfd_fill_gpu_cache_info_from_
+ if (adev->gfx.config.gc_tcp_size_per_cu) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_tcp_size_per_cu;
+ pcache_info[i].cache_level = 1;
++ /* Cacheline size not available in IP discovery for gc943,gc944 */
++ pcache_info[i].cache_line_size = 128;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+@@ -1520,6 +1522,7 @@ static int kfd_fill_gpu_cache_info_from_
+ pcache_info[i].cache_size =
+ adev->gfx.config.gc_l1_instruction_cache_size_per_sqc;
+ pcache_info[i].cache_level = 1;
++ pcache_info[i].cache_line_size = 64;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_INST_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+@@ -1530,6 +1533,7 @@ static int kfd_fill_gpu_cache_info_from_
+ if (adev->gfx.config.gc_l1_data_cache_size_per_sqc) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_l1_data_cache_size_per_sqc;
+ pcache_info[i].cache_level = 1;
++ pcache_info[i].cache_line_size = 64;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+@@ -1540,6 +1544,7 @@ static int kfd_fill_gpu_cache_info_from_
+ if (adev->gfx.config.gc_tcc_size) {
+ pcache_info[i].cache_size = adev->gfx.config.gc_tcc_size;
+ pcache_info[i].cache_level = 2;
++ pcache_info[i].cache_line_size = 128;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
+@@ -1550,6 +1555,7 @@ static int kfd_fill_gpu_cache_info_from_
+ if (adev->gmc.mall_size) {
+ pcache_info[i].cache_size = adev->gmc.mall_size / 1024;
+ pcache_info[i].cache_level = 3;
++ pcache_info[i].cache_line_size = 64;
+ pcache_info[i].flags = (CRAT_CACHE_FLAGS_ENABLED |
+ CRAT_CACHE_FLAGS_DATA_CACHE |
+ CRAT_CACHE_FLAGS_SIMD_CACHE);
--- /dev/null
+From bd2fccac61b40eaf08d9546acc9fef958bfe4763 Mon Sep 17 00:00:00 2001
+From: Imre Deak <imre.deak@intel.com>
+Date: Mon, 25 Nov 2024 22:53:14 +0200
+Subject: drm/dp_mst: Fix MST sideband message body length check
+
+From: Imre Deak <imre.deak@intel.com>
+
+commit bd2fccac61b40eaf08d9546acc9fef958bfe4763 upstream.
+
+Fix the MST sideband message body length check: the body length must be
+at least 1 byte, accounting for the message body CRC (aka message data
+CRC) at the end of the message.
+
+This fixes a case where an MST branch device returns a header with a
+correct header CRC (indicating a correctly received body length), with
+the body length being incorrectly set to 0. This will later lead to a
+memory corruption in drm_dp_sideband_append_payload() and the following
+errors in dmesg:
+
+ UBSAN: array-index-out-of-bounds in drivers/gpu/drm/display/drm_dp_mst_topology.c:786:25
+ index -1 is out of range for type 'u8 [48]'
+ Call Trace:
+ drm_dp_sideband_append_payload+0x33d/0x350 [drm_display_helper]
+ drm_dp_get_one_sb_msg+0x3ce/0x5f0 [drm_display_helper]
+ drm_dp_mst_hpd_irq_handle_event+0xc8/0x1580 [drm_display_helper]
+
+ memcpy: detected field-spanning write (size 18446744073709551615) of single field "&msg->msg[msg->curlen]" at drivers/gpu/drm/display/drm_dp_mst_topology.c:791 (size 256)
+ Call Trace:
+ drm_dp_sideband_append_payload+0x324/0x350 [drm_display_helper]
+ drm_dp_get_one_sb_msg+0x3ce/0x5f0 [drm_display_helper]
+ drm_dp_mst_hpd_irq_handle_event+0xc8/0x1580 [drm_display_helper]
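+
+A toy model (an assumption about the failure mode, not the helper's
+actual code) of how a zero body length produces the (size_t)-1 sized
+write and the -1 index reported above, once the trailing CRC byte is
+subtracted:
+
+ #include <stdio.h>
+
+ /* The body length from the sideband header includes a trailing CRC byte,
+  * so the payload appended from each chunk is body_len - 1 bytes.
+  */
+ static void append_payload(size_t body_len)
+ {
+         size_t payload = body_len - 1;   /* wraps to SIZE_MAX when body_len == 0 */
+         int crc_idx = (int)body_len - 1; /* -1: an out-of-bounds index */
+
+         printf("would copy %zu bytes, CRC at index %d\n", payload, crc_idx);
+ }
+
+ int main(void)
+ {
+         append_payload(0); /* the zero-length case the new check rejects */
+         return 0;
+ }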
+
+Cc: <stable@vger.kernel.org>
+Cc: Lyude Paul <lyude@redhat.com>
+Reviewed-by: Lyude Paul <lyude@redhat.com>
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241125205314.1725887-1-imre.deak@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/display/drm_dp_mst_topology.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
++++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
+@@ -320,6 +320,9 @@ static bool drm_dp_decode_sideband_msg_h
+ hdr->broadcast = (buf[idx] >> 7) & 0x1;
+ hdr->path_msg = (buf[idx] >> 6) & 0x1;
+ hdr->msg_len = buf[idx] & 0x3f;
++ if (hdr->msg_len < 1) /* min space for body CRC */
++ return false;
++
+ idx++;
+ hdr->somt = (buf[idx] >> 7) & 0x1;
+ hdr->eomt = (buf[idx] >> 6) & 0x1;
--- /dev/null
+From a6fa67d26de385c3c7a23c1e109a0e23bfda4ec7 Mon Sep 17 00:00:00 2001
+From: Imre Deak <imre.deak@intel.com>
+Date: Tue, 3 Dec 2024 18:02:17 +0200
+Subject: drm/dp_mst: Fix resetting msg rx state after topology removal
+
+From: Imre Deak <imre.deak@intel.com>
+
+commit a6fa67d26de385c3c7a23c1e109a0e23bfda4ec7 upstream.
+
+If the MST topology is removed during the reception of an MST down reply
+or MST up request sideband message, the
+drm_dp_mst_topology_mgr::up_req_recv/down_rep_recv states could be reset
+from one thread via drm_dp_mst_topology_mgr_set_mst(false), racing with
+the reading/parsing of the message from another thread via
+drm_dp_mst_handle_down_rep() or drm_dp_mst_handle_up_req(). The race is
+possible since the reader/parser doesn't hold any lock while accessing
+the reception state. This in turn can lead to a memory corruption in the
+reader/parser as described by commit bd2fccac61b4 ("drm/dp_mst: Fix MST
+sideband message body length check").
+
+Fix the above by resetting the message reception state if needed before
+reading/parsing a message. Another solution would be to hold the
+drm_dp_mst_topology_mgr::lock for the whole duration of the message
+reception/parsing in drm_dp_mst_handle_down_rep() and
+drm_dp_mst_handle_up_req(), however this would require a bigger change.
+Since the fix is also needed for stable, opting for the simpler solution
+in this patch.
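+
+A minimal standalone sketch (a pthread toy, not the DRM code) of the
+deferred-reset pattern used here: the teardown path only raises a flag
+under the lock, and the reader clears its own state before parsing:
+
+ #include <pthread.h>
+ #include <stdbool.h>
+ #include <string.h>
+ #include <stdio.h>
+
+ struct rx_state { unsigned char buf[256]; size_t len; };
+
+ struct mgr {
+         pthread_mutex_t lock;
+         bool reset_rx_state;
+         struct rx_state down_rep_recv, up_req_recv;
+ };
+
+ /* Teardown path (e.g. disabling MST): never touches the rx state directly. */
+ static void teardown(struct mgr *m)
+ {
+         pthread_mutex_lock(&m->lock);
+         m->reset_rx_state = true;
+         pthread_mutex_unlock(&m->lock);
+ }
+
+ /* Reader path: resets its own state, under the lock, before parsing. */
+ static void before_parse(struct mgr *m)
+ {
+         pthread_mutex_lock(&m->lock);
+         if (m->reset_rx_state) {
+                 m->reset_rx_state = false;
+                 memset(&m->down_rep_recv, 0, sizeof(m->down_rep_recv));
+                 memset(&m->up_req_recv, 0, sizeof(m->up_req_recv));
+         }
+         pthread_mutex_unlock(&m->lock);
+ }
+
+ int main(void)
+ {
+         static struct mgr m = { .lock = PTHREAD_MUTEX_INITIALIZER };
+
+         teardown(&m);
+         before_parse(&m);
+         printf("rx state reset by the reader, not by the teardown path\n");
+         return 0;
+ }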
+
+Cc: Lyude Paul <lyude@redhat.com>
+Cc: <stable@vger.kernel.org>
+Fixes: 1d082618bbf3 ("drm/display/dp_mst: Fix down/up message handling after sink disconnect")
+Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/13056
+Reviewed-by: Lyude Paul <lyude@redhat.com>
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241203160223.2926014-2-imre.deak@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/display/drm_dp_mst_topology.c | 21 +++++++++++++++++++--
+ include/drm/display/drm_dp_mst_helper.h | 7 +++++++
+ 2 files changed, 26 insertions(+), 2 deletions(-)
+
+--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
++++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
+@@ -3700,8 +3700,7 @@ int drm_dp_mst_topology_mgr_set_mst(stru
+ ret = 0;
+ mgr->payload_id_table_cleared = false;
+
+- memset(&mgr->down_rep_recv, 0, sizeof(mgr->down_rep_recv));
+- memset(&mgr->up_req_recv, 0, sizeof(mgr->up_req_recv));
++ mgr->reset_rx_state = true;
+ }
+
+ out_unlock:
+@@ -3859,6 +3858,11 @@ out_fail:
+ }
+ EXPORT_SYMBOL(drm_dp_mst_topology_mgr_resume);
+
++static void reset_msg_rx_state(struct drm_dp_sideband_msg_rx *msg)
++{
++ memset(msg, 0, sizeof(*msg));
++}
++
+ static bool
+ drm_dp_get_one_sb_msg(struct drm_dp_mst_topology_mgr *mgr, bool up,
+ struct drm_dp_mst_branch **mstb)
+@@ -4172,6 +4176,17 @@ out:
+ return 0;
+ }
+
++static void update_msg_rx_state(struct drm_dp_mst_topology_mgr *mgr)
++{
++ mutex_lock(&mgr->lock);
++ if (mgr->reset_rx_state) {
++ mgr->reset_rx_state = false;
++ reset_msg_rx_state(&mgr->down_rep_recv);
++ reset_msg_rx_state(&mgr->up_req_recv);
++ }
++ mutex_unlock(&mgr->lock);
++}
++
+ /**
+ * drm_dp_mst_hpd_irq_handle_event() - MST hotplug IRQ handle MST event
+ * @mgr: manager to notify irq for.
+@@ -4206,6 +4221,8 @@ int drm_dp_mst_hpd_irq_handle_event(stru
+ *handled = true;
+ }
+
++ update_msg_rx_state(mgr);
++
+ if (esi[1] & DP_DOWN_REP_MSG_RDY) {
+ ret = drm_dp_mst_handle_down_rep(mgr);
+ *handled = true;
+--- a/include/drm/display/drm_dp_mst_helper.h
++++ b/include/drm/display/drm_dp_mst_helper.h
+@@ -700,6 +700,13 @@ struct drm_dp_mst_topology_mgr {
+ bool payload_id_table_cleared : 1;
+
+ /**
++ * @reset_rx_state: The down request's reply and up request message
++ * receiver state must be reset, after the topology manager got
++ * removed. Protected by @lock.
++ */
++ bool reset_rx_state : 1;
++
++ /**
+ * @payload_count: The number of currently active payloads in hardware. This value is only
+ * intended to be used internally by MST helpers for payload tracking, and is only safe to
+ * read/write from the atomic commit (not check) context.
--- /dev/null
+From 4d49e77a973d3b5d1881663c3f122906a0702940 Mon Sep 17 00:00:00 2001
+From: Imre Deak <imre.deak@intel.com>
+Date: Tue, 3 Dec 2024 18:02:18 +0200
+Subject: drm/dp_mst: Verify request type in the corresponding down message reply
+
+From: Imre Deak <imre.deak@intel.com>
+
+commit 4d49e77a973d3b5d1881663c3f122906a0702940 upstream.
+
+After receiving the response for an MST down request message, the
+response should be accepted/parsed only if the response type matches
+that of the request. Ensure this by checking whether the request type
+codes stored in the request and in the reply match, dropping the reply
+in case of a mismatch.
+
+This fixes the topology detection for an MST hub, as described in the
+Closes link below, where the hub sends an incorrect reply message after
+a CLEAR_PAYLOAD_TABLE -> LINK_ADDRESS down request message sequence.
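+
+As a small standalone illustration of the check (the byte values below
+are made up): the request identifier sits in the low 7 bits of the first
+body byte, both in the stored request and in the received reply:
+
+ #include <stdio.h>
+
+ static int request_type(unsigned char first_body_byte)
+ {
+         return first_body_byte & 0x7f; /* low 7 bits hold the request id */
+ }
+
+ int main(void)
+ {
+         unsigned char tx0 = 0x01; /* first byte of the stored down request */
+         unsigned char rx0 = 0x10; /* first byte of the received reply */
+
+         if (request_type(tx0) != request_type(rx0))
+                 printf("dropping reply: rx type %#x != tx type %#x\n",
+                        request_type(rx0), request_type(tx0));
+         return 0;
+ }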
+
+Cc: Lyude Paul <lyude@redhat.com>
+Cc: <stable@vger.kernel.org>
+Closes: https://gitlab.freedesktop.org/drm/i915/kernel/-/issues/12804
+Reviewed-by: Lyude Paul <lyude@redhat.com>
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241203160223.2926014-3-imre.deak@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/display/drm_dp_mst_topology.c | 31 ++++++++++++++++++++++++++
+ 1 file changed, 31 insertions(+)
+
+--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
++++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
+@@ -3937,6 +3937,34 @@ drm_dp_get_one_sb_msg(struct drm_dp_mst_
+ return true;
+ }
+
++static int get_msg_request_type(u8 data)
++{
++ return data & 0x7f;
++}
++
++static bool verify_rx_request_type(struct drm_dp_mst_topology_mgr *mgr,
++ const struct drm_dp_sideband_msg_tx *txmsg,
++ const struct drm_dp_sideband_msg_rx *rxmsg)
++{
++ const struct drm_dp_sideband_msg_hdr *hdr = &rxmsg->initial_hdr;
++ const struct drm_dp_mst_branch *mstb = txmsg->dst;
++ int tx_req_type = get_msg_request_type(txmsg->msg[0]);
++ int rx_req_type = get_msg_request_type(rxmsg->msg[0]);
++ char rad_str[64];
++
++ if (tx_req_type == rx_req_type)
++ return true;
++
++ drm_dp_mst_rad_to_str(mstb->rad, mstb->lct, rad_str, sizeof(rad_str));
++ drm_dbg_kms(mgr->dev,
++ "Got unexpected MST reply, mstb: %p seqno: %d lct: %d rad: %s rx_req_type: %s (%02x) != tx_req_type: %s (%02x)\n",
++ mstb, hdr->seqno, mstb->lct, rad_str,
++ drm_dp_mst_req_type_str(rx_req_type), rx_req_type,
++ drm_dp_mst_req_type_str(tx_req_type), tx_req_type);
++
++ return false;
++}
++
+ static int drm_dp_mst_handle_down_rep(struct drm_dp_mst_topology_mgr *mgr)
+ {
+ struct drm_dp_sideband_msg_tx *txmsg;
+@@ -3966,6 +3994,9 @@ static int drm_dp_mst_handle_down_rep(st
+ goto out_clear_reply;
+ }
+
++ if (!verify_rx_request_type(mgr, txmsg, msg))
++ goto out_clear_reply;
++
+ drm_dp_sideband_parse_reply(mgr, msg, &txmsg->reply);
+
+ if (txmsg->reply.reply_type == DP_SIDEBAND_REPLY_NAK) {
--- /dev/null
+From 87a0d90fcd31c0f36da0332428c9e1a1e0f97432 Mon Sep 17 00:00:00 2001
+From: Ulf Hansson <ulf.hansson@linaro.org>
+Date: Mon, 25 Nov 2024 13:24:46 +0100
+Subject: mmc: core: Further prevent card detect during shutdown
+
+From: Ulf Hansson <ulf.hansson@linaro.org>
+
+commit 87a0d90fcd31c0f36da0332428c9e1a1e0f97432 upstream.
+
+Disabling card detect from the host's ->shutdown_pre() callback turned
+out not to be the complete solution. More precisely, we also need to
+prevent card detect once mmc_bus->shutdown() has been called to
+gracefully power off the card. Otherwise the mmc_rescan work may poll
+the card with a CMD13 to see if it's still alive, which will then fail
+and hang as the card has already been powered off.
+
+To fix this problem, let's disable mmc_rescan prior to powering off the
+card during shutdown.
+
+Reported-by: Anthony Pighin <anthony.pighin@nokia.com>
+Fixes: 66c915d09b94 ("mmc: core: Disable card detect during shutdown")
+Cc: stable@vger.kernel.org
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Reviewed-by: Adrian Hunter <adrian.hunter@intel.com>
+Closes: https://lore.kernel.org/all/BN0PR08MB695133000AF116F04C3A9FFE83212@BN0PR08MB6951.namprd08.prod.outlook.com/
+Tested-by: Anthony Pighin <anthony.pighin@nokia.com>
+Message-ID: <20241125122446.18684-1-ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/core/bus.c | 2 ++
+ drivers/mmc/core/core.c | 3 +++
+ 2 files changed, 5 insertions(+)
+
+--- a/drivers/mmc/core/bus.c
++++ b/drivers/mmc/core/bus.c
+@@ -149,6 +149,8 @@ static void mmc_bus_shutdown(struct devi
+ if (dev->driver && drv->shutdown)
+ drv->shutdown(card);
+
++ __mmc_stop_host(host);
++
+ if (host->bus_ops->shutdown) {
+ ret = host->bus_ops->shutdown(host);
+ if (ret)
+--- a/drivers/mmc/core/core.c
++++ b/drivers/mmc/core/core.c
+@@ -2296,6 +2296,9 @@ void mmc_start_host(struct mmc_host *hos
+
+ void __mmc_stop_host(struct mmc_host *host)
+ {
++ if (host->rescan_disable)
++ return;
++
+ if (host->slot.cd_irq >= 0) {
+ mmc_gpio_set_cd_wake(host, false);
+ disable_irq(host->slot.cd_irq);
--- /dev/null
+From 7f0fa47ceebcff0e3591bb7e32a71a2cd7846149 Mon Sep 17 00:00:00 2001
+From: Hans de Goede <hdegoede@redhat.com>
+Date: Mon, 18 Nov 2024 22:00:49 +0100
+Subject: mmc: sdhci-pci: Add DMI quirk for missing CD GPIO on Vexia Edu Atla 10 tablet
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+commit 7f0fa47ceebcff0e3591bb7e32a71a2cd7846149 upstream.
+
+The Vexia Edu Atla 10 tablet distributed to schools in the Spanish
+Andalucía region has no ACPI fwnode associated with the SDHCI controller
+for its microsd-slot and thus has no ACPI GPIO resource info.
+
+This causes the following error to be logged and the slot to not work:
+[ 10.572113] sdhci-pci 0000:00:12.0: failed to setup card detect gpio
+
+Add a DMI quirk table that supplies gpiod_lookup_tables with manually
+specified CD GPIO info, and use it to provide the CD GPIO info on this
+tablet. This fixes the microsd-slot not working.
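+
+A hedged sketch of how a second board could be added alongside the new
+quirk table (the vendor/product strings, GPIO chip and pin below are
+made up; only the structure mirrors the patch):
+
+ #include <linux/dmi.h>
+ #include <linux/gpio/machine.h>
+
+ /* Hypothetical lookup table for the imaginary board's CD GPIO. */
+ static const struct gpiod_lookup_table example_board_cd_gpios = {
+         .dev_id = "0000:00:12.0", /* PCI slot the quirk applies to */
+         .table = {
+                 GPIO_LOOKUP("INT33FC:02", 12, "cd", GPIO_ACTIVE_LOW),
+                 { } /* terminator */
+         },
+ };
+
+ /* Extra match that would sit next to the Vexia entry in
+  * sdhci_intel_byt_cd_gpio_override[], before the terminating { }.
+  */
+ static const struct dmi_system_id example_cd_gpio_override[] = {
+         {
+                 .matches = {
+                         DMI_MATCH(DMI_SYS_VENDOR, "Example Vendor"),
+                         DMI_MATCH(DMI_PRODUCT_NAME, "Example Tablet"),
+                 },
+                 .driver_data = (void *)&example_board_cd_gpios,
+         },
+         { }
+ };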
+
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Acked-by: Adrian Hunter <adrian.hunter@intel.com>
+Cc: stable@vger.kernel.org
+Message-ID: <20241118210049.311079-1-hdegoede@redhat.com>
+Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/mmc/host/sdhci-pci-core.c | 72 ++++++++++++++++++++++++++++++++++++++
+ drivers/mmc/host/sdhci-pci.h | 1
+ 2 files changed, 73 insertions(+)
+
+--- a/drivers/mmc/host/sdhci-pci-core.c
++++ b/drivers/mmc/host/sdhci-pci-core.c
+@@ -21,6 +21,7 @@
+ #include <linux/io.h>
+ #include <linux/iopoll.h>
+ #include <linux/gpio.h>
++#include <linux/gpio/machine.h>
+ #include <linux/pm_runtime.h>
+ #include <linux/pm_qos.h>
+ #include <linux/debugfs.h>
+@@ -1235,6 +1236,29 @@ static const struct sdhci_pci_fixes sdhc
+ .priv_size = sizeof(struct intel_host),
+ };
+
++/* DMI quirks for devices with missing or broken CD GPIO info */
++static const struct gpiod_lookup_table vexia_edu_atla10_cd_gpios = {
++ .dev_id = "0000:00:12.0",
++ .table = {
++ GPIO_LOOKUP("INT33FC:00", 38, "cd", GPIO_ACTIVE_HIGH),
++ { }
++ },
++};
++
++static const struct dmi_system_id sdhci_intel_byt_cd_gpio_override[] = {
++ {
++ /* Vexia Edu Atla 10 tablet 9V version */
++ .matches = {
++ DMI_MATCH(DMI_BOARD_VENDOR, "AMI Corporation"),
++ DMI_MATCH(DMI_BOARD_NAME, "Aptio CRB"),
++ /* Above strings are too generic, also match on BIOS date */
++ DMI_MATCH(DMI_BIOS_DATE, "08/25/2014"),
++ },
++ .driver_data = (void *)&vexia_edu_atla10_cd_gpios,
++ },
++ { }
++};
++
+ static const struct sdhci_pci_fixes sdhci_intel_byt_sd = {
+ #ifdef CONFIG_PM_SLEEP
+ .resume = byt_resume,
+@@ -1253,6 +1277,7 @@ static const struct sdhci_pci_fixes sdhc
+ .add_host = byt_add_host,
+ .remove_slot = byt_remove_slot,
+ .ops = &sdhci_intel_byt_ops,
++ .cd_gpio_override = sdhci_intel_byt_cd_gpio_override,
+ .priv_size = sizeof(struct intel_host),
+ };
+
+@@ -2054,6 +2079,42 @@ static const struct dev_pm_ops sdhci_pci
+ * *
+ \*****************************************************************************/
+
++static struct gpiod_lookup_table *sdhci_pci_add_gpio_lookup_table(
++ struct sdhci_pci_chip *chip)
++{
++ struct gpiod_lookup_table *cd_gpio_lookup_table;
++ const struct dmi_system_id *dmi_id = NULL;
++ size_t count;
++
++ if (chip->fixes && chip->fixes->cd_gpio_override)
++ dmi_id = dmi_first_match(chip->fixes->cd_gpio_override);
++
++ if (!dmi_id)
++ return NULL;
++
++ cd_gpio_lookup_table = dmi_id->driver_data;
++ for (count = 0; cd_gpio_lookup_table->table[count].key; count++)
++ ;
++
++ cd_gpio_lookup_table = kmemdup(dmi_id->driver_data,
++ /* count + 1 terminating entry */
++ struct_size(cd_gpio_lookup_table, table, count + 1),
++ GFP_KERNEL);
++ if (!cd_gpio_lookup_table)
++ return ERR_PTR(-ENOMEM);
++
++ gpiod_add_lookup_table(cd_gpio_lookup_table);
++ return cd_gpio_lookup_table;
++}
++
++static void sdhci_pci_remove_gpio_lookup_table(struct gpiod_lookup_table *lookup_table)
++{
++ if (lookup_table) {
++ gpiod_remove_lookup_table(lookup_table);
++ kfree(lookup_table);
++ }
++}
++
+ static struct sdhci_pci_slot *sdhci_pci_probe_slot(
+ struct pci_dev *pdev, struct sdhci_pci_chip *chip, int first_bar,
+ int slotno)
+@@ -2129,8 +2190,19 @@ static struct sdhci_pci_slot *sdhci_pci_
+ device_init_wakeup(&pdev->dev, true);
+
+ if (slot->cd_idx >= 0) {
++ struct gpiod_lookup_table *cd_gpio_lookup_table;
++
++ cd_gpio_lookup_table = sdhci_pci_add_gpio_lookup_table(chip);
++ if (IS_ERR(cd_gpio_lookup_table)) {
++ ret = PTR_ERR(cd_gpio_lookup_table);
++ goto remove;
++ }
++
+ ret = mmc_gpiod_request_cd(host->mmc, "cd", slot->cd_idx,
+ slot->cd_override_level, 0);
++
++ sdhci_pci_remove_gpio_lookup_table(cd_gpio_lookup_table);
++
+ if (ret && ret != -EPROBE_DEFER)
+ ret = mmc_gpiod_request_cd(host->mmc, NULL,
+ slot->cd_idx,
+--- a/drivers/mmc/host/sdhci-pci.h
++++ b/drivers/mmc/host/sdhci-pci.h
+@@ -156,6 +156,7 @@ struct sdhci_pci_fixes {
+ #endif
+
+ const struct sdhci_ops *ops;
++ const struct dmi_system_id *cd_gpio_override;
+ size_t priv_size;
+ };
+
--- /dev/null
+From 7912405643a14b527cd4a4f33c1d4392da900888 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Sun, 1 Dec 2024 12:17:30 +0100
+Subject: modpost: Add .irqentry.text to OTHER_SECTIONS
+
+From: Thomas Gleixner <tglx@linutronix.de>
+
+commit 7912405643a14b527cd4a4f33c1d4392da900888 upstream.
+
+The compiler can fully inline the actual handler function of an interrupt
+entry into the .irqentry.text entry point. If such a function contains an
+access which has an exception table entry, modpost complains about a
+section mismatch:
+
+ WARNING: vmlinux.o(__ex_table+0x447c): Section mismatch in reference ...
+
+ The relocation at __ex_table+0x447c references section ".irqentry.text"
+ which is not in the list of authorized sections.
+
+Add .irqentry.text to OTHER_SECTIONS to cure the issue.
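+
+A kernel-style sketch (an assumed example, not taken from the report) of
+how such a reference can come about: the user access below emits an
+__ex_table entry, and once the helper is inlined into the __irq_entry
+function that entry points into .irqentry.text:
+
+ #include <linux/interrupt.h> /* __irq_entry: places code in .irqentry.text */
+ #include <linux/uaccess.h>
+
+ static inline int read_word(unsigned long __user *p, unsigned long *val)
+ {
+         return get_user(*val, p); /* emits an exception-table fixup */
+ }
+
+ void __irq_entry demo_irq_entry(unsigned long __user *p)
+ {
+         unsigned long v;
+
+         /* If read_word() is inlined here, its __ex_table entry references
+          * .irqentry.text, which modpost rejected before this change.
+          */
+         if (!read_word(p, &v))
+                 (void)v;
+ }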
+
+Reported-by: Sergey Senozhatsky <senozhatsky@chromium.org>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Cc: stable@vger.kernel.org # needed for linux-5.4-y
+Link: https://lore.kernel.org/all/20241128111844.GE10431@google.com/
+Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ scripts/mod/modpost.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/scripts/mod/modpost.c
++++ b/scripts/mod/modpost.c
+@@ -785,7 +785,7 @@ static void check_section(const char *mo
+ ".ltext", ".ltext.*"
+ #define OTHER_TEXT_SECTIONS ".ref.text", ".head.text", ".spinlock.text", \
+ ".fixup", ".entry.text", ".exception.text", \
+- ".coldtext", ".softirqentry.text"
++ ".coldtext", ".softirqentry.text", ".irqentry.text"
+
+ #define ALL_TEXT_SECTIONS ".init.text", ".exit.text", \
+ TEXT_SECTIONS, OTHER_TEXT_SECTIONS
--- /dev/null
+From 3061e170381af96d1e66799d34264e6414d428a7 Mon Sep 17 00:00:00 2001
+From: Cosmin Tanislav <demonsingur@gmail.com>
+Date: Thu, 28 Nov 2024 15:16:23 +0200
+Subject: regmap: detach regmap from dev on regmap_exit
+
+From: Cosmin Tanislav <demonsingur@gmail.com>
+
+commit 3061e170381af96d1e66799d34264e6414d428a7 upstream.
+
+At the end of __regmap_init(), if dev is not NULL, regmap_attach_dev()
+is called, which adds a devres reference to the regmap, to be able to
+retrieve a dev's regmap by name using dev_get_regmap().
+
+When calling regmap_exit, the opposite does not happen, and the
+reference is kept until the dev is detached.
+
+Add a regmap_detach_dev() function and call it in regmap_exit() to make
+sure that the devres reference is not kept.
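+
+A kernel-style sketch (hypothetical driver code, assuming an I2C regmap)
+of the behaviour this changes: after regmap_exit(), dev_get_regmap() no
+longer hands back the stale map:
+
+ #include <linux/i2c.h>
+ #include <linux/regmap.h>
+
+ /* example_priv and the re-init flow are made up for illustration. */
+ struct example_priv { struct regmap *map; };
+
+ static int example_reinit(struct i2c_client *i2c, struct example_priv *priv,
+                           const struct regmap_config *cfg)
+ {
+         regmap_exit(priv->map); /* frees the map... */
+
+         /* ...and, with this change, also drops the devres reference, so a
+          * stale pointer can no longer be looked up here:
+          */
+         WARN_ON(dev_get_regmap(&i2c->dev, NULL));
+
+         priv->map = regmap_init_i2c(i2c, cfg);
+         return PTR_ERR_OR_ZERO(priv->map);
+ }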
+
+Cc: stable@vger.kernel.org
+Fixes: 72b39f6f2b5a ("regmap: Implement dev_get_regmap()")
+Signed-off-by: Cosmin Tanislav <demonsingur@gmail.com>
+Rule: add
+Link: https://lore.kernel.org/stable/20241128130554.362486-1-demonsingur%40gmail.com
+Link: https://patch.msgid.link/20241128131625.363835-1-demonsingur@gmail.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/base/regmap/regmap.c | 12 ++++++++++++
+ 1 file changed, 12 insertions(+)
+
+--- a/drivers/base/regmap/regmap.c
++++ b/drivers/base/regmap/regmap.c
+@@ -598,6 +598,17 @@ int regmap_attach_dev(struct device *dev
+ }
+ EXPORT_SYMBOL_GPL(regmap_attach_dev);
+
++static int dev_get_regmap_match(struct device *dev, void *res, void *data);
++
++static int regmap_detach_dev(struct device *dev, struct regmap *map)
++{
++ if (!dev)
++ return 0;
++
++ return devres_release(dev, dev_get_regmap_release,
++ dev_get_regmap_match, (void *)map->name);
++}
++
+ static enum regmap_endian regmap_get_reg_endian(const struct regmap_bus *bus,
+ const struct regmap_config *config)
+ {
+@@ -1444,6 +1455,7 @@ void regmap_exit(struct regmap *map)
+ {
+ struct regmap_async *async;
+
++ regmap_detach_dev(map->dev, map);
+ regcache_exit(map);
+
+ regmap_debugfs_exit(map);
--- /dev/null
+From 4ae132c693896b0713db572676c90ffd855a4246 Mon Sep 17 00:00:00 2001
+From: Mark Brown <broonie@kernel.org>
+Date: Wed, 27 Nov 2024 16:14:22 +0000
+Subject: selftest: hugetlb_dio: fix test naming
+
+From: Mark Brown <broonie@kernel.org>
+
+commit 4ae132c693896b0713db572676c90ffd855a4246 upstream.
+
+The string logged when a test passes or fails is used by the selftest
+framework to identify which test is being reported. The hugetlb_dio test
+not only uses the same strings for every test that is run but it also uses
+different strings for test passes and failures which means that test
+automation is unable to follow what the test is doing at all.
+
+Pull the existing duplicated logging of the number of free huge pages
+before and after the test out of the conditional and replace that and the
+logging of the result with a single ksft_print_result() which incorporates
+the parameters passed into the test into the output.
+
+Link: https://lkml.kernel.org/r/20241127-kselftest-mm-hugetlb-dio-names-v1-1-22aab01bf550@kernel.org
+Fixes: fae1980347bf ("selftests: hugetlb_dio: fixup check for initial conditions to skip in the start")
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Reviewed-by: Muhammad Usama Anjum <usama.anjum@collabora.com>
+Cc: Donet Tom <donettom@linux.ibm.com>
+Cc: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/mm/hugetlb_dio.c | 14 +++++---------
+ 1 file changed, 5 insertions(+), 9 deletions(-)
+
+diff --git a/tools/testing/selftests/mm/hugetlb_dio.c b/tools/testing/selftests/mm/hugetlb_dio.c
+index 432d5af15e66..db63abe5ee5e 100644
+--- a/tools/testing/selftests/mm/hugetlb_dio.c
++++ b/tools/testing/selftests/mm/hugetlb_dio.c
+@@ -76,19 +76,15 @@ void run_dio_using_hugetlb(unsigned int start_off, unsigned int end_off)
+ /* Get the free huge pages after unmap*/
+ free_hpage_a = get_free_hugepages();
+
++ ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b);
++ ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a);
++
+ /*
+ * If the no. of free hugepages before allocation and after unmap does
+ * not match - that means there could still be a page which is pinned.
+ */
+- if (free_hpage_a != free_hpage_b) {
+- ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b);
+- ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a);
+- ksft_test_result_fail(": Huge pages not freed!\n");
+- } else {
+- ksft_print_msg("No. Free pages before allocation : %d\n", free_hpage_b);
+- ksft_print_msg("No. Free pages after munmap : %d\n", free_hpage_a);
+- ksft_test_result_pass(": Huge pages freed successfully !\n");
+- }
++ ksft_test_result(free_hpage_a == free_hpage_b,
++ "free huge pages from %u-%u\n", start_off, end_off);
+ }
+
+ int main(void)
+--
+2.47.1
+
--- /dev/null
+From 4a475c0a7eeb3368eca40fe7cb02d157eeddc77a Mon Sep 17 00:00:00 2001
+From: Maximilian Heyne <mheyne@amazon.de>
+Date: Wed, 27 Nov 2024 12:08:53 +0000
+Subject: selftests/damon: add _damon_sysfs.py to TEST_FILES
+
+From: Maximilian Heyne <mheyne@amazon.de>
+
+commit 4a475c0a7eeb3368eca40fe7cb02d157eeddc77a upstream.
+
+When running selftests I encountered the following error message with
+some damon tests:
+
+ # Traceback (most recent call last):
+ # File "[...]/damon/./damos_quota.py", line 7, in <module>
+ # import _damon_sysfs
+ # ModuleNotFoundError: No module named '_damon_sysfs'
+
+Fix this by adding the _damon_sysfs.py file to TEST_FILES so that it
+will be available when running the respective damon selftests.
+
+Link: https://lkml.kernel.org/r/20241127-picks-visitor-7416685b-mheyne@amazon.de
+Fixes: 306abb63a8ca ("selftests/damon: implement a python module for test-purpose DAMON sysfs controls")
+Signed-off-by: Maximilian Heyne <mheyne@amazon.de>
+Reviewed-by: SeongJae Park <sj@kernel.org>
+Cc: Shuah Khan <shuah@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ tools/testing/selftests/damon/Makefile | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
+index 5b2a6a5dd1af..812f656260fb 100644
+--- a/tools/testing/selftests/damon/Makefile
++++ b/tools/testing/selftests/damon/Makefile
+@@ -6,7 +6,7 @@ TEST_GEN_FILES += debugfs_target_ids_read_before_terminate_race
+ TEST_GEN_FILES += debugfs_target_ids_pid_leak
+ TEST_GEN_FILES += access_memory access_memory_even
+
+-TEST_FILES = _chk_dependency.sh _debugfs_common.sh
++TEST_FILES = _chk_dependency.sh _debugfs_common.sh _damon_sysfs.py
+
+ # functionality tests
+ TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh
+--
+2.47.1
+
bcache-revert-replacing-is_err_or_null-with-is_err-again.patch
revert-readahead-properly-shorten-readahead-when-falling-back-to-do_page_cache_ra.patch
pmdomain-imx-gpcv2-adjust-delay-after-power-up-handshake.patch
+selftests-damon-add-_damon_sysfs.py-to-test_files.patch
+selftest-hugetlb_dio-fix-test-naming.patch
+cacheinfo-allocate-memory-during-cpu-hotplug-if-not-done-from-the-primary-cpu.patch
+x86-cacheinfo-delete-global-num_cache_leaves.patch
+drm-amdkfd-hard-code-cacheline-for-gc943-gc944.patch
+drm-dp_mst-fix-mst-sideband-message-body-length-check.patch
+drm-amdkfd-add-mec-version-that-supports-no-pcie-atomics-for-gfx12.patch
+drm-amd-pm-fix-and-simplify-workload-handling.patch
+drm-dp_mst-verify-request-type-in-the-corresponding-down-message-reply.patch
+drm-dp_mst-fix-resetting-msg-rx-state-after-topology-removal.patch
+drm-amdgpu-rework-resume-handling-for-display-v2.patch
+drm-amd-display-correct-prefetch-calculation.patch
+drm-amd-display-limit-vtotal-range-to-max-hw-cap-minus-fp.patch
+drm-amd-display-add-a-left-edge-pixel-if-in-ycbcr422-or-ycbcr420-and-odm.patch
+drm-amdgpu-hdp6.0-do-a-posting-read-when-flushing-hdp.patch
+drm-amdgpu-hdp4.0-do-a-posting-read-when-flushing-hdp.patch
+drm-amdgpu-hdp5.0-do-a-posting-read-when-flushing-hdp.patch
+drm-amdgpu-hdp7.0-do-a-posting-read-when-flushing-hdp.patch
+drm-amdgpu-hdp5.2-do-a-posting-read-when-flushing-hdp.patch
+modpost-add-.irqentry.text-to-other_sections.patch
+x86-kexec-restore-gdt-on-return-from-preserve_context-kexec.patch
+bpf-fix-oob-devmap-writes-when-deleting-elements.patch
+dma-buf-fix-dma_fence_array_signaled-v4.patch
+dma-fence-fix-reference-leak-on-fence-merge-failure-path.patch
+dma-fence-use-kernel-s-sort-for-merging-fences.patch
+xsk-fix-oob-map-writes-when-deleting-elements.patch
+regmap-detach-regmap-from-dev-on-regmap_exit.patch
+arch_numa-restore-nid-checks-before-registering-a-memblock-with-a-node.patch
+mmc-sdhci-pci-add-dmi-quirk-for-missing-cd-gpio-on-vexia-edu-atla-10-tablet.patch
+mmc-core-further-prevent-card-detect-during-shutdown.patch
+x86-cpu-add-lunar-lake-to-list-of-cpus-with-a-broken-monitor-implementation.patch
scsi-ufs-pltfrm-drop-pm-runtime-reference-count-after-ufshcd_remove.patch
--- /dev/null
+From 9677be09e5e4fbe48aeccb06ae3063c5eba331c3 Mon Sep 17 00:00:00 2001
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Date: Wed, 27 Nov 2024 16:22:47 -0800
+Subject: x86/cacheinfo: Delete global num_cache_leaves
+
+From: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+
+commit 9677be09e5e4fbe48aeccb06ae3063c5eba331c3 upstream.
+
+Linux remembers cpu_cacheinfo::num_leaves per CPU, but x86 initializes all
+CPUs from the same global "num_cache_leaves".
+
+This is erroneous on systems such as Meteor Lake, where each CPU has a
+distinct num_leaves value. Delete the global "num_cache_leaves" and
+initialize num_leaves on each CPU.
+
+init_cache_level() no longer needs to set num_leaves. Also, it never had to
+set num_levels, as it is unnecessary on x86. Keep checking for zero cache
+leaves, since such a condition indicates a bug.
+
+ [ bp: Cleanup. ]
+
+Signed-off-by: Ricardo Neri <ricardo.neri-calderon@linux.intel.com>
+Signed-off-by: Borislav Petkov (AMD) <bp@alien8.de>
+Cc: stable@vger.kernel.org # 6.3+
+Link: https://lore.kernel.org/r/20241128002247.26726-3-ricardo.neri-calderon@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/cacheinfo.c | 43 +++++++++++++++++++---------------------
+ 1 file changed, 21 insertions(+), 22 deletions(-)
+
+--- a/arch/x86/kernel/cpu/cacheinfo.c
++++ b/arch/x86/kernel/cpu/cacheinfo.c
+@@ -178,8 +178,6 @@ struct _cpuid4_info_regs {
+ struct amd_northbridge *nb;
+ };
+
+-static unsigned short num_cache_leaves;
+-
+ /* AMD doesn't have CPUID4. Emulate it here to report the same
+ information to the user. This makes some assumptions about the machine:
+ L2 not shared, no SMT etc. that is currently true on AMD CPUs.
+@@ -717,20 +715,23 @@ void cacheinfo_hygon_init_llc_id(struct
+
+ void init_amd_cacheinfo(struct cpuinfo_x86 *c)
+ {
++ struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
+
+ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+- num_cache_leaves = find_num_cache_leaves(c);
++ ci->num_leaves = find_num_cache_leaves(c);
+ } else if (c->extended_cpuid_level >= 0x80000006) {
+ if (cpuid_edx(0x80000006) & 0xf000)
+- num_cache_leaves = 4;
++ ci->num_leaves = 4;
+ else
+- num_cache_leaves = 3;
++ ci->num_leaves = 3;
+ }
+ }
+
+ void init_hygon_cacheinfo(struct cpuinfo_x86 *c)
+ {
+- num_cache_leaves = find_num_cache_leaves(c);
++ struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
++
++ ci->num_leaves = find_num_cache_leaves(c);
+ }
+
+ void init_intel_cacheinfo(struct cpuinfo_x86 *c)
+@@ -740,21 +741,21 @@ void init_intel_cacheinfo(struct cpuinfo
+ unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
+ unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
+ unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
++ struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index);
+
+ if (c->cpuid_level > 3) {
+- static int is_initialized;
+-
+- if (is_initialized == 0) {
+- /* Init num_cache_leaves from boot CPU */
+- num_cache_leaves = find_num_cache_leaves(c);
+- is_initialized++;
+- }
++ /*
++ * There should be at least one leaf. A non-zero value means
++ * that the number of leaves has been initialized.
++ */
++ if (!ci->num_leaves)
++ ci->num_leaves = find_num_cache_leaves(c);
+
+ /*
+ * Whenever possible use cpuid(4), deterministic cache
+ * parameters cpuid leaf to find the cache details
+ */
+- for (i = 0; i < num_cache_leaves; i++) {
++ for (i = 0; i < ci->num_leaves; i++) {
+ struct _cpuid4_info_regs this_leaf = {};
+ int retval;
+
+@@ -790,14 +791,14 @@ void init_intel_cacheinfo(struct cpuinfo
+ * Don't use cpuid2 if cpuid4 is supported. For P4, we use cpuid2 for
+ * trace cache
+ */
+- if ((num_cache_leaves == 0 || c->x86 == 15) && c->cpuid_level > 1) {
++ if ((!ci->num_leaves || c->x86 == 15) && c->cpuid_level > 1) {
+ /* supports eax=2 call */
+ int j, n;
+ unsigned int regs[4];
+ unsigned char *dp = (unsigned char *)regs;
+ int only_trace = 0;
+
+- if (num_cache_leaves != 0 && c->x86 == 15)
++ if (ci->num_leaves && c->x86 == 15)
+ only_trace = 1;
+
+ /* Number of times to iterate */
+@@ -991,14 +992,12 @@ static void ci_leaf_init(struct cacheinf
+
+ int init_cache_level(unsigned int cpu)
+ {
+- struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
++ struct cpu_cacheinfo *ci = get_cpu_cacheinfo(cpu);
+
+- if (!num_cache_leaves)
++ /* There should be at least one leaf. */
++ if (!ci->num_leaves)
+ return -ENOENT;
+- if (!this_cpu_ci)
+- return -EINVAL;
+- this_cpu_ci->num_levels = 3;
+- this_cpu_ci->num_leaves = num_cache_leaves;
++
+ return 0;
+ }
+
--- /dev/null
+From c9a4b55431e5220347881e148725bed69c84e037 Mon Sep 17 00:00:00 2001
+From: Len Brown <len.brown@intel.com>
+Date: Tue, 12 Nov 2024 21:07:00 -0500
+Subject: x86/cpu: Add Lunar Lake to list of CPUs with a broken MONITOR implementation
+
+From: Len Brown <len.brown@intel.com>
+
+commit c9a4b55431e5220347881e148725bed69c84e037 upstream.
+
+Under some conditions, MONITOR wakeups on Lunar Lake processors
+can be lost, resulting in significant user-visible delays.
+
+Add Lunar Lake to X86_BUG_MONITOR so that wake_up_idle_cpu()
+always sends an IPI, avoiding this potential delay.
+
+Reported originally here:
+
+ https://bugzilla.kernel.org/show_bug.cgi?id=219364
+
+[ dhansen: tweak subject ]
+
+Signed-off-by: Len Brown <len.brown@intel.com>
+Signed-off-by: Dave Hansen <dave.hansen@linux.intel.com>
+Reviewed-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/a4aa8842a3c3bfdb7fe9807710eef159cbf0e705.1731463305.git.len.brown%40intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/cpu/intel.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -586,7 +586,9 @@ static void init_intel(struct cpuinfo_x8
+ c->x86_vfm == INTEL_WESTMERE_EX))
+ set_cpu_bug(c, X86_BUG_CLFLUSH_MONITOR);
+
+- if (boot_cpu_has(X86_FEATURE_MWAIT) && c->x86_vfm == INTEL_ATOM_GOLDMONT)
++ if (boot_cpu_has(X86_FEATURE_MWAIT) &&
++ (c->x86_vfm == INTEL_ATOM_GOLDMONT ||
++ c->x86_vfm == INTEL_LUNARLAKE_M))
+ set_cpu_bug(c, X86_BUG_MONITOR);
+
+ #ifdef CONFIG_X86_64
--- /dev/null
+From 07fa619f2a40c221ea27747a3323cabc59ab25eb Mon Sep 17 00:00:00 2001
+From: David Woodhouse <dwmw@amazon.co.uk>
+Date: Thu, 5 Dec 2024 15:05:07 +0000
+Subject: x86/kexec: Restore GDT on return from ::preserve_context kexec
+
+From: David Woodhouse <dwmw@amazon.co.uk>
+
+commit 07fa619f2a40c221ea27747a3323cabc59ab25eb upstream.
+
+The restore_processor_state() function explicitly states that "the asm code
+that gets us here will have restored a usable GDT". That wasn't true in the
+case of returning from a ::preserve_context kexec. Make it so.
+
+Without this, the kernel was depending on the called function to reload a
+GDT which is appropriate for the kernel before returning.
+
+Test program:
+
+ #include <unistd.h>
+ #include <errno.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <linux/kexec.h>
+ #include <linux/reboot.h>
+ #include <sys/reboot.h>
+ #include <sys/syscall.h>
+
+ int main (void)
+ {
+ struct kexec_segment segment = {};
+ unsigned char purgatory[] = {
+ 0x66, 0xba, 0xf8, 0x03, // mov $0x3f8, %dx
+ 0xb0, 0x42, // mov $0x42, %al
+ 0xee, // outb %al, (%dx)
+ 0xc3, // ret
+ };
+ int ret;
+
+ segment.buf = &purgatory;
+ segment.bufsz = sizeof(purgatory);
+ segment.mem = (void *)0x400000;
+ segment.memsz = 0x1000;
+ ret = syscall(__NR_kexec_load, 0x400000, 1, &segment, KEXEC_PRESERVE_CONTEXT);
+ if (ret) {
+ perror("kexec_load");
+ exit(1);
+ }
+
+ ret = syscall(__NR_reboot, LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, LINUX_REBOOT_CMD_KEXEC);
+ if (ret) {
+ perror("kexec reboot");
+ exit(1);
+ }
+ printf("Success\n");
+ return 0;
+ }
+
+Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20241205153343.3275139-2-dwmw2@infradead.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ arch/x86/kernel/relocate_kernel_64.S | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+--- a/arch/x86/kernel/relocate_kernel_64.S
++++ b/arch/x86/kernel/relocate_kernel_64.S
+@@ -242,6 +242,13 @@ SYM_CODE_START_LOCAL_NOALIGN(virtual_map
+ movq CR0(%r8), %r8
+ movq %rax, %cr3
+ movq %r8, %cr0
++
++#ifdef CONFIG_KEXEC_JUMP
++ /* Saved in save_processor_state. */
++ movq $saved_context, %rax
++ lgdt saved_context_gdt_desc(%rax)
++#endif
++
+ movq %rbp, %rax
+
+ popf
--- /dev/null
+From 32cd3db7de97c0c7a018756ce66244342fd583f0 Mon Sep 17 00:00:00 2001
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Date: Fri, 22 Nov 2024 13:10:29 +0100
+Subject: xsk: fix OOB map writes when deleting elements
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+
+commit 32cd3db7de97c0c7a018756ce66244342fd583f0 upstream.
+
+Jordy says:
+
+"
+In the xsk_map_delete_elem function an unsigned integer
+(map->max_entries) is compared with a user-controlled signed integer
+(k). Due to implicit type conversion, a large unsigned value for
+map->max_entries can bypass the intended bounds check:
+
+ if (k >= map->max_entries)
+ return -EINVAL;
+
+This allows k to hold a negative value (between -2147483648 and -2),
+which is then used as an array index in m->xsk_map[k], which results
+in an out-of-bounds access.
+
+ spin_lock_bh(&m->lock);
+ map_entry = &m->xsk_map[k]; // Out-of-bounds map_entry
+ old_xs = unrcu_pointer(xchg(map_entry, NULL)); // Oob write
+ if (old_xs)
+ xsk_map_sock_delete(old_xs, map_entry);
+ spin_unlock_bh(&m->lock);
+
+The xchg operation can then be used to cause an out-of-bounds write.
+Moreover, the invalid map_entry passed to xsk_map_sock_delete can lead
+to further memory corruption.
+"
+
+It indeed results in following splat:
+
+[76612.897343] BUG: unable to handle page fault for address: ffffc8fc2e461108
+[76612.904330] #PF: supervisor write access in kernel mode
+[76612.909639] #PF: error_code(0x0002) - not-present page
+[76612.914855] PGD 0 P4D 0
+[76612.917431] Oops: Oops: 0002 [#1] PREEMPT SMP
+[76612.921859] CPU: 11 UID: 0 PID: 10318 Comm: a.out Not tainted 6.12.0-rc1+ #470
+[76612.929189] Hardware name: Intel Corporation S2600WFT/S2600WFT, BIOS SE5C620.86B.02.01.0008.031920191559 03/19/2019
+[76612.939781] RIP: 0010:xsk_map_delete_elem+0x2d/0x60
+[76612.944738] Code: 00 00 41 54 55 53 48 63 2e 3b 6f 24 73 38 4c 8d a7 f8 00 00 00 48 89 fb 4c 89 e7 e8 2d bf 05 00 48 8d b4 eb 00 01 00 00 31 ff <48> 87 3e 48 85 ff 74 05 e8 16 ff ff ff 4c 89 e7 e8 3e bc 05 00 31
+[76612.963774] RSP: 0018:ffffc9002e407df8 EFLAGS: 00010246
+[76612.969079] RAX: 0000000000000000 RBX: ffffc9002e461000 RCX: 0000000000000000
+[76612.976323] RDX: 0000000000000001 RSI: ffffc8fc2e461108 RDI: 0000000000000000
+[76612.983569] RBP: ffffffff80000001 R08: 0000000000000000 R09: 0000000000000007
+[76612.990812] R10: ffffc9002e407e18 R11: ffff888108a38858 R12: ffffc9002e4610f8
+[76612.998060] R13: ffff888108a38858 R14: 00007ffd1ae0ac78 R15: ffffc9002e4610c0
+[76613.005303] FS: 00007f80b6f59740(0000) GS:ffff8897e0ec0000(0000) knlGS:0000000000000000
+[76613.013517] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[76613.019349] CR2: ffffc8fc2e461108 CR3: 000000011e3ef001 CR4: 00000000007726f0
+[76613.026595] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+[76613.033841] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+[76613.041086] PKRU: 55555554
+[76613.043842] Call Trace:
+[76613.046331] <TASK>
+[76613.048468] ? __die+0x20/0x60
+[76613.051581] ? page_fault_oops+0x15a/0x450
+[76613.055747] ? search_extable+0x22/0x30
+[76613.059649] ? search_bpf_extables+0x5f/0x80
+[76613.063988] ? exc_page_fault+0xa9/0x140
+[76613.067975] ? asm_exc_page_fault+0x22/0x30
+[76613.072229] ? xsk_map_delete_elem+0x2d/0x60
+[76613.076573] ? xsk_map_delete_elem+0x23/0x60
+[76613.080914] __sys_bpf+0x19b7/0x23c0
+[76613.084555] __x64_sys_bpf+0x1a/0x20
+[76613.088194] do_syscall_64+0x37/0xb0
+[76613.091832] entry_SYSCALL_64_after_hwframe+0x4b/0x53
+[76613.096962] RIP: 0033:0x7f80b6d1e88d
+[76613.100592] Code: 5b 41 5c c3 66 0f 1f 84 00 00 00 00 00 f3 0f 1e fa 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d 73 b5 0f 00 f7 d8 64 89 01 48
+[76613.119631] RSP: 002b:00007ffd1ae0ac68 EFLAGS: 00000206 ORIG_RAX: 0000000000000141
+[76613.131330] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f80b6d1e88d
+[76613.142632] RDX: 0000000000000098 RSI: 00007ffd1ae0ad20 RDI: 0000000000000003
+[76613.153967] RBP: 00007ffd1ae0adc0 R08: 0000000000000000 R09: 0000000000000000
+[76613.166030] R10: 00007f80b6f77040 R11: 0000000000000206 R12: 00007ffd1ae0aed8
+[76613.177130] R13: 000055ddf42ce1e9 R14: 000055ddf42d0d98 R15: 00007f80b6fab040
+[76613.188129] </TASK>
+
+Fix this by simply changing the key type from int to u32.
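+
+As a standalone illustration (a userspace toy program, not kernel code)
+of the implicit conversion that makes the original check unsound:
+
+ #include <stdio.h>
+
+ int main(void)
+ {
+         unsigned int max_entries = 0xFFFFFFFFu; /* huge map->max_entries */
+         int k = -2;                             /* user-controlled key */
+
+         /* For the comparison, k is converted to unsigned int (0xFFFFFFFE),
+          * which is below max_entries, so the bounds check does not fire.
+          */
+         if (k >= max_entries) {
+                 printf("key rejected\n");
+                 return 0;
+         }
+
+         /* As an array index, however, k is still -2: m->xsk_map[k] would
+          * be an out-of-bounds access. Declaring k as u32 makes the check
+          * and the index agree.
+          */
+         printf("check bypassed, k = %d\n", k);
+         return 0;
+ }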
+
+Fixes: fbfc504a24f5 ("bpf: introduce new bpf AF_XDP map type BPF_MAP_TYPE_XSKMAP")
+CC: stable@vger.kernel.org
+Reported-by: Jordy Zomer <jordyzomer@google.com>
+Suggested-by: Jordy Zomer <jordyzomer@google.com>
+Reviewed-by: Toke Høiland-Jørgensen <toke@redhat.com>
+Acked-by: John Fastabend <john.fastabend@gmail.com>
+Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Link: https://lore.kernel.org/r/20241122121030.716788-2-maciej.fijalkowski@intel.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ net/xdp/xskmap.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/net/xdp/xskmap.c
++++ b/net/xdp/xskmap.c
+@@ -224,7 +224,7 @@ static long xsk_map_delete_elem(struct b
+ struct xsk_map *m = container_of(map, struct xsk_map, map);
+ struct xdp_sock __rcu **map_entry;
+ struct xdp_sock *old_xs;
+- int k = *(u32 *)key;
++ u32 k = *(u32 *)key;
+
+ if (k >= map->max_entries)
+ return -EINVAL;