git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
5.15-stable patches
author Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 17 Sep 2025 08:46:59 +0000 (10:46 +0200)
committer Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Wed, 17 Sep 2025 08:46:59 +0000 (10:46 +0200)
added patches:
drm-amdgpu-fix-a-memory-leak-in-fence-cleanup-when-unloading.patch
drm-i915-power-fix-size-for-for_each_set_bit-in-abox-iteration.patch
mm-memory-failure-fix-vm_bug_on_page-pagepoisoned-page-when-unpoison-memory.patch

queue-5.15/drm-amdgpu-fix-a-memory-leak-in-fence-cleanup-when-unloading.patch [new file with mode: 0644]
queue-5.15/drm-i915-power-fix-size-for-for_each_set_bit-in-abox-iteration.patch [new file with mode: 0644]
queue-5.15/mm-memory-failure-fix-vm_bug_on_page-pagepoisoned-page-when-unpoison-memory.patch [new file with mode: 0644]
queue-5.15/series

diff --git a/queue-5.15/drm-amdgpu-fix-a-memory-leak-in-fence-cleanup-when-unloading.patch b/queue-5.15/drm-amdgpu-fix-a-memory-leak-in-fence-cleanup-when-unloading.patch
new file mode 100644 (file)
index 0000000..9a1d759
--- /dev/null
@@ -0,0 +1,44 @@
+From stable+bounces-179597-greg=kroah.com@vger.kernel.org Mon Sep 15 04:43:57 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 14 Sep 2025 22:43:50 -0400
+Subject: drm/amdgpu: fix a memory leak in fence cleanup when unloading
+To: stable@vger.kernel.org
+Cc: "Alex Deucher" <alexander.deucher@amd.com>, "Lin.Cao" <lincao12@amd.com>, "Vitaly Prosyak" <vitaly.prosyak@amd.com>, "Christian König" <christian.koenig@amd.com>, "Sasha Levin" <sashal@kernel.org>
+Message-ID: <20250915024350.396085-1-sashal@kernel.org>
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+[ Upstream commit 7838fb5f119191403560eca2e23613380c0e425e ]
+
+Commit b61badd20b44 ("drm/amdgpu: fix usage slab after free")
+reordered when amdgpu_fence_driver_sw_fini() was called.  After
+that patch, amdgpu_fence_driver_sw_fini() effectively became
+a no-op as the sched entities were never freed because the
+ring pointers were already set to NULL.  Remove the NULL
+setting.
+
+Reported-by: Lin.Cao <lincao12@amd.com>
+Cc: Vitaly Prosyak <vitaly.prosyak@amd.com>
+Cc: Christian König <christian.koenig@amd.com>
+Fixes: b61badd20b44 ("drm/amdgpu: fix usage slab after free")
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+(cherry picked from commit a525fa37aac36c4591cc8b07ae8957862415fbd5)
+Cc: stable@vger.kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c |    2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+@@ -302,8 +302,6 @@ void amdgpu_ring_fini(struct amdgpu_ring
+       dma_fence_put(ring->vmid_wait);
+       ring->vmid_wait = NULL;
+       ring->me = 0;
+-
+-      ring->adev->rings[ring->idx] = NULL;
+ }
+ /**
diff --git a/queue-5.15/drm-i915-power-fix-size-for-for_each_set_bit-in-abox-iteration.patch b/queue-5.15/drm-i915-power-fix-size-for-for_each_set_bit-in-abox-iteration.patch
new file mode 100644 (file)
index 0000000..cec402a
--- /dev/null
@@ -0,0 +1,59 @@
+From stable+bounces-179588-greg=kroah.com@vger.kernel.org Sun Sep 14 22:21:42 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 14 Sep 2025 16:21:35 -0400
+Subject: drm/i915/power: fix size for for_each_set_bit() in abox iteration
+To: stable@vger.kernel.org
+Cc: "Jani Nikula" <jani.nikula@intel.com>, "Ville Syrjälä" <ville.syrjala@linux.intel.com>, "Matt Roper" <matthew.d.roper@intel.com>, "Tvrtko Ursulin" <tursulin@ursulin.net>, "Sasha Levin" <sashal@kernel.org>
+Message-ID: <20250914202135.202071-1-sashal@kernel.org>
+
+From: Jani Nikula <jani.nikula@intel.com>
+
+[ Upstream commit cfa7b7659757f8d0fc4914429efa90d0d2577dd7 ]
+
+for_each_set_bit() expects size to be in bits, not bytes. The abox mask
+iteration uses bytes, but it works by coincidence, because the local
+variable holding the mask is unsigned long, and the mask only ever has
+bit 2 as the highest bit. Using a smaller type could lead to subtle and
+very hard to track bugs.
+
+Fixes: 62afef2811e4 ("drm/i915/rkl: RKL uses ABOX0 for pixel transfers")
+Cc: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Cc: Matt Roper <matthew.d.roper@intel.com>
+Cc: stable@vger.kernel.org # v5.9+
+Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
+Link: https://lore.kernel.org/r/20250905104149.1144751-1-jani.nikula@intel.com
+Signed-off-by: Jani Nikula <jani.nikula@intel.com>
+(cherry picked from commit 7ea3baa6efe4bb93d11e1c0e6528b1468d7debf6)
+Signed-off-by: Tvrtko Ursulin <tursulin@ursulin.net>
+[ adapted struct intel_display *display parameters to struct drm_i915_private *dev_priv ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ drivers/gpu/drm/i915/display/intel_display_power.c |    6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+--- a/drivers/gpu/drm/i915/display/intel_display_power.c
++++ b/drivers/gpu/drm/i915/display/intel_display_power.c
+@@ -5293,7 +5293,7 @@ static void icl_mbus_init(struct drm_i91
+       if (DISPLAY_VER(dev_priv) == 12)
+               abox_regs |= BIT(0);
+-      for_each_set_bit(i, &abox_regs, sizeof(abox_regs))
++      for_each_set_bit(i, &abox_regs, BITS_PER_TYPE(abox_regs))
+               intel_de_rmw(dev_priv, MBUS_ABOX_CTL(i), mask, val);
+ }
+@@ -5754,11 +5754,11 @@ static void tgl_bw_buddy_init(struct drm
+       if (table[config].page_mask == 0) {
+               drm_dbg(&dev_priv->drm,
+                       "Unknown memory configuration; disabling address buddy logic.\n");
+-              for_each_set_bit(i, &abox_mask, sizeof(abox_mask))
++              for_each_set_bit(i, &abox_mask, BITS_PER_TYPE(abox_mask))
+                       intel_de_write(dev_priv, BW_BUDDY_CTL(i),
+                                      BW_BUDDY_DISABLE);
+       } else {
+-              for_each_set_bit(i, &abox_mask, sizeof(abox_mask)) {
++              for_each_set_bit(i, &abox_mask, BITS_PER_TYPE(abox_mask)) {
+                       intel_de_write(dev_priv, BW_BUDDY_PAGE_MASK(i),
+                                      table[config].page_mask);
diff --git a/queue-5.15/mm-memory-failure-fix-vm_bug_on_page-pagepoisoned-page-when-unpoison-memory.patch b/queue-5.15/mm-memory-failure-fix-vm_bug_on_page-pagepoisoned-page-when-unpoison-memory.patch
new file mode 100644 (file)
index 0000000..7c6e175
--- /dev/null
@@ -0,0 +1,113 @@
+From stable+bounces-179571-greg=kroah.com@vger.kernel.org Sun Sep 14 15:12:10 2025
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 14 Sep 2025 09:12:01 -0400
+Subject: mm/memory-failure: fix VM_BUG_ON_PAGE(PagePoisoned(page)) when unpoison memory
+To: stable@vger.kernel.org
+Cc: Miaohe Lin <linmiaohe@huawei.com>, David Hildenbrand <david@redhat.com>, Naoya Horiguchi <nao.horiguchi@gmail.com>, Andrew Morton <akpm@linux-foundation.org>, Sasha Levin <sashal@kernel.org>
+Message-ID: <20250914131201.67627-1-sashal@kernel.org>
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+[ Upstream commit d613f53c83ec47089c4e25859d5e8e0359f6f8da ]
+
+When I ran memory failure tests, the panic below occurred:
+
+page dumped because: VM_BUG_ON_PAGE(PagePoisoned(page))
+kernel BUG at include/linux/page-flags.h:616!
+Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
+CPU: 3 PID: 720 Comm: bash Not tainted 6.10.0-rc1-00195-g148743902568 #40
+RIP: 0010:unpoison_memory+0x2f3/0x590
+RSP: 0018:ffffa57fc8787d60 EFLAGS: 00000246
+RAX: 0000000000000037 RBX: 0000000000000009 RCX: ffff9be25fcdc9c8
+RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff9be25fcdc9c0
+RBP: 0000000000300000 R08: ffffffffb4956f88 R09: 0000000000009ffb
+R10: 0000000000000284 R11: ffffffffb4926fa0 R12: ffffe6b00c000000
+R13: ffff9bdb453dfd00 R14: 0000000000000000 R15: fffffffffffffffe
+FS:  00007f08f04e4740(0000) GS:ffff9be25fcc0000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000564787a30410 CR3: 000000010d4e2000 CR4: 00000000000006f0
+Call Trace:
+ <TASK>
+ unpoison_memory+0x2f3/0x590
+ simple_attr_write_xsigned.constprop.0.isra.0+0xb3/0x110
+ debugfs_attr_write+0x42/0x60
+ full_proxy_write+0x5b/0x80
+ vfs_write+0xd5/0x540
+ ksys_write+0x64/0xe0
+ do_syscall_64+0xb9/0x1d0
+ entry_SYSCALL_64_after_hwframe+0x77/0x7f
+RIP: 0033:0x7f08f0314887
+RSP: 002b:00007ffece710078 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
+RAX: ffffffffffffffda RBX: 0000000000000009 RCX: 00007f08f0314887
+RDX: 0000000000000009 RSI: 0000564787a30410 RDI: 0000000000000001
+RBP: 0000564787a30410 R08: 000000000000fefe R09: 000000007fffffff
+R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000009
+R13: 00007f08f041b780 R14: 00007f08f0417600 R15: 00007f08f0416a00
+ </TASK>
+Modules linked in: hwpoison_inject
+---[ end trace 0000000000000000 ]---
+RIP: 0010:unpoison_memory+0x2f3/0x590
+RSP: 0018:ffffa57fc8787d60 EFLAGS: 00000246
+RAX: 0000000000000037 RBX: 0000000000000009 RCX: ffff9be25fcdc9c8
+RDX: 0000000000000000 RSI: 0000000000000027 RDI: ffff9be25fcdc9c0
+RBP: 0000000000300000 R08: ffffffffb4956f88 R09: 0000000000009ffb
+R10: 0000000000000284 R11: ffffffffb4926fa0 R12: ffffe6b00c000000
+R13: ffff9bdb453dfd00 R14: 0000000000000000 R15: fffffffffffffffe
+FS:  00007f08f04e4740(0000) GS:ffff9be25fcc0000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000564787a30410 CR3: 000000010d4e2000 CR4: 00000000000006f0
+Kernel panic - not syncing: Fatal exception
+Kernel Offset: 0x31c00000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
+---[ end Kernel panic - not syncing: Fatal exception ]---
+
+The root cause is that unpoison_memory() tries to check the PG_HWPoison
+flags of an uninitialized page.  So VM_BUG_ON_PAGE(PagePoisoned(page)) is
+triggered.  This can be reproduced with the steps below:
+
+1. Offline memory block:
+
+ echo offline > /sys/devices/system/memory/memory12/state
+
+2. Get offlined memory pfn:
+
+ page-types -b n -rlN
+
+3. Write pfn to unpoison-pfn:
+
+ echo <pfn> > /sys/kernel/debug/hwpoison/unpoison-pfn
+
+This scenario can be identified by pfn_to_online_page() returning NULL.
+And ZONE_DEVICE pages are never expected, so we can simply fail if
+pfn_to_online_page() == NULL to fix the bug.
+
+Link: https://lkml.kernel.org/r/20250828024618.1744895-1-linmiaohe@huawei.com
+Fixes: f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded memory to zones until online")
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Suggested-by: David Hildenbrand <david@redhat.com>
+Acked-by: David Hildenbrand <david@redhat.com>
+Cc: Naoya Horiguchi <nao.horiguchi@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+[ Adjust context ]
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+---
+ mm/memory-failure.c |    7 +++----
+ 1 file changed, 3 insertions(+), 4 deletions(-)
+
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -2056,10 +2056,9 @@ int unpoison_memory(unsigned long pfn)
+       static DEFINE_RATELIMIT_STATE(unpoison_rs, DEFAULT_RATELIMIT_INTERVAL,
+                                       DEFAULT_RATELIMIT_BURST);
+-      if (!pfn_valid(pfn))
+-              return -ENXIO;
+-
+-      p = pfn_to_page(pfn);
++      p = pfn_to_online_page(pfn);
++      if (!p)
++              return -EIO;
+       page = compound_head(p);
+       mutex_lock(&mf_mutex);
index befdd473531c7626a118c6a971fad5fdaef906b7..f81f24266f9b12c4b015ee5085973314928e76d7 100644 (file)
@@ -59,3 +59,6 @@ dmaengine-qcom-bam_dma-fix-dt-error-handling-for-num-channels-ees.patch
 phy-tegra-xusb-fix-device-and-of-node-leak-at-probe.patch
 phy-ti-pipe3-fix-device-leak-at-unbind.patch
 soc-qcom-mdt_loader-deal-with-zero-e_shentsize.patch
+drm-amdgpu-fix-a-memory-leak-in-fence-cleanup-when-unloading.patch
+drm-i915-power-fix-size-for-for_each_set_bit-in-abox-iteration.patch
+mm-memory-failure-fix-vm_bug_on_page-pagepoisoned-page-when-unpoison-memory.patch