]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 6.11
authorSasha Levin <sashal@kernel.org>
Wed, 6 Nov 2024 01:54:04 +0000 (20:54 -0500)
committerSasha Levin <sashal@kernel.org>
Wed, 6 Nov 2024 01:55:02 +0000 (20:55 -0500)
Signed-off-by: Sasha Levin <sashal@kernel.org>
64 files changed:
queue-6.11/accel-ivpu-fix-noc-firewall-interrupt-handling.patch [new file with mode: 0644]
queue-6.11/alsa-hda-realtek-fix-headset-mic-on-tuxedo-gemini-17.patch [new file with mode: 0644]
queue-6.11/alsa-hda-realtek-fix-headset-mic-on-tuxedo-stellaris.patch [new file with mode: 0644]
queue-6.11/alsa-hda-realtek-limit-internal-mic-boost-on-dell-pl.patch [new file with mode: 0644]
queue-6.11/block-fix-sanity-checks-in-blk_rq_map_user_bvec.patch [new file with mode: 0644]
queue-6.11/btrfs-fix-defrag-not-merging-contiguous-extents-due-.patch [new file with mode: 0644]
queue-6.11/btrfs-fix-error-propagation-of-split-bios.patch [new file with mode: 0644]
queue-6.11/btrfs-fix-extent-map-merging-not-happening-for-adjac.patch [new file with mode: 0644]
queue-6.11/btrfs-fix-use-after-free-of-block-device-file-in-__b.patch [new file with mode: 0644]
queue-6.11/btrfs-merge-btrfs_orig_bbio_end_io-into-btrfs_bio_en.patch [new file with mode: 0644]
queue-6.11/cgroup-bpf-use-a-dedicated-workqueue-for-cgroup-bpf-.patch [new file with mode: 0644]
queue-6.11/cxl-acpi-ensure-ports-ready-at-cxl_acpi_probe-return.patch [new file with mode: 0644]
queue-6.11/cxl-port-fix-cxl_bus_rescan-vs-bus_rescan_devices.patch [new file with mode: 0644]
queue-6.11/drm-amd-pm-vangogh-fix-kernel-memory-out-of-bounds-w.patch [new file with mode: 0644]
queue-6.11/drm-amdgpu-smu13-fix-profile-reporting.patch [new file with mode: 0644]
queue-6.11/drm-connector-hdmi-fix-memory-leak-in-drm_display_mo.patch [new file with mode: 0644]
queue-6.11/drm-tests-hdmi-fix-memory-leaks-in-drm_display_mode_.patch [new file with mode: 0644]
queue-6.11/drm-tests-helpers-add-helper-for-drm_display_mode_fr.patch [new file with mode: 0644]
queue-6.11/drm-xe-add-mmio-read-before-ggtt-invalidate.patch [new file with mode: 0644]
queue-6.11/drm-xe-don-t-short-circuit-tdr-on-jobs-not-started.patch [new file with mode: 0644]
queue-6.11/drm-xe-fix-register-definition-order-in-xe_regs.h.patch [new file with mode: 0644]
queue-6.11/drm-xe-kill-regs-xe_sriov_regs.h.patch [new file with mode: 0644]
queue-6.11/fork-do-not-invoke-uffd-on-fork-if-error-occurs.patch [new file with mode: 0644]
queue-6.11/fork-only-invoke-khugepaged-ksm-hooks-if-no-error.patch [new file with mode: 0644]
queue-6.11/gpiolib-fix-debugfs-dangling-chip-separator.patch [new file with mode: 0644]
queue-6.11/gpiolib-fix-debugfs-newline-separators.patch [new file with mode: 0644]
queue-6.11/iio-light-veml6030-fix-microlux-value-calculation.patch-18046 [new file with mode: 0644]
queue-6.11/input-edt-ft5x06-fix-regmap-leak-when-probe-fails.patch [new file with mode: 0644]
queue-6.11/input-fix-regression-when-re-registering-input-handl.patch [new file with mode: 0644]
queue-6.11/io_uring-rw-fix-missing-nowait-check-for-o_direct-st.patch [new file with mode: 0644]
queue-6.11/iov_iter-fix-copy_page_from_iter_atomic-if-kmap_loca.patch [new file with mode: 0644]
queue-6.11/kasan-remove-vmalloc_percpu-test.patch [new file with mode: 0644]
queue-6.11/mctp-i2c-handle-null-header-address.patch [new file with mode: 0644]
queue-6.11/mei-use-kvmalloc-for-read-buffer.patch [new file with mode: 0644]
queue-6.11/mm-mmap-limit-thp-alignment-of-anonymous-mappings-to.patch [new file with mode: 0644]
queue-6.11/mm-multi-gen-lru-ignore-non-leaf-pmd_young-for-force.patch [new file with mode: 0644]
queue-6.11/mm-multi-gen-lru-remove-mm_leaf_old-and-mm_nonleaf_t.patch [new file with mode: 0644]
queue-6.11/mm-multi-gen-lru-use-ptep-pmdp-_clear_young_notify.patch [new file with mode: 0644]
queue-6.11/mm-page_alloc-let-gfp_atomic-order-0-allocs-access-h.patch [new file with mode: 0644]
queue-6.11/mm-shrink-skip-folio-mapped-by-an-exiting-process.patch [new file with mode: 0644]
queue-6.11/mptcp-init-protect-sched-with-rcu_read_lock.patch [new file with mode: 0644]
queue-6.11/nvme-re-fix-error-handling-for-io_uring-nvme-passthr.patch [new file with mode: 0644]
queue-6.11/nvmet-auth-assign-dh_key-to-null-after-kfree_sensiti.patch [new file with mode: 0644]
queue-6.11/ocfs2-pass-u64-to-ocfs2_truncate_inline-maybe-overfl.patch [new file with mode: 0644]
queue-6.11/phy-freescale-imx8m-pcie-do-cmn_rst-just-before-phy-.patch [new file with mode: 0644]
queue-6.11/posix-cpu-timers-clear-tick_dep_bit_posix_timer-on-c.patch [new file with mode: 0644]
queue-6.11/resource-kexec-walk_system_ram_res_rev-must-retain-r.patch [new file with mode: 0644]
queue-6.11/riscv-efi-set-nx-compat-flag-in-pe-coff-header.patch [new file with mode: 0644]
queue-6.11/riscv-prevent-a-bad-reference-count-on-cpu-nodes.patch [new file with mode: 0644]
queue-6.11/riscv-remove-duplicated-get_rm.patch [new file with mode: 0644]
queue-6.11/riscv-remove-unused-generating_asm_offsets.patch [new file with mode: 0644]
queue-6.11/riscv-use-u-to-format-the-output-of-cpu.patch [new file with mode: 0644]
queue-6.11/riscv-vdso-prevent-the-compiler-from-inserting-calls.patch [new file with mode: 0644]
queue-6.11/sched-numa-fix-the-potential-null-pointer-dereferenc.patch [new file with mode: 0644]
queue-6.11/scsi-ufs-core-fix-another-deadlock-during-rtc-update.patch [new file with mode: 0644]
queue-6.11/series
queue-6.11/spi-spi-fsl-dspi-fix-crash-when-not-using-gpio-chip-.patch [new file with mode: 0644]
queue-6.11/tpm-lazily-flush-the-auth-session.patch [new file with mode: 0644]
queue-6.11/tpm-return-tpm2_sessions_init-when-null-key-creation.patch [new file with mode: 0644]
queue-6.11/tpm-rollback-tpm2_load_null.patch [new file with mode: 0644]
queue-6.11/vmscan-migrate-fix-page-count-imbalance-on-node-stat.patch [new file with mode: 0644]
queue-6.11/x86-traps-enable-ubsan-traps-on-x86.patch [new file with mode: 0644]
queue-6.11/x86-traps-move-kmsan-check-after-instrumentation_beg.patch [new file with mode: 0644]
queue-6.11/xfs-fix-finding-a-last-resort-ag-in-xfs_filestream_p.patch [new file with mode: 0644]

diff --git a/queue-6.11/accel-ivpu-fix-noc-firewall-interrupt-handling.patch b/queue-6.11/accel-ivpu-fix-noc-firewall-interrupt-handling.patch
new file mode 100644 (file)
index 0000000..a2b3c9b
--- /dev/null
@@ -0,0 +1,101 @@
+From 733b0f88c8d512282b90d5a1903a5e90c0b76cca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Oct 2024 16:49:58 +0200
+Subject: accel/ivpu: Fix NOC firewall interrupt handling
+
+From: Andrzej Kacprowski <Andrzej.Kacprowski@intel.com>
+
+[ Upstream commit 72f7e16eccddde99386a10eb2d08833e805917c6 ]
+
+The NOC firewall interrupt means that the HW prevented
+unauthorized access to a protected resource, so there
+is no need to trigger device reset in such case.
+
+To facilitate security testing add firewall_irq_counter
+debugfs file that tracks firewall interrupts.
+
+Fixes: 8a27ad81f7d3 ("accel/ivpu: Split IP and buttress code")
+Cc: stable@vger.kernel.org # v6.11+
+Signed-off-by: Andrzej Kacprowski <Andrzej.Kacprowski@intel.com>
+Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
+Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com>
+Signed-off-by: Jacek Lawrynowicz <jacek.lawrynowicz@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241017144958.79327-1-jacek.lawrynowicz@linux.intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/accel/ivpu/ivpu_debugfs.c | 9 +++++++++
+ drivers/accel/ivpu/ivpu_hw.c      | 1 +
+ drivers/accel/ivpu/ivpu_hw.h      | 1 +
+ drivers/accel/ivpu/ivpu_hw_ip.c   | 5 ++++-
+ 4 files changed, 15 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
+index 6f86f8df30db0..8d50981594d15 100644
+--- a/drivers/accel/ivpu/ivpu_debugfs.c
++++ b/drivers/accel/ivpu/ivpu_debugfs.c
+@@ -108,6 +108,14 @@ static int reset_pending_show(struct seq_file *s, void *v)
+       return 0;
+ }
++static int firewall_irq_counter_show(struct seq_file *s, void *v)
++{
++      struct ivpu_device *vdev = seq_to_ivpu(s);
++
++      seq_printf(s, "%d\n", atomic_read(&vdev->hw->firewall_irq_counter));
++      return 0;
++}
++
+ static const struct drm_debugfs_info vdev_debugfs_list[] = {
+       {"bo_list", bo_list_show, 0},
+       {"fw_name", fw_name_show, 0},
+@@ -116,6 +124,7 @@ static const struct drm_debugfs_info vdev_debugfs_list[] = {
+       {"last_bootmode", last_bootmode_show, 0},
+       {"reset_counter", reset_counter_show, 0},
+       {"reset_pending", reset_pending_show, 0},
++      {"firewall_irq_counter", firewall_irq_counter_show, 0},
+ };
+ static ssize_t
+diff --git a/drivers/accel/ivpu/ivpu_hw.c b/drivers/accel/ivpu/ivpu_hw.c
+index 27f0fe4d54e00..e69c0613513f1 100644
+--- a/drivers/accel/ivpu/ivpu_hw.c
++++ b/drivers/accel/ivpu/ivpu_hw.c
+@@ -249,6 +249,7 @@ int ivpu_hw_init(struct ivpu_device *vdev)
+       platform_init(vdev);
+       wa_init(vdev);
+       timeouts_init(vdev);
++      atomic_set(&vdev->hw->firewall_irq_counter, 0);
+       return 0;
+ }
+diff --git a/drivers/accel/ivpu/ivpu_hw.h b/drivers/accel/ivpu/ivpu_hw.h
+index 1c0c98e3afb88..a96a05b2acda9 100644
+--- a/drivers/accel/ivpu/ivpu_hw.h
++++ b/drivers/accel/ivpu/ivpu_hw.h
+@@ -52,6 +52,7 @@ struct ivpu_hw_info {
+       int dma_bits;
+       ktime_t d0i3_entry_host_ts;
+       u64 d0i3_entry_vpu_ts;
++      atomic_t firewall_irq_counter;
+ };
+ int ivpu_hw_init(struct ivpu_device *vdev);
+diff --git a/drivers/accel/ivpu/ivpu_hw_ip.c b/drivers/accel/ivpu/ivpu_hw_ip.c
+index dfd2f4a5b5268..60b33fc59d96e 100644
+--- a/drivers/accel/ivpu/ivpu_hw_ip.c
++++ b/drivers/accel/ivpu/ivpu_hw_ip.c
+@@ -1062,7 +1062,10 @@ static void irq_wdt_mss_handler(struct ivpu_device *vdev)
+ static void irq_noc_firewall_handler(struct ivpu_device *vdev)
+ {
+-      ivpu_pm_trigger_recovery(vdev, "NOC Firewall IRQ");
++      atomic_inc(&vdev->hw->firewall_irq_counter);
++
++      ivpu_dbg(vdev, IRQ, "NOC Firewall interrupt detected, counter %d\n",
++               atomic_read(&vdev->hw->firewall_irq_counter));
+ }
+ /* Handler for IRQs from NPU core */
+-- 
+2.43.0
+
diff --git a/queue-6.11/alsa-hda-realtek-fix-headset-mic-on-tuxedo-gemini-17.patch b/queue-6.11/alsa-hda-realtek-fix-headset-mic-on-tuxedo-gemini-17.patch
new file mode 100644 (file)
index 0000000..874b939
--- /dev/null
@@ -0,0 +1,36 @@
+From a2816e4957d37d5a472129d2feffc1f586662fdc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 29 Oct 2024 16:16:52 +0100
+Subject: ALSA: hda/realtek: Fix headset mic on TUXEDO Gemini 17 Gen3
+
+From: Christoffer Sandberg <cs@tuxedo.de>
+
+[ Upstream commit 0b04fbe886b4274c8e5855011233aaa69fec6e75 ]
+
+Quirk is needed to enable headset microphone on missing pin 0x19.
+
+Signed-off-by: Christoffer Sandberg <cs@tuxedo.de>
+Signed-off-by: Werner Sembach <wse@tuxedocomputers.com>
+Cc: <stable@vger.kernel.org>
+Link: https://patch.msgid.link/20241029151653.80726-1-wse@tuxedocomputers.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 8d6f446d507c2..25d4c417d3c07 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -10729,6 +10729,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+       SND_PCI_QUIRK(0x1558, 0x1404, "Clevo N150CU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x14a1, "Clevo L141MU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x2624, "Clevo L240TU", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++      SND_PCI_QUIRK(0x1558, 0x28c1, "Clevo V370VND", ALC2XX_FIXUP_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1558, 0x4018, "Clevo NV40M[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x4019, "Clevo NV40MZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x4020, "Clevo NV40MB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+-- 
+2.43.0
+
diff --git a/queue-6.11/alsa-hda-realtek-fix-headset-mic-on-tuxedo-stellaris.patch b/queue-6.11/alsa-hda-realtek-fix-headset-mic-on-tuxedo-stellaris.patch
new file mode 100644 (file)
index 0000000..44f2deb
--- /dev/null
@@ -0,0 +1,36 @@
+From 83b52e2959734734c3d15f52435cec1a85988591 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 29 Oct 2024 16:16:53 +0100
+Subject: ALSA: hda/realtek: Fix headset mic on TUXEDO Stellaris 16 Gen6 mb1
+
+From: Christoffer Sandberg <cs@tuxedo.de>
+
+[ Upstream commit e49370d769e71456db3fbd982e95bab8c69f73e8 ]
+
+Quirk is needed to enable headset microphone on missing pin 0x19.
+
+Signed-off-by: Christoffer Sandberg <cs@tuxedo.de>
+Signed-off-by: Werner Sembach <wse@tuxedocomputers.com>
+Cc: <stable@vger.kernel.org>
+Link: https://patch.msgid.link/20241029151653.80726-2-wse@tuxedocomputers.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 25d4c417d3c07..660fd984a9285 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -10971,6 +10971,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+       SND_PCI_QUIRK(0x1d05, 0x115c, "TongFang GMxTGxx", ALC269_FIXUP_NO_SHUTUP),
+       SND_PCI_QUIRK(0x1d05, 0x121b, "TongFang GMxAGxx", ALC269_FIXUP_NO_SHUTUP),
+       SND_PCI_QUIRK(0x1d05, 0x1387, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC),
++      SND_PCI_QUIRK(0x1d05, 0x1409, "TongFang GMxIXxx", ALC2XX_FIXUP_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1d17, 0x3288, "Haier Boyue G42", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS),
+       SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),
+-- 
+2.43.0
+
diff --git a/queue-6.11/alsa-hda-realtek-limit-internal-mic-boost-on-dell-pl.patch b/queue-6.11/alsa-hda-realtek-limit-internal-mic-boost-on-dell-pl.patch
new file mode 100644 (file)
index 0000000..905429e
--- /dev/null
@@ -0,0 +1,97 @@
+From a9024ea866983de0fa38466135dfca87b37dffc4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Oct 2024 13:53:24 +0800
+Subject: ALSA: hda/realtek: Limit internal Mic boost on Dell platform
+
+From: Kailang Yang <kailang@realtek.com>
+
+[ Upstream commit 78e7be018784934081afec77f96d49a2483f9188 ]
+
+Dell want to limit internal Mic boost on all Dell platform.
+
+Signed-off-by: Kailang Yang <kailang@realtek.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/561fc5f5eff04b6cbd79ed173cd1c1db@realtek.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 21 ++++++++++++++++++---
+ 1 file changed, 18 insertions(+), 3 deletions(-)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 2583081c0a3a5..8d6f446d507c2 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -7507,6 +7507,7 @@ enum {
+       ALC286_FIXUP_SONY_MIC_NO_PRESENCE,
+       ALC269_FIXUP_PINCFG_NO_HP_TO_LINEOUT,
+       ALC269_FIXUP_DELL1_MIC_NO_PRESENCE,
++      ALC269_FIXUP_DELL1_LIMIT_INT_MIC_BOOST,
+       ALC269_FIXUP_DELL2_MIC_NO_PRESENCE,
+       ALC269_FIXUP_DELL3_MIC_NO_PRESENCE,
+       ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
+@@ -7541,6 +7542,7 @@ enum {
+       ALC255_FIXUP_ACER_MIC_NO_PRESENCE,
+       ALC255_FIXUP_ASUS_MIC_NO_PRESENCE,
+       ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
++      ALC255_FIXUP_DELL1_LIMIT_INT_MIC_BOOST,
+       ALC255_FIXUP_DELL2_MIC_NO_PRESENCE,
+       ALC255_FIXUP_HEADSET_MODE,
+       ALC255_FIXUP_HEADSET_MODE_NO_HP_MIC,
+@@ -8102,6 +8104,12 @@ static const struct hda_fixup alc269_fixups[] = {
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE
+       },
++      [ALC269_FIXUP_DELL1_LIMIT_INT_MIC_BOOST] = {
++              .type = HDA_FIXUP_FUNC,
++              .v.func = alc269_fixup_limit_int_mic_boost,
++              .chained = true,
++              .chain_id = ALC269_FIXUP_DELL1_MIC_NO_PRESENCE
++      },
+       [ALC269_FIXUP_DELL2_MIC_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+@@ -8382,6 +8390,12 @@ static const struct hda_fixup alc269_fixups[] = {
+               .chained = true,
+               .chain_id = ALC255_FIXUP_HEADSET_MODE
+       },
++      [ALC255_FIXUP_DELL1_LIMIT_INT_MIC_BOOST] = {
++              .type = HDA_FIXUP_FUNC,
++              .v.func = alc269_fixup_limit_int_mic_boost,
++              .chained = true,
++              .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE
++      },
+       [ALC255_FIXUP_DELL2_MIC_NO_PRESENCE] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+@@ -11050,6 +11064,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
+       {.id = ALC269_FIXUP_DELL2_MIC_NO_PRESENCE, .name = "dell-headset-dock"},
+       {.id = ALC269_FIXUP_DELL3_MIC_NO_PRESENCE, .name = "dell-headset3"},
+       {.id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE, .name = "dell-headset4"},
++      {.id = ALC269_FIXUP_DELL4_MIC_NO_PRESENCE_QUIET, .name = "dell-headset4-quiet"},
+       {.id = ALC283_FIXUP_CHROME_BOOK, .name = "alc283-dac-wcaps"},
+       {.id = ALC283_FIXUP_SENSE_COMBO_JACK, .name = "alc283-sense-combo"},
+       {.id = ALC292_FIXUP_TPT440_DOCK, .name = "tpt440-dock"},
+@@ -11604,16 +11619,16 @@ static const struct snd_hda_pin_quirk alc269_fallback_pin_fixup_tbl[] = {
+       SND_HDA_PIN_QUIRK(0x10ec0289, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
+               {0x19, 0x40000000},
+               {0x1b, 0x40000000}),
+-      SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE,
++      SND_HDA_PIN_QUIRK(0x10ec0295, 0x1028, "Dell", ALC269_FIXUP_DELL4_MIC_NO_PRESENCE_QUIET,
+               {0x19, 0x40000000},
+               {0x1b, 0x40000000}),
+       SND_HDA_PIN_QUIRK(0x10ec0256, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+               {0x19, 0x40000000},
+               {0x1a, 0x40000000}),
+-      SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
++      SND_HDA_PIN_QUIRK(0x10ec0236, 0x1028, "Dell", ALC255_FIXUP_DELL1_LIMIT_INT_MIC_BOOST,
+               {0x19, 0x40000000},
+               {0x1a, 0x40000000}),
+-      SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC274_FIXUP_DELL_AIO_LINEOUT_VERB,
++      SND_HDA_PIN_QUIRK(0x10ec0274, 0x1028, "Dell", ALC269_FIXUP_DELL1_LIMIT_INT_MIC_BOOST,
+               {0x19, 0x40000000},
+               {0x1a, 0x40000000}),
+       SND_HDA_PIN_QUIRK(0x10ec0256, 0x1043, "ASUS", ALC2XX_FIXUP_HEADSET_MIC,
+-- 
+2.43.0
+
diff --git a/queue-6.11/block-fix-sanity-checks-in-blk_rq_map_user_bvec.patch b/queue-6.11/block-fix-sanity-checks-in-blk_rq_map_user_bvec.patch
new file mode 100644 (file)
index 0000000..13a6f75
--- /dev/null
@@ -0,0 +1,59 @@
+From 5335fe64877ec5c011062af1001ab730ee756a17 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Oct 2024 15:15:19 -0600
+Subject: block: fix sanity checks in blk_rq_map_user_bvec
+
+From: Xinyu Zhang <xizhang@purestorage.com>
+
+[ Upstream commit 2ff949441802a8d076d9013c7761f63e8ae5a9bd ]
+
+blk_rq_map_user_bvec contains a check bytes + bv->bv_len > nr_iter which
+causes unnecessary failures in NVMe passthrough I/O, reproducible as
+follows:
+
+- register a 2 page, page-aligned buffer against a ring
+- use that buffer to do a 1 page io_uring NVMe passthrough read
+
+The second (i = 1) iteration of the loop in blk_rq_map_user_bvec will
+then have nr_iter == 1 page, bytes == 1 page, bv->bv_len == 1 page, so
+the check bytes + bv->bv_len > nr_iter will succeed, causing the I/O to
+fail. This failure is unnecessary, as when the check succeeds, it means
+we've checked the entire buffer that will be used by the request - i.e.
+blk_rq_map_user_bvec should complete successfully. Therefore, terminate
+the loop early and return successfully when the check bytes + bv->bv_len
+> nr_iter succeeds.
+
+While we're at it, also remove the check that all segments in the bvec
+are single-page. While this seems to be true for all users of the
+function, it doesn't appear to be required anywhere downstream.
+
+CC: stable@vger.kernel.org
+Signed-off-by: Xinyu Zhang <xizhang@purestorage.com>
+Co-developed-by: Uday Shankar <ushankar@purestorage.com>
+Signed-off-by: Uday Shankar <ushankar@purestorage.com>
+Fixes: 37987547932c ("block: extend functionality to map bvec iterator")
+Link: https://lore.kernel.org/r/20241023211519.4177873-1-ushankar@purestorage.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-map.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/block/blk-map.c b/block/blk-map.c
+index 0e1167b239342..6ef2ec1f7d78b 100644
+--- a/block/blk-map.c
++++ b/block/blk-map.c
+@@ -600,9 +600,7 @@ static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
+               if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len)
+                       goto put_bio;
+               if (bytes + bv->bv_len > nr_iter)
+-                      goto put_bio;
+-              if (bv->bv_offset + bv->bv_len > PAGE_SIZE)
+-                      goto put_bio;
++                      break;
+               nsegs++;
+               bytes += bv->bv_len;
+-- 
+2.43.0
+
diff --git a/queue-6.11/btrfs-fix-defrag-not-merging-contiguous-extents-due-.patch b/queue-6.11/btrfs-fix-defrag-not-merging-contiguous-extents-due-.patch
new file mode 100644 (file)
index 0000000..46e9758
--- /dev/null
@@ -0,0 +1,119 @@
+From 60e96501621d271121d40e3002f5fe2317f42273 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 29 Oct 2024 15:18:45 +0000
+Subject: btrfs: fix defrag not merging contiguous extents due to merged extent
+ maps
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 77b0d113eec49a7390ff1a08ca1923e89f5f86c6 ]
+
+When running defrag (manual defrag) against a file that has extents that
+are contiguous and we already have the respective extent maps loaded and
+merged, we end up not defragging the range covered by those contiguous
+extents. This happens when we have an extent map that was the result of
+merging multiple extent maps for contiguous extents and the length of the
+merged extent map is greater than or equals to the defrag threshold
+length.
+
+The script below reproduces this scenario:
+
+   $ cat test.sh
+   #!/bin/bash
+
+   DEV=/dev/sdi
+   MNT=/mnt/sdi
+
+   mkfs.btrfs -f $DEV
+   mount $DEV $MNT
+
+   # Create a 256K file with 4 extents of 64K each.
+   xfs_io -f -c "falloc 0 64K" \
+             -c "pwrite 0 64K" \
+             -c "falloc 64K 64K" \
+             -c "pwrite 64K 64K" \
+             -c "falloc 128K 64K" \
+             -c "pwrite 128K 64K" \
+             -c "falloc 192K 64K" \
+             -c "pwrite 192K 64K" \
+             $MNT/foo
+
+   umount $MNT
+   echo -n "Initial number of file extent items: "
+   btrfs inspect-internal dump-tree -t 5 $DEV | grep EXTENT_DATA | wc -l
+
+   mount $DEV $MNT
+   # Read the whole file in order to load and merge extent maps.
+   cat $MNT/foo > /dev/null
+
+   btrfs filesystem defragment -t 128K $MNT/foo
+   umount $MNT
+   echo -n "Number of file extent items after defrag with 128K threshold: "
+   btrfs inspect-internal dump-tree -t 5 $DEV | grep EXTENT_DATA | wc -l
+
+   mount $DEV $MNT
+   # Read the whole file in order to load and merge extent maps.
+   cat $MNT/foo > /dev/null
+
+   btrfs filesystem defragment -t 256K $MNT/foo
+   umount $MNT
+   echo -n "Number of file extent items after defrag with 256K threshold: "
+   btrfs inspect-internal dump-tree -t 5 $DEV | grep EXTENT_DATA | wc -l
+
+Running it:
+
+   $ ./test.sh
+   Initial number of file extent items: 4
+   Number of file extent items after defrag with 128K threshold: 4
+   Number of file extent items after defrag with 256K threshold: 4
+
+The 4 extents don't get merged because we have an extent map with a size
+of 256K that is the result of merging the individual extent maps for each
+of the four 64K extents and at defrag_lookup_extent() we have a value of
+zero for the generation threshold ('newer_than' argument) since this is a
+manual defrag. As a consequence we don't call defrag_get_extent() to get
+an extent map representing a single file extent item in the inode's
+subvolume tree, so we end up using the merged extent map at
+defrag_collect_targets() and decide not to defrag.
+
+Fix this by updating defrag_lookup_extent() to always discard extent maps
+that were merged and call defrag_get_extent() regardless of the minimum
+generation threshold ('newer_than' argument).
+
+A test case for fstests will be sent along soon.
+
+CC: stable@vger.kernel.org # 6.1+
+Fixes: 199257a78bb0 ("btrfs: defrag: don't use merged extent map for their generation check")
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/defrag.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/fs/btrfs/defrag.c b/fs/btrfs/defrag.c
+index f6dbda37a3615..990ef97accec4 100644
+--- a/fs/btrfs/defrag.c
++++ b/fs/btrfs/defrag.c
+@@ -772,12 +772,12 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start,
+        * We can get a merged extent, in that case, we need to re-search
+        * tree to get the original em for defrag.
+        *
+-       * If @newer_than is 0 or em::generation < newer_than, we can trust
+-       * this em, as either we don't care about the generation, or the
+-       * merged extent map will be rejected anyway.
++       * This is because even if we have adjacent extents that are contiguous
++       * and compatible (same type and flags), we still want to defrag them
++       * so that we use less metadata (extent items in the extent tree and
++       * file extent items in the inode's subvolume tree).
+        */
+-      if (em && (em->flags & EXTENT_FLAG_MERGED) &&
+-          newer_than && em->generation >= newer_than) {
++      if (em && (em->flags & EXTENT_FLAG_MERGED)) {
+               free_extent_map(em);
+               em = NULL;
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.11/btrfs-fix-error-propagation-of-split-bios.patch b/queue-6.11/btrfs-fix-error-propagation-of-split-bios.patch
new file mode 100644 (file)
index 0000000..dd1990b
--- /dev/null
@@ -0,0 +1,243 @@
+From ba1a42ef6f2a8f4e82e915247d69fb42ed3ec46c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 9 Oct 2024 22:52:06 +0900
+Subject: btrfs: fix error propagation of split bios
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit d48e1dea3931de64c26717adc2b89743c7ab6594 ]
+
+The purpose of btrfs_bbio_propagate_error() shall be propagating an error
+of split bio to its original btrfs_bio, and tell the error to the upper
+layer. However, it's not working well on some cases.
+
+* Case 1. Immediate (or quick) end_bio with an error
+
+When btrfs sends btrfs_bio to mirrored devices, btrfs calls
+btrfs_bio_end_io() when all the mirroring bios are completed. If that
+btrfs_bio was split, it is from btrfs_clone_bioset and its end_io function
+is btrfs_orig_write_end_io. For this case, btrfs_bbio_propagate_error()
+accesses the orig_bbio's bio context to increase the error count.
+
+That works well in most cases. However, if the end_io is called enough
+fast, orig_bbio's (remaining part after split) bio context may not be
+properly set at that time. Since the bio context is set when the orig_bbio
+(the last btrfs_bio) is sent to devices, that might be too late for earlier
+split btrfs_bio's completion.  That will result in NULL pointer
+dereference.
+
+That bug is easily reproducible by running btrfs/146 on zoned devices [1]
+and it shows the following trace.
+
+[1] You need raid-stripe-tree feature as it creates "-d raid0 -m raid1" FS.
+
+  BUG: kernel NULL pointer dereference, address: 0000000000000020
+  #PF: supervisor read access in kernel mode
+  #PF: error_code(0x0000) - not-present page
+  PGD 0 P4D 0
+  Oops: Oops: 0000 [#1] PREEMPT SMP PTI
+  CPU: 1 UID: 0 PID: 13 Comm: kworker/u32:1 Not tainted 6.11.0-rc7-BTRFS-ZNS+ #474
+  Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
+  Workqueue: writeback wb_workfn (flush-btrfs-5)
+  RIP: 0010:btrfs_bio_end_io+0xae/0xc0 [btrfs]
+  BTRFS error (device dm-0): bdev /dev/mapper/error-test errs: wr 2, rd 0, flush 0, corrupt 0, gen 0
+  RSP: 0018:ffffc9000006f248 EFLAGS: 00010246
+  RAX: 0000000000000000 RBX: ffff888005a7f080 RCX: ffffc9000006f1dc
+  RDX: 0000000000000000 RSI: 000000000000000a RDI: ffff888005a7f080
+  RBP: ffff888011dfc540 R08: 0000000000000000 R09: 0000000000000001
+  R10: ffffffff82e508e0 R11: 0000000000000005 R12: ffff88800ddfbe58
+  R13: ffff888005a7f080 R14: ffff888005a7f158 R15: ffff888005a7f158
+  FS:  0000000000000000(0000) GS:ffff88803ea80000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 0000000000000020 CR3: 0000000002e22006 CR4: 0000000000370ef0
+  DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+  DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+  Call Trace:
+   <TASK>
+   ? __die_body.cold+0x19/0x26
+   ? page_fault_oops+0x13e/0x2b0
+   ? _printk+0x58/0x73
+   ? do_user_addr_fault+0x5f/0x750
+   ? exc_page_fault+0x76/0x240
+   ? asm_exc_page_fault+0x22/0x30
+   ? btrfs_bio_end_io+0xae/0xc0 [btrfs]
+   ? btrfs_log_dev_io_error+0x7f/0x90 [btrfs]
+   btrfs_orig_write_end_io+0x51/0x90 [btrfs]
+   dm_submit_bio+0x5c2/0xa50 [dm_mod]
+   ? find_held_lock+0x2b/0x80
+   ? blk_try_enter_queue+0x90/0x1e0
+   __submit_bio+0xe0/0x130
+   ? ktime_get+0x10a/0x160
+   ? lockdep_hardirqs_on+0x74/0x100
+   submit_bio_noacct_nocheck+0x199/0x410
+   btrfs_submit_bio+0x7d/0x150 [btrfs]
+   btrfs_submit_chunk+0x1a1/0x6d0 [btrfs]
+   ? lockdep_hardirqs_on+0x74/0x100
+   ? __folio_start_writeback+0x10/0x2c0
+   btrfs_submit_bbio+0x1c/0x40 [btrfs]
+   submit_one_bio+0x44/0x60 [btrfs]
+   submit_extent_folio+0x13f/0x330 [btrfs]
+   ? btrfs_set_range_writeback+0xa3/0xd0 [btrfs]
+   extent_writepage_io+0x18b/0x360 [btrfs]
+   extent_write_locked_range+0x17c/0x340 [btrfs]
+   ? __pfx_end_bbio_data_write+0x10/0x10 [btrfs]
+   run_delalloc_cow+0x71/0xd0 [btrfs]
+   btrfs_run_delalloc_range+0x176/0x500 [btrfs]
+   ? find_lock_delalloc_range+0x119/0x260 [btrfs]
+   writepage_delalloc+0x2ab/0x480 [btrfs]
+   extent_write_cache_pages+0x236/0x7d0 [btrfs]
+   btrfs_writepages+0x72/0x130 [btrfs]
+   do_writepages+0xd4/0x240
+   ? find_held_lock+0x2b/0x80
+   ? wbc_attach_and_unlock_inode+0x12c/0x290
+   ? wbc_attach_and_unlock_inode+0x12c/0x290
+   __writeback_single_inode+0x5c/0x4c0
+   ? do_raw_spin_unlock+0x49/0xb0
+   writeback_sb_inodes+0x22c/0x560
+   __writeback_inodes_wb+0x4c/0xe0
+   wb_writeback+0x1d6/0x3f0
+   wb_workfn+0x334/0x520
+   process_one_work+0x1ee/0x570
+   ? lock_is_held_type+0xc6/0x130
+   worker_thread+0x1d1/0x3b0
+   ? __pfx_worker_thread+0x10/0x10
+   kthread+0xee/0x120
+   ? __pfx_kthread+0x10/0x10
+   ret_from_fork+0x30/0x50
+   ? __pfx_kthread+0x10/0x10
+   ret_from_fork_asm+0x1a/0x30
+   </TASK>
+  Modules linked in: dm_mod btrfs blake2b_generic xor raid6_pq rapl
+  CR2: 0000000000000020
+
+* Case 2. Earlier completion of orig_bbio for mirrored btrfs_bios
+
+btrfs_bbio_propagate_error() assumes the end_io function for orig_bbio is
+called last among split bios. In that case, btrfs_orig_write_end_io() sets
+the bio->bi_status to BLK_STS_IOERR by seeing the bioc->error [2].
+Otherwise, the increased orig_bio's bioc->error is not checked by anyone
+and return BLK_STS_OK to the upper layer.
+
+[2] Actually, this is not true. Because we only increases orig_bioc->errors
+by max_errors, the condition "atomic_read(&bioc->error) > bioc->max_errors"
+is still not met if only one split btrfs_bio fails.
+
+* Case 3. Later completion of orig_bbio for un-mirrored btrfs_bios
+
+In contrast to the above case, btrfs_bbio_propagate_error() is not working
+well if un-mirrored orig_bbio is completed last. It sets
+orig_bbio->bio.bi_status to the btrfs_bio's error. But, that is easily
+over-written by orig_bbio's completion status. If the status is BLK_STS_OK,
+the upper layer would not know the failure.
+
+* Solution
+
+Considering the above cases, we can only save the error status in the
+orig_bbio (remaining part after split) itself as it is always
+available. Also, the saved error status should be propagated when all the
+split btrfs_bios are finished (i.e, bbio->pending_ios == 0).
+
+This commit introduces "status" to btrfs_bbio and saves the first error of
+split bios to original btrfs_bio's "status" variable. When all the split
+bios are finished, the saved status is loaded into original btrfs_bio's
+status.
+
+With this commit, btrfs/146 on zoned devices does not hit the NULL pointer
+dereference anymore.
+
+Fixes: 852eee62d31a ("btrfs: allow btrfs_submit_bio to split bios")
+CC: stable@vger.kernel.org # 6.6+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/bio.c | 37 +++++++++++++------------------------
+ fs/btrfs/bio.h |  3 +++
+ 2 files changed, 16 insertions(+), 24 deletions(-)
+
+diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
+index e93d376796a28..31e437d94869d 100644
+--- a/fs/btrfs/bio.c
++++ b/fs/btrfs/bio.c
+@@ -49,6 +49,7 @@ void btrfs_bio_init(struct btrfs_bio *bbio, struct btrfs_fs_info *fs_info,
+       bbio->end_io = end_io;
+       bbio->private = private;
+       atomic_set(&bbio->pending_ios, 1);
++      WRITE_ONCE(bbio->status, BLK_STS_OK);
+ }
+ /*
+@@ -120,41 +121,29 @@ static void __btrfs_bio_end_io(struct btrfs_bio *bbio)
+       }
+ }
+-static void btrfs_orig_write_end_io(struct bio *bio);
+-
+-static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio,
+-                                     struct btrfs_bio *orig_bbio)
+-{
+-      /*
+-       * For writes we tolerate nr_mirrors - 1 write failures, so we can't
+-       * just blindly propagate a write failure here.  Instead increment the
+-       * error count in the original I/O context so that it is guaranteed to
+-       * be larger than the error tolerance.
+-       */
+-      if (bbio->bio.bi_end_io == &btrfs_orig_write_end_io) {
+-              struct btrfs_io_stripe *orig_stripe = orig_bbio->bio.bi_private;
+-              struct btrfs_io_context *orig_bioc = orig_stripe->bioc;
+-
+-              atomic_add(orig_bioc->max_errors, &orig_bioc->error);
+-      } else {
+-              orig_bbio->bio.bi_status = bbio->bio.bi_status;
+-      }
+-}
+-
+ void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
+ {
+       bbio->bio.bi_status = status;
+       if (bbio->bio.bi_pool == &btrfs_clone_bioset) {
+               struct btrfs_bio *orig_bbio = bbio->private;
+-              if (bbio->bio.bi_status)
+-                      btrfs_bbio_propagate_error(bbio, orig_bbio);
+               btrfs_cleanup_bio(bbio);
+               bbio = orig_bbio;
+       }
+-      if (atomic_dec_and_test(&bbio->pending_ios))
++      /*
++       * At this point, bbio always points to the original btrfs_bio. Save
++       * the first error in it.
++       */
++      if (status != BLK_STS_OK)
++              cmpxchg(&bbio->status, BLK_STS_OK, status);
++
++      if (atomic_dec_and_test(&bbio->pending_ios)) {
++              /* Load split bio's error which might be set above. */
++              if (status == BLK_STS_OK)
++                      bbio->bio.bi_status = READ_ONCE(bbio->status);
+               __btrfs_bio_end_io(bbio);
++      }
+ }
+ static int next_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
+diff --git a/fs/btrfs/bio.h b/fs/btrfs/bio.h
+index d9dd5276093df..043f94562166b 100644
+--- a/fs/btrfs/bio.h
++++ b/fs/btrfs/bio.h
+@@ -79,6 +79,9 @@ struct btrfs_bio {
+       /* File system that this I/O operates on. */
+       struct btrfs_fs_info *fs_info;
++      /* Save the first error status of split bio. */
++      blk_status_t status;
++
+       /*
+        * This member must come last, bio_alloc_bioset will allocate enough
+        * bytes for entire btrfs_bio but relies on bio being last.
+-- 
+2.43.0
+
diff --git a/queue-6.11/btrfs-fix-extent-map-merging-not-happening-for-adjac.patch b/queue-6.11/btrfs-fix-extent-map-merging-not-happening-for-adjac.patch
new file mode 100644 (file)
index 0000000..4f25210
--- /dev/null
@@ -0,0 +1,109 @@
+From 5fa935d4185f50c9e50ded8a6982ca433892847a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Oct 2024 16:23:00 +0000
+Subject: btrfs: fix extent map merging not happening for adjacent extents
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit a0f0625390858321525c2a8d04e174a546bd19b3 ]
+
+If we have 3 or more adjacent extents in a file, that is, consecutive file
+extent items pointing to adjacent extents, within a contiguous file range
+and compatible flags, we end up not merging all the extents into a single
+extent map.
+
+For example:
+
+  $ mkfs.btrfs -f /dev/sdc
+  $ mount /dev/sdc /mnt/sdc
+
+  $ xfs_io -f -d -c "pwrite -b 64K 0 64K" \
+                 -c "pwrite -b 64K 64K 64K" \
+                 -c "pwrite -b 64K 128K 64K" \
+                 -c "pwrite -b 64K 192K 64K" \
+                 /mnt/sdc/foo
+
+After all the ordered extents complete we unpin the extent maps and try
+to merge them, but instead of getting a single extent map we get two
+because:
+
+1) When the first ordered extent completes (file range [0, 64K)) we
+   unpin its extent map and attempt to merge it with the extent map for
+   the range [64K, 128K), but we can't because that extent map is still
+   pinned;
+
+2) When the second ordered extent completes (file range [64K, 128K)), we
+   unpin its extent map and merge it with the previous extent map, for
+   file range [0, 64K), but we can't merge with the next extent map, for
+   the file range [128K, 192K), because this one is still pinned.
+
+   The merged extent map for the file range [0, 128K) gets the flag
+   EXTENT_MAP_MERGED set;
+
+3) When the third ordered extent completes (file range [128K, 192K)), we
+   unpin its extent map and attempt to merge it with the previous extent
+   map, for file range [0, 128K), but we can't because that extent map
+   has the flag EXTENT_MAP_MERGED set (mergeable_maps() returns false
+   due to different flags) while the extent map for the range [128K, 192K)
+   doesn't have that flag set.
+
+   We also can't merge it with the next extent map, for file range
+   [192K, 256K), because that one is still pinned.
+
+   At this moment we have 3 extent maps:
+
+   One for file range [0, 128K), with the flag EXTENT_MAP_MERGED set.
+   One for file range [128K, 192K).
+   One for file range [192K, 256K) which is still pinned;
+
+4) When the fourth and final extent completes (file range [192K, 256K)),
+   we unpin its extent map and attempt to merge it with the previous
+   extent map, for file range [128K, 192K), which succeeds since none
+   of these extent maps have the EXTENT_MAP_MERGED flag set.
+
+   So we end up with 2 extent maps:
+
+   One for file range [0, 128K), with the flag EXTENT_MAP_MERGED set.
+   One for file range [128K, 256K), with the flag EXTENT_MAP_MERGED set.
+
+   Since after merging extent maps we don't attempt to merge again, that
+   is, merge the resulting extent map with the one that is now preceding
+   it (and the one following it), we end up with those two extent maps,
+   when we could have had a single extent map to represent the whole file.
+
+Fix this by making mergeable_maps() ignore the EXTENT_MAP_MERGED flag.
+While this doesn't present any functional issue, it prevents the merging
+of extent maps which allows to save memory, and can make defrag not
+merging extents too (that will be addressed in the next patch).
+
+Fixes: 199257a78bb0 ("btrfs: defrag: don't use merged extent map for their generation check")
+CC: stable@vger.kernel.org # 6.1+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent_map.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
+index 72ae8f64482c6..b56ec83bf9528 100644
+--- a/fs/btrfs/extent_map.c
++++ b/fs/btrfs/extent_map.c
+@@ -227,7 +227,12 @@ static bool mergeable_maps(const struct extent_map *prev, const struct extent_ma
+       if (extent_map_end(prev) != next->start)
+               return false;
+-      if (prev->flags != next->flags)
++      /*
++       * The merged flag is not an on-disk flag, it just indicates we had the
++       * extent maps of 2 (or more) adjacent extents merged, so factor it out.
++       */
++      if ((prev->flags & ~EXTENT_FLAG_MERGED) !=
++          (next->flags & ~EXTENT_FLAG_MERGED))
+               return false;
+       if (next->disk_bytenr < EXTENT_MAP_LAST_BYTE - 1)
+-- 
+2.43.0
+
diff --git a/queue-6.11/btrfs-fix-use-after-free-of-block-device-file-in-__b.patch b/queue-6.11/btrfs-fix-use-after-free-of-block-device-file-in-__b.patch
new file mode 100644 (file)
index 0000000..cd2a20a
--- /dev/null
@@ -0,0 +1,78 @@
+From 51a566b2d28380de4fe53a243104685e9e5fbc55 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Oct 2024 22:02:15 +0800
+Subject: btrfs: fix use-after-free of block device file in
+ __btrfs_free_extra_devids()
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Zhihao Cheng <chengzhihao1@huawei.com>
+
+[ Upstream commit aec8e6bf839101784f3ef037dcdb9432c3f32343 ]
+
+Mounting btrfs from two images (which have the same one fsid and two
+different dev_uuids) in certain executing order may trigger an UAF for
+variable 'device->bdev_file' in __btrfs_free_extra_devids(). And
+following are the details:
+
+1. Attach image_1 to loop0, attach image_2 to loop1, and scan btrfs
+   devices by ioctl(BTRFS_IOC_SCAN_DEV):
+
+             /  btrfs_device_1 → loop0
+   fs_device
+             \  btrfs_device_2 → loop1
+2. mount /dev/loop0 /mnt
+   btrfs_open_devices
+    btrfs_device_1->bdev_file = btrfs_get_bdev_and_sb(loop0)
+    btrfs_device_2->bdev_file = btrfs_get_bdev_and_sb(loop1)
+   btrfs_fill_super
+    open_ctree
+     fail: btrfs_close_devices // -ENOMEM
+           btrfs_close_bdev(btrfs_device_1)
+             fput(btrfs_device_1->bdev_file)
+             // btrfs_device_1->bdev_file is freed
+           btrfs_close_bdev(btrfs_device_2)
+             fput(btrfs_device_2->bdev_file)
+
+3. mount /dev/loop1 /mnt
+   btrfs_open_devices
+    btrfs_get_bdev_and_sb(&bdev_file)
+     // EIO, btrfs_device_1->bdev_file is not assigned,
+     // which points to a freed memory area
+    btrfs_device_2->bdev_file = btrfs_get_bdev_and_sb(loop1)
+   btrfs_fill_super
+    open_ctree
+     btrfs_free_extra_devids
+      if (btrfs_device_1->bdev_file)
+       fput(btrfs_device_1->bdev_file) // UAF !
+
+Fix it by setting 'device->bdev_file' as 'NULL' after closing the
+btrfs_device in btrfs_close_one_device().
+
+Fixes: 142388194191 ("btrfs: do not background blkdev_put()")
+CC: stable@vger.kernel.org # 4.19+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=219408
+Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/volumes.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
+index fcedc43ef291a..0485143cd75e0 100644
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -1103,6 +1103,7 @@ static void btrfs_close_one_device(struct btrfs_device *device)
+       if (device->bdev) {
+               fs_devices->open_devices--;
+               device->bdev = NULL;
++              device->bdev_file = NULL;
+       }
+       clear_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state);
+       btrfs_destroy_dev_zone_info(device);
+-- 
+2.43.0
+
diff --git a/queue-6.11/btrfs-merge-btrfs_orig_bbio_end_io-into-btrfs_bio_en.patch b/queue-6.11/btrfs-merge-btrfs_orig_bbio_end_io-into-btrfs_bio_en.patch
new file mode 100644 (file)
index 0000000..5e4ebf9
--- /dev/null
@@ -0,0 +1,152 @@
+From 7b6c09ddc9114f07ef62e41a6c9e79046df8adf1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 24 Aug 2024 19:36:43 +0930
+Subject: btrfs: merge btrfs_orig_bbio_end_io() into btrfs_bio_end_io()
+
+From: Qu Wenruo <wqu@suse.com>
+
+[ Upstream commit 9ca0e58cb752b09816f56f7a3147a39773d5e831 ]
+
+There are only two differences between the two functions:
+
+- btrfs_orig_bbio_end_io() does extra error propagation
+  This is mostly to allow tolerance for write errors.
+
+- btrfs_orig_bbio_end_io() does extra pending_ios check
+  This check can handle both the original bio, or the cloned one.
+  (All accounting happens in the original one).
+
+This makes btrfs_orig_bbio_end_io() a much safer call.
+In fact we already had a double freeing error due to usage of
+btrfs_bio_end_io() in the error path of btrfs_submit_chunk().
+
+So just move the whole content of btrfs_orig_bbio_end_io() into
+btrfs_bio_end_io().
+
+For normal paths this brings no change, because they are already calling
+btrfs_orig_bbio_end_io() in the first place.
+
+For error paths (not only inside bio.c but also external callers), this
+change will introduce extra checks, especially for external callers, as
+they will error out without submitting the btrfs bio.
+
+But considering it's already in the error path, such slower but much
+safer checks are still an overall win.
+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: d48e1dea3931 ("btrfs: fix error propagation of split bios")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/bio.c | 29 +++++++++++------------------
+ 1 file changed, 11 insertions(+), 18 deletions(-)
+
+diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c
+index b4e31ae17cd95..e93d376796a28 100644
+--- a/fs/btrfs/bio.c
++++ b/fs/btrfs/bio.c
+@@ -120,12 +120,6 @@ static void __btrfs_bio_end_io(struct btrfs_bio *bbio)
+       }
+ }
+-void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
+-{
+-      bbio->bio.bi_status = status;
+-      __btrfs_bio_end_io(bbio);
+-}
+-
+ static void btrfs_orig_write_end_io(struct bio *bio);
+ static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio,
+@@ -147,8 +141,9 @@ static void btrfs_bbio_propagate_error(struct btrfs_bio *bbio,
+       }
+ }
+-static void btrfs_orig_bbio_end_io(struct btrfs_bio *bbio)
++void btrfs_bio_end_io(struct btrfs_bio *bbio, blk_status_t status)
+ {
++      bbio->bio.bi_status = status;
+       if (bbio->bio.bi_pool == &btrfs_clone_bioset) {
+               struct btrfs_bio *orig_bbio = bbio->private;
+@@ -179,7 +174,7 @@ static int prev_repair_mirror(struct btrfs_failed_bio *fbio, int cur_mirror)
+ static void btrfs_repair_done(struct btrfs_failed_bio *fbio)
+ {
+       if (atomic_dec_and_test(&fbio->repair_count)) {
+-              btrfs_orig_bbio_end_io(fbio->bbio);
++              btrfs_bio_end_io(fbio->bbio, fbio->bbio->bio.bi_status);
+               mempool_free(fbio, &btrfs_failed_bio_pool);
+       }
+ }
+@@ -326,7 +321,7 @@ static void btrfs_check_read_bio(struct btrfs_bio *bbio, struct btrfs_device *de
+       if (fbio)
+               btrfs_repair_done(fbio);
+       else
+-              btrfs_orig_bbio_end_io(bbio);
++              btrfs_bio_end_io(bbio, bbio->bio.bi_status);
+ }
+ static void btrfs_log_dev_io_error(struct bio *bio, struct btrfs_device *dev)
+@@ -360,7 +355,7 @@ static void btrfs_end_bio_work(struct work_struct *work)
+       if (is_data_bbio(bbio))
+               btrfs_check_read_bio(bbio, bbio->bio.bi_private);
+       else
+-              btrfs_orig_bbio_end_io(bbio);
++              btrfs_bio_end_io(bbio, bbio->bio.bi_status);
+ }
+ static void btrfs_simple_end_io(struct bio *bio)
+@@ -380,7 +375,7 @@ static void btrfs_simple_end_io(struct bio *bio)
+       } else {
+               if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status)
+                       btrfs_record_physical_zoned(bbio);
+-              btrfs_orig_bbio_end_io(bbio);
++              btrfs_bio_end_io(bbio, bbio->bio.bi_status);
+       }
+ }
+@@ -394,7 +389,7 @@ static void btrfs_raid56_end_io(struct bio *bio)
+       if (bio_op(bio) == REQ_OP_READ && is_data_bbio(bbio))
+               btrfs_check_read_bio(bbio, NULL);
+       else
+-              btrfs_orig_bbio_end_io(bbio);
++              btrfs_bio_end_io(bbio, bbio->bio.bi_status);
+       btrfs_put_bioc(bioc);
+ }
+@@ -424,7 +419,7 @@ static void btrfs_orig_write_end_io(struct bio *bio)
+       if (bio_op(bio) == REQ_OP_ZONE_APPEND && !bio->bi_status)
+               stripe->physical = bio->bi_iter.bi_sector << SECTOR_SHIFT;
+-      btrfs_orig_bbio_end_io(bbio);
++      btrfs_bio_end_io(bbio, bbio->bio.bi_status);
+       btrfs_put_bioc(bioc);
+ }
+@@ -593,7 +588,7 @@ static void run_one_async_done(struct btrfs_work *work, bool do_free)
+       /* If an error occurred we just want to clean up the bio and move on. */
+       if (bio->bi_status) {
+-              btrfs_orig_bbio_end_io(async->bbio);
++              btrfs_bio_end_io(async->bbio, async->bbio->bio.bi_status);
+               return;
+       }
+@@ -765,11 +760,9 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num)
+               ASSERT(bbio->bio.bi_pool == &btrfs_clone_bioset);
+               ASSERT(remaining);
+-              remaining->bio.bi_status = ret;
+-              btrfs_orig_bbio_end_io(remaining);
++              btrfs_bio_end_io(remaining, ret);
+       }
+-      bbio->bio.bi_status = ret;
+-      btrfs_orig_bbio_end_io(bbio);
++      btrfs_bio_end_io(bbio, ret);
+       /* Do not submit another chunk */
+       return true;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.11/cgroup-bpf-use-a-dedicated-workqueue-for-cgroup-bpf-.patch b/queue-6.11/cgroup-bpf-use-a-dedicated-workqueue-for-cgroup-bpf-.patch
new file mode 100644 (file)
index 0000000..295432e
--- /dev/null
@@ -0,0 +1,154 @@
+From aae339a4d594e3581eb68221a4bff0aa58baf7de Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Oct 2024 11:24:56 +0000
+Subject: cgroup/bpf: use a dedicated workqueue for cgroup bpf destruction
+
+From: Chen Ridong <chenridong@huawei.com>
+
+[ Upstream commit 117932eea99b729ee5d12783601a4f7f5fd58a23 ]
+
+A hung_task problem shown below was found:
+
+INFO: task kworker/0:0:8 blocked for more than 327 seconds.
+"echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+Workqueue: events cgroup_bpf_release
+Call Trace:
+ <TASK>
+ __schedule+0x5a2/0x2050
+ ? find_held_lock+0x33/0x100
+ ? wq_worker_sleeping+0x9e/0xe0
+ schedule+0x9f/0x180
+ schedule_preempt_disabled+0x25/0x50
+ __mutex_lock+0x512/0x740
+ ? cgroup_bpf_release+0x1e/0x4d0
+ ? cgroup_bpf_release+0xcf/0x4d0
+ ? process_scheduled_works+0x161/0x8a0
+ ? cgroup_bpf_release+0x1e/0x4d0
+ ? mutex_lock_nested+0x2b/0x40
+ ? __pfx_delay_tsc+0x10/0x10
+ mutex_lock_nested+0x2b/0x40
+ cgroup_bpf_release+0xcf/0x4d0
+ ? process_scheduled_works+0x161/0x8a0
+ ? trace_event_raw_event_workqueue_execute_start+0x64/0xd0
+ ? process_scheduled_works+0x161/0x8a0
+ process_scheduled_works+0x23a/0x8a0
+ worker_thread+0x231/0x5b0
+ ? __pfx_worker_thread+0x10/0x10
+ kthread+0x14d/0x1c0
+ ? __pfx_kthread+0x10/0x10
+ ret_from_fork+0x59/0x70
+ ? __pfx_kthread+0x10/0x10
+ ret_from_fork_asm+0x1b/0x30
+ </TASK>
+
+This issue can be reproduced by the following pressure test:
+1. A large number of cpuset cgroups are deleted.
+2. Set cpu on and off repeatly.
+3. Set watchdog_thresh repeatly.
+The scripts can be obtained at LINK mentioned above the signature.
+
+The reason for this issue is cgroup_mutex and cpu_hotplug_lock are
+acquired in different tasks, which may lead to deadlock.
+It can lead to a deadlock through the following steps:
+1. A large number of cpusets are deleted asynchronously, which puts a
+   large number of cgroup_bpf_release works into system_wq. The max_active
+   of system_wq is WQ_DFL_ACTIVE(256). Consequently, all active works are
+   cgroup_bpf_release works, and many cgroup_bpf_release works will be put
+   into inactive queue. As illustrated in the diagram, there are 256 (in
+   the acvtive queue) + n (in the inactive queue) works.
+2. Setting watchdog_thresh will hold cpu_hotplug_lock.read and put
+   smp_call_on_cpu work into system_wq. However step 1 has already filled
+   system_wq, 'sscs.work' is put into inactive queue. 'sscs.work' has
+   to wait until the works that were put into the inacvtive queue earlier
+   have executed (n cgroup_bpf_release), so it will be blocked for a while.
+3. Cpu offline requires cpu_hotplug_lock.write, which is blocked by step 2.
+4. Cpusets that were deleted at step 1 put cgroup_release works into
+   cgroup_destroy_wq. They are competing to get cgroup_mutex all the time.
+   When cgroup_mutex is acquired by work at css_killed_work_fn, it will
+   call cpuset_css_offline, which needs to acquire cpu_hotplug_lock.read.
+   However, cpuset_css_offline will be blocked for step 3.
+5. At this moment, there are 256 works in active queue that are
+   cgroup_bpf_release, they are attempting to acquire cgroup_mutex, and as
+   a result, all of them are blocked. Consequently, sscs.work can not be
+   executed. Ultimately, this situation leads to four processes being
+   blocked, forming a deadlock.
+
+system_wq(step1)               WatchDog(step2)                 cpu offline(step3)      cgroup_destroy_wq(step4)
+...
+2000+ cgroups deleted asyn
+256 actives + n inactives
+                               __lockup_detector_reconfigure
+                               P(cpu_hotplug_lock.read)
+                               put sscs.work into system_wq
+256 + n + 1(sscs.work)
+sscs.work wait to be executed
+                               waiting sscs.work finish
+                                                               percpu_down_write
+                                                               P(cpu_hotplug_lock.write)
+                                                               ...blocking...
+                                                                                       css_killed_work_fn
+                                                                                       P(cgroup_mutex)
+                                                                                       cpuset_css_offline
+                                                                                       P(cpu_hotplug_lock.read)
+                                                                                       ...blocking...
+256 cgroup_bpf_release
+mutex_lock(&cgroup_mutex);
+..blocking...
+
+To fix the problem, place cgroup_bpf_release works on a dedicated
+workqueue which can break the loop and solve the problem. System wqs are
+for misc things which shouldn't create a large number of concurrent work
+items. If something is going to generate >WQ_DFL_ACTIVE(256) concurrent
+work items, it should use its own dedicated workqueue.
+
+Fixes: 4bfc0bb2c60e ("bpf: decouple the lifetime of cgroup_bpf from cgroup itself")
+Cc: stable@vger.kernel.org # v5.3+
+Link: https://lore.kernel.org/cgroups/e90c32d2-2a85-4f28-9154-09c7d320cb60@huawei.com/T/#t
+Tested-by: Vishal Chourasia <vishalc@linux.ibm.com>
+Signed-off-by: Chen Ridong <chenridong@huawei.com>
+Signed-off-by: Tejun Heo <tj@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/cgroup.c | 19 ++++++++++++++++++-
+ 1 file changed, 18 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
+index 8ba73042a2395..479a2ea5d9af6 100644
+--- a/kernel/bpf/cgroup.c
++++ b/kernel/bpf/cgroup.c
+@@ -24,6 +24,23 @@
+ DEFINE_STATIC_KEY_ARRAY_FALSE(cgroup_bpf_enabled_key, MAX_CGROUP_BPF_ATTACH_TYPE);
+ EXPORT_SYMBOL(cgroup_bpf_enabled_key);
++/*
++ * cgroup bpf destruction makes heavy use of work items and there can be a lot
++ * of concurrent destructions.  Use a separate workqueue so that cgroup bpf
++ * destruction work items don't end up filling up max_active of system_wq
++ * which may lead to deadlock.
++ */
++static struct workqueue_struct *cgroup_bpf_destroy_wq;
++
++static int __init cgroup_bpf_wq_init(void)
++{
++      cgroup_bpf_destroy_wq = alloc_workqueue("cgroup_bpf_destroy", 0, 1);
++      if (!cgroup_bpf_destroy_wq)
++              panic("Failed to alloc workqueue for cgroup bpf destroy.\n");
++      return 0;
++}
++core_initcall(cgroup_bpf_wq_init);
++
+ /* __always_inline is necessary to prevent indirect call through run_prog
+  * function pointer.
+  */
+@@ -334,7 +351,7 @@ static void cgroup_bpf_release_fn(struct percpu_ref *ref)
+       struct cgroup *cgrp = container_of(ref, struct cgroup, bpf.refcnt);
+       INIT_WORK(&cgrp->bpf.release_work, cgroup_bpf_release);
+-      queue_work(system_wq, &cgrp->bpf.release_work);
++      queue_work(cgroup_bpf_destroy_wq, &cgrp->bpf.release_work);
+ }
+ /* Get underlying bpf_prog of bpf_prog_list entry, regardless if it's through
+-- 
+2.43.0
+
diff --git a/queue-6.11/cxl-acpi-ensure-ports-ready-at-cxl_acpi_probe-return.patch b/queue-6.11/cxl-acpi-ensure-ports-ready-at-cxl_acpi_probe-return.patch
new file mode 100644 (file)
index 0000000..956db3a
--- /dev/null
@@ -0,0 +1,52 @@
+From 766c3af9a1083669e7f2e58be4cbd65e0c861039 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Oct 2024 18:43:40 -0700
+Subject: cxl/acpi: Ensure ports ready at cxl_acpi_probe() return
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+[ Upstream commit 48f62d38a07d464a499fa834638afcfd2b68f852 ]
+
+In order to ensure root CXL ports are enabled upon cxl_acpi_probe()
+when the 'cxl_port' driver is built as a module, arrange for the
+module to be pre-loaded or built-in.
+
+The "Fixes:" but no "Cc: stable" on this patch reflects that the issue
+is merely by inspection since the bug that triggered the discovery of
+this potential problem [1] is fixed by other means. However, a stable
+backport should do no harm.
+
+Fixes: 8dd2bc0f8e02 ("cxl/mem: Add the cxl_mem driver")
+Link: http://lore.kernel.org/20241004212504.1246-1-gourry@gourry.net [1]
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Tested-by: Gregory Price <gourry@gourry.net>
+Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Reviewed-by: Ira Weiny <ira.weiny@intel.com>
+Link: https://patch.msgid.link/172964781969.81806.17276352414854540808.stgit@dwillia2-xfh.jf.intel.com
+Signed-off-by: Ira Weiny <ira.weiny@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cxl/acpi.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/drivers/cxl/acpi.c b/drivers/cxl/acpi.c
+index 82b78e331d8ed..432b7cfd12a8e 100644
+--- a/drivers/cxl/acpi.c
++++ b/drivers/cxl/acpi.c
+@@ -924,6 +924,13 @@ static void __exit cxl_acpi_exit(void)
+ /* load before dax_hmem sees 'Soft Reserved' CXL ranges */
+ subsys_initcall(cxl_acpi_init);
++
++/*
++ * Arrange for host-bridge ports to be active synchronous with
++ * cxl_acpi_probe() exit.
++ */
++MODULE_SOFTDEP("pre: cxl_port");
++
+ module_exit(cxl_acpi_exit);
+ MODULE_DESCRIPTION("CXL ACPI: Platform Support");
+ MODULE_LICENSE("GPL v2");
+-- 
+2.43.0
+
diff --git a/queue-6.11/cxl-port-fix-cxl_bus_rescan-vs-bus_rescan_devices.patch b/queue-6.11/cxl-port-fix-cxl_bus_rescan-vs-bus_rescan_devices.patch
new file mode 100644 (file)
index 0000000..700db31
--- /dev/null
@@ -0,0 +1,65 @@
+From 6d020f71c952bb55609ca8d6c8c4d20280696f47 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Oct 2024 18:43:32 -0700
+Subject: cxl/port: Fix cxl_bus_rescan() vs bus_rescan_devices()
+
+From: Dan Williams <dan.j.williams@intel.com>
+
+[ Upstream commit 3d6ebf16438de5d712030fefbb4182b46373d677 ]
+
+It turns out since its original introduction, pre-2.6.12,
+bus_rescan_devices() has skipped devices that might be in the process of
+attaching or detaching from their driver. For CXL this behavior is
+unwanted and expects that cxl_bus_rescan() is a probe barrier.
+
+That behavior is simple enough to achieve with bus_for_each_dev() paired
+with call to device_attach(), and it is unclear why bus_rescan_devices()
+took the position of lockless consumption of dev->driver which is racy.
+
+The "Fixes:" but no "Cc: stable" on this patch reflects that the issue
+is merely by inspection since the bug that triggered the discovery of
+this potential problem [1] is fixed by other means.  However, a stable
+backport should do no harm.
+
+Fixes: 8dd2bc0f8e02 ("cxl/mem: Add the cxl_mem driver")
+Link: http://lore.kernel.org/20241004212504.1246-1-gourry@gourry.net [1]
+Signed-off-by: Dan Williams <dan.j.williams@intel.com>
+Tested-by: Gregory Price <gourry@gourry.net>
+Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Reviewed-by: Ira Weiny <ira.weiny@intel.com>
+Link: https://patch.msgid.link/172964781104.81806.4277549800082443769.stgit@dwillia2-xfh.jf.intel.com
+Signed-off-by: Ira Weiny <ira.weiny@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/cxl/core/port.c | 13 ++++++++++---
+ 1 file changed, 10 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/cxl/core/port.c b/drivers/cxl/core/port.c
+index 1d5007e3795a3..d3237346f6877 100644
+--- a/drivers/cxl/core/port.c
++++ b/drivers/cxl/core/port.c
+@@ -2088,11 +2088,18 @@ static void cxl_bus_remove(struct device *dev)
+ static struct workqueue_struct *cxl_bus_wq;
+-static void cxl_bus_rescan_queue(struct work_struct *w)
++static int cxl_rescan_attach(struct device *dev, void *data)
+ {
+-      int rc = bus_rescan_devices(&cxl_bus_type);
++      int rc = device_attach(dev);
++
++      dev_vdbg(dev, "rescan: %s\n", rc ? "attach" : "detached");
+-      pr_debug("CXL bus rescan result: %d\n", rc);
++      return 0;
++}
++
++static void cxl_bus_rescan_queue(struct work_struct *w)
++{
++      bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_rescan_attach);
+ }
+ void cxl_bus_rescan(void)
+-- 
+2.43.0
+
diff --git a/queue-6.11/drm-amd-pm-vangogh-fix-kernel-memory-out-of-bounds-w.patch b/queue-6.11/drm-amd-pm-vangogh-fix-kernel-memory-out-of-bounds-w.patch
new file mode 100644 (file)
index 0000000..d168cec
--- /dev/null
@@ -0,0 +1,100 @@
+From 9b1bad33fb30daa0772b6492902fe79557767f10 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Oct 2024 15:56:39 +0100
+Subject: drm/amd/pm: Vangogh: Fix kernel memory out of bounds write
+
+From: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+
+[ Upstream commit 4aa923a6e6406b43566ef6ac35a3d9a3197fa3e8 ]
+
+KASAN reports that the GPU metrics table allocated in
+vangogh_tables_init() is not large enough for the memset done in
+smu_cmn_init_soft_gpu_metrics(). Condensed report follows:
+
+[   33.861314] BUG: KASAN: slab-out-of-bounds in smu_cmn_init_soft_gpu_metrics+0x73/0x200 [amdgpu]
+[   33.861799] Write of size 168 at addr ffff888129f59500 by task mangoapp/1067
+...
+[   33.861808] CPU: 6 UID: 1000 PID: 1067 Comm: mangoapp Tainted: G        W          6.12.0-rc4 #356 1a56f59a8b5182eeaf67eb7cb8b13594dd23b544
+[   33.861816] Tainted: [W]=WARN
+[   33.861818] Hardware name: Valve Galileo/Galileo, BIOS F7G0107 12/01/2023
+[   33.861822] Call Trace:
+[   33.861826]  <TASK>
+[   33.861829]  dump_stack_lvl+0x66/0x90
+[   33.861838]  print_report+0xce/0x620
+[   33.861853]  kasan_report+0xda/0x110
+[   33.862794]  kasan_check_range+0xfd/0x1a0
+[   33.862799]  __asan_memset+0x23/0x40
+[   33.862803]  smu_cmn_init_soft_gpu_metrics+0x73/0x200 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779]
+[   33.863306]  vangogh_get_gpu_metrics_v2_4+0x123/0xad0 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779]
+[   33.864257]  vangogh_common_get_gpu_metrics+0xb0c/0xbc0 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779]
+[   33.865682]  amdgpu_dpm_get_gpu_metrics+0xcc/0x110 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779]
+[   33.866160]  amdgpu_get_gpu_metrics+0x154/0x2d0 [amdgpu 13b1bc364ec578808f676eba412c20eaab792779]
+[   33.867135]  dev_attr_show+0x43/0xc0
+[   33.867147]  sysfs_kf_seq_show+0x1f1/0x3b0
+[   33.867155]  seq_read_iter+0x3f8/0x1140
+[   33.867173]  vfs_read+0x76c/0xc50
+[   33.867198]  ksys_read+0xfb/0x1d0
+[   33.867214]  do_syscall_64+0x90/0x160
+...
+[   33.867353] Allocated by task 378 on cpu 7 at 22.794876s:
+[   33.867358]  kasan_save_stack+0x33/0x50
+[   33.867364]  kasan_save_track+0x17/0x60
+[   33.867367]  __kasan_kmalloc+0x87/0x90
+[   33.867371]  vangogh_init_smc_tables+0x3f9/0x840 [amdgpu]
+[   33.867835]  smu_sw_init+0xa32/0x1850 [amdgpu]
+[   33.868299]  amdgpu_device_init+0x467b/0x8d90 [amdgpu]
+[   33.868733]  amdgpu_driver_load_kms+0x19/0xf0 [amdgpu]
+[   33.869167]  amdgpu_pci_probe+0x2d6/0xcd0 [amdgpu]
+[   33.869608]  local_pci_probe+0xda/0x180
+[   33.869614]  pci_device_probe+0x43f/0x6b0
+
+Empirically we can confirm that the former allocates 152 bytes for the
+table, while the latter memsets the 168 large block.
+
+Root cause appears that when GPU metrics tables for v2_4 parts were added
+it was not considered to enlarge the table to fit.
+
+The fix in this patch is rather "brute force" and perhaps later should be
+done in a smarter way, by extracting and consolidating the part version to
+size logic to a common helper, instead of brute forcing the largest
+possible allocation. Nevertheless, for now this works and fixes the out of
+bounds write.
+
+v2:
+ * Drop impossible v3_0 case. (Mario)
+
+Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin@igalia.com>
+Fixes: 41cec40bc9ba ("drm/amd/pm: Vangogh: Add new gpu_metrics_v2_4 to acquire gpu_metrics")
+Cc: Mario Limonciello <mario.limonciello@amd.com>
+Cc: Evan Quan <evan.quan@amd.com>
+Cc: Wenyou Yang <WenYou.Yang@amd.com>
+Cc: Alex Deucher <alexander.deucher@amd.com>
+Reviewed-by: Mario Limonciello <mario.limonciello@amd.com>
+Link: https://lore.kernel.org/r/20241025145639.19124-1-tursulin@igalia.com
+Signed-off-by: Mario Limonciello <mario.limonciello@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+(cherry picked from commit 0880f58f9609f0200483a49429af0f050d281703)
+Cc: stable@vger.kernel.org # v6.6+
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+index 22737b11b1bfb..1fe020f1f4dbe 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
+@@ -242,7 +242,9 @@ static int vangogh_tables_init(struct smu_context *smu)
+               goto err0_out;
+       smu_table->metrics_time = 0;
+-      smu_table->gpu_metrics_table_size = max(sizeof(struct gpu_metrics_v2_3), sizeof(struct gpu_metrics_v2_2));
++      smu_table->gpu_metrics_table_size = sizeof(struct gpu_metrics_v2_2);
++      smu_table->gpu_metrics_table_size = max(smu_table->gpu_metrics_table_size, sizeof(struct gpu_metrics_v2_3));
++      smu_table->gpu_metrics_table_size = max(smu_table->gpu_metrics_table_size, sizeof(struct gpu_metrics_v2_4));
+       smu_table->gpu_metrics_table = kzalloc(smu_table->gpu_metrics_table_size, GFP_KERNEL);
+       if (!smu_table->gpu_metrics_table)
+               goto err1_out;
+-- 
+2.43.0
+
diff --git a/queue-6.11/drm-amdgpu-smu13-fix-profile-reporting.patch b/queue-6.11/drm-amdgpu-smu13-fix-profile-reporting.patch
new file mode 100644 (file)
index 0000000..95b5b26
--- /dev/null
@@ -0,0 +1,62 @@
+From a3cfd48d8a538fd8f5bf83fc3e0dea6ddbb80b59 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Oct 2024 09:13:21 -0400
+Subject: drm/amdgpu/smu13: fix profile reporting
+
+From: Alex Deucher <alexander.deucher@amd.com>
+
+[ Upstream commit 935abb86a95def8c20dbb184ce30051db168e541 ]
+
+The following 3 commits landed in parallel:
+commit d7d2688bf4ea ("drm/amd/pm: update workload mask after the setting")
+commit 7a1613e47e65 ("drm/amdgpu/smu13: always apply the powersave optimization")
+commit 7c210ca5a2d7 ("drm/amdgpu: handle default profile on on devices without fullscreen 3D")
+While everything is set correctly, this caused the profile to be
+reported incorrectly because both the powersave and fullscreen3d bits
+were set in the mask and when the driver prints the profile, it looks
+for the first bit set.
+
+Fixes: d7d2688bf4ea ("drm/amd/pm: update workload mask after the setting")
+Reviewed-by: Kenneth Feng <kenneth.feng@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+(cherry picked from commit ecfe9b237687a55d596fff0650ccc8cc455edd3f)
+Cc: stable@vger.kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+index cb923e33fd6fc..d53e162dcd8de 100644
+--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
++++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_0_ppt.c
+@@ -2485,7 +2485,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
+       DpmActivityMonitorCoeffInt_t *activity_monitor =
+               &(activity_monitor_external.DpmActivityMonitorCoeffInt);
+       int workload_type, ret = 0;
+-      u32 workload_mask;
++      u32 workload_mask, selected_workload_mask;
+       smu->power_profile_mode = input[size];
+@@ -2552,7 +2552,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
+       if (workload_type < 0)
+               return -EINVAL;
+-      workload_mask = 1 << workload_type;
++      selected_workload_mask = workload_mask = 1 << workload_type;
+       /* Add optimizations for SMU13.0.0/10.  Reuse the power saving profile */
+       if ((amdgpu_ip_version(smu->adev, MP1_HWIP, 0) == IP_VERSION(13, 0, 0) &&
+@@ -2572,7 +2572,7 @@ static int smu_v13_0_0_set_power_profile_mode(struct smu_context *smu,
+                                              workload_mask,
+                                              NULL);
+       if (!ret)
+-              smu->workload_mask = workload_mask;
++              smu->workload_mask = selected_workload_mask;
+       return ret;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.11/drm-connector-hdmi-fix-memory-leak-in-drm_display_mo.patch b/queue-6.11/drm-connector-hdmi-fix-memory-leak-in-drm_display_mo.patch
new file mode 100644 (file)
index 0000000..0bd8d75
--- /dev/null
@@ -0,0 +1,162 @@
+From 4d87c86a04ab2ae04c89669954e50908ab48df70 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 30 Oct 2024 10:35:03 +0800
+Subject: drm/connector: hdmi: Fix memory leak in
+ drm_display_mode_from_cea_vic()
+
+From: Jinjie Ruan <ruanjinjie@huawei.com>
+
+[ Upstream commit 926163342a2e7595d950e84c17c693b1272bd491 ]
+
+modprobe drm_connector_test and then rmmod drm_connector_test,
+the following memory leak occurs.
+
+The `mode` allocated in drm_mode_duplicate() called by
+drm_display_mode_from_cea_vic() is not freed, which cause the memory leak:
+
+       unreferenced object 0xffffff80cb0ee400 (size 128):
+         comm "kunit_try_catch", pid 1948, jiffies 4294950339
+         hex dump (first 32 bytes):
+           14 44 02 00 80 07 d8 07 04 08 98 08 00 00 38 04  .D............8.
+           3c 04 41 04 65 04 00 00 05 00 00 00 00 00 00 00  <.A.e...........
+         backtrace (crc 90e9585c):
+           [<00000000ec42e3d7>] kmemleak_alloc+0x34/0x40
+           [<00000000d0ef055a>] __kmalloc_cache_noprof+0x26c/0x2f4
+           [<00000000c2062161>] drm_mode_duplicate+0x44/0x19c
+           [<00000000f96c74aa>] drm_display_mode_from_cea_vic+0x88/0x98
+           [<00000000d8f2c8b4>] 0xffffffdc982a4868
+           [<000000005d164dbc>] kunit_try_run_case+0x13c/0x3ac
+           [<000000006fb23398>] kunit_generic_run_threadfn_adapter+0x80/0xec
+           [<000000006ea56ca0>] kthread+0x2e8/0x374
+           [<000000000676063f>] ret_from_fork+0x10/0x20
+       ......
+
+Free `mode` by using drm_kunit_display_mode_from_cea_vic()
+to fix it.
+
+Cc: stable@vger.kernel.org
+Fixes: abb6f74973e2 ("drm/tests: Add HDMI TDMS character rate tests")
+Acked-by: Maxime Ripard <mripard@kernel.org>
+Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241030023504.530425-3-ruanjinjie@huawei.com
+Signed-off-by: Maxime Ripard <mripard@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/tests/drm_connector_test.c | 24 +++++++++++-----------
+ 1 file changed, 12 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/gpu/drm/tests/drm_connector_test.c b/drivers/gpu/drm/tests/drm_connector_test.c
+index 15e36a8db6858..6bba97d0be88e 100644
+--- a/drivers/gpu/drm/tests/drm_connector_test.c
++++ b/drivers/gpu/drm/tests/drm_connector_test.c
+@@ -996,7 +996,7 @@ static void drm_test_drm_hdmi_compute_mode_clock_rgb(struct kunit *test)
+       unsigned long long rate;
+       struct drm_device *drm = &priv->drm;
+-      mode = drm_display_mode_from_cea_vic(drm, 16);
++      mode = drm_kunit_display_mode_from_cea_vic(test, drm, 16);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
+       KUNIT_ASSERT_FALSE(test, mode->flags & DRM_MODE_FLAG_DBLCLK);
+@@ -1017,7 +1017,7 @@ static void drm_test_drm_hdmi_compute_mode_clock_rgb_10bpc(struct kunit *test)
+       unsigned long long rate;
+       struct drm_device *drm = &priv->drm;
+-      mode = drm_display_mode_from_cea_vic(drm, 16);
++      mode = drm_kunit_display_mode_from_cea_vic(test, drm, 16);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
+       KUNIT_ASSERT_FALSE(test, mode->flags & DRM_MODE_FLAG_DBLCLK);
+@@ -1038,7 +1038,7 @@ static void drm_test_drm_hdmi_compute_mode_clock_rgb_10bpc_vic_1(struct kunit *t
+       unsigned long long rate;
+       struct drm_device *drm = &priv->drm;
+-      mode = drm_display_mode_from_cea_vic(drm, 1);
++      mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
+       rate = drm_hdmi_compute_mode_clock(mode, 10, HDMI_COLORSPACE_RGB);
+@@ -1056,7 +1056,7 @@ static void drm_test_drm_hdmi_compute_mode_clock_rgb_12bpc(struct kunit *test)
+       unsigned long long rate;
+       struct drm_device *drm = &priv->drm;
+-      mode = drm_display_mode_from_cea_vic(drm, 16);
++      mode = drm_kunit_display_mode_from_cea_vic(test, drm, 16);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
+       KUNIT_ASSERT_FALSE(test, mode->flags & DRM_MODE_FLAG_DBLCLK);
+@@ -1077,7 +1077,7 @@ static void drm_test_drm_hdmi_compute_mode_clock_rgb_12bpc_vic_1(struct kunit *t
+       unsigned long long rate;
+       struct drm_device *drm = &priv->drm;
+-      mode = drm_display_mode_from_cea_vic(drm, 1);
++      mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
+       rate = drm_hdmi_compute_mode_clock(mode, 12, HDMI_COLORSPACE_RGB);
+@@ -1095,7 +1095,7 @@ static void drm_test_drm_hdmi_compute_mode_clock_rgb_double(struct kunit *test)
+       unsigned long long rate;
+       struct drm_device *drm = &priv->drm;
+-      mode = drm_display_mode_from_cea_vic(drm, 6);
++      mode = drm_kunit_display_mode_from_cea_vic(test, drm, 6);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
+       KUNIT_ASSERT_TRUE(test, mode->flags & DRM_MODE_FLAG_DBLCLK);
+@@ -1118,7 +1118,7 @@ static void drm_test_connector_hdmi_compute_mode_clock_yuv420_valid(struct kunit
+       unsigned long long rate;
+       unsigned int vic = *(unsigned int *)test->param_value;
+-      mode = drm_display_mode_from_cea_vic(drm, vic);
++      mode = drm_kunit_display_mode_from_cea_vic(test, drm, vic);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
+       KUNIT_ASSERT_FALSE(test, mode->flags & DRM_MODE_FLAG_DBLCLK);
+@@ -1155,7 +1155,7 @@ static void drm_test_connector_hdmi_compute_mode_clock_yuv420_10_bpc(struct kuni
+               drm_hdmi_compute_mode_clock_yuv420_vic_valid_tests[0];
+       unsigned long long rate;
+-      mode = drm_display_mode_from_cea_vic(drm, vic);
++      mode = drm_kunit_display_mode_from_cea_vic(test, drm, vic);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
+       KUNIT_ASSERT_FALSE(test, mode->flags & DRM_MODE_FLAG_DBLCLK);
+@@ -1180,7 +1180,7 @@ static void drm_test_connector_hdmi_compute_mode_clock_yuv420_12_bpc(struct kuni
+               drm_hdmi_compute_mode_clock_yuv420_vic_valid_tests[0];
+       unsigned long long rate;
+-      mode = drm_display_mode_from_cea_vic(drm, vic);
++      mode = drm_kunit_display_mode_from_cea_vic(test, drm, vic);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
+       KUNIT_ASSERT_FALSE(test, mode->flags & DRM_MODE_FLAG_DBLCLK);
+@@ -1203,7 +1203,7 @@ static void drm_test_connector_hdmi_compute_mode_clock_yuv422_8_bpc(struct kunit
+       struct drm_device *drm = &priv->drm;
+       unsigned long long rate;
+-      mode = drm_display_mode_from_cea_vic(drm, 16);
++      mode = drm_kunit_display_mode_from_cea_vic(test, drm, 16);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
+       KUNIT_ASSERT_FALSE(test, mode->flags & DRM_MODE_FLAG_DBLCLK);
+@@ -1225,7 +1225,7 @@ static void drm_test_connector_hdmi_compute_mode_clock_yuv422_10_bpc(struct kuni
+       struct drm_device *drm = &priv->drm;
+       unsigned long long rate;
+-      mode = drm_display_mode_from_cea_vic(drm, 16);
++      mode = drm_kunit_display_mode_from_cea_vic(test, drm, 16);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
+       KUNIT_ASSERT_FALSE(test, mode->flags & DRM_MODE_FLAG_DBLCLK);
+@@ -1247,7 +1247,7 @@ static void drm_test_connector_hdmi_compute_mode_clock_yuv422_12_bpc(struct kuni
+       struct drm_device *drm = &priv->drm;
+       unsigned long long rate;
+-      mode = drm_display_mode_from_cea_vic(drm, 16);
++      mode = drm_kunit_display_mode_from_cea_vic(test, drm, 16);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
+       KUNIT_ASSERT_FALSE(test, mode->flags & DRM_MODE_FLAG_DBLCLK);
+-- 
+2.43.0
+
diff --git a/queue-6.11/drm-tests-hdmi-fix-memory-leaks-in-drm_display_mode_.patch b/queue-6.11/drm-tests-hdmi-fix-memory-leaks-in-drm_display_mode_.patch
new file mode 100644 (file)
index 0000000..0e55245
--- /dev/null
@@ -0,0 +1,89 @@
+From c6d8355bd7745dd50292fd94fe86e4eb519109cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 30 Oct 2024 10:35:04 +0800
+Subject: drm/tests: hdmi: Fix memory leaks in drm_display_mode_from_cea_vic()
+
+From: Jinjie Ruan <ruanjinjie@huawei.com>
+
+[ Upstream commit add4163aca0d4a86e9fe4aa513865e4237db8aef ]
+
+modprobe drm_hdmi_state_helper_test and then rmmod it, the following
+memory leak occurs.
+
+The `mode` allocated in drm_mode_duplicate() called by
+drm_display_mode_from_cea_vic() is not freed, which cause the memory leak:
+
+       unreferenced object 0xffffff80ccd18100 (size 128):
+         comm "kunit_try_catch", pid 1851, jiffies 4295059695
+         hex dump (first 32 bytes):
+           57 62 00 00 80 02 90 02 f0 02 20 03 00 00 e0 01  Wb........ .....
+           ea 01 ec 01 0d 02 00 00 0a 00 00 00 00 00 00 00  ................
+         backtrace (crc c2f1aa95):
+           [<000000000f10b11b>] kmemleak_alloc+0x34/0x40
+           [<000000001cd4cf73>] __kmalloc_cache_noprof+0x26c/0x2f4
+           [<00000000f1f3cffa>] drm_mode_duplicate+0x44/0x19c
+           [<000000008cbeef13>] drm_display_mode_from_cea_vic+0x88/0x98
+           [<0000000019daaacf>] 0xffffffedc11ae69c
+           [<000000000aad0f85>] kunit_try_run_case+0x13c/0x3ac
+           [<00000000a9210bac>] kunit_generic_run_threadfn_adapter+0x80/0xec
+           [<000000000a0b2e9e>] kthread+0x2e8/0x374
+           [<00000000bd668858>] ret_from_fork+0x10/0x20
+       ......
+
+Free `mode` by using drm_kunit_display_mode_from_cea_vic()
+to fix it.
+
+Cc: stable@vger.kernel.org
+Fixes: 4af70f19e559 ("drm/tests: Add RGB Quantization tests")
+Acked-by: Maxime Ripard <mripard@kernel.org>
+Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241030023504.530425-4-ruanjinjie@huawei.com
+Signed-off-by: Maxime Ripard <mripard@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c
+index 34ee95d41f296..294773342e710 100644
+--- a/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c
++++ b/drivers/gpu/drm/tests/drm_hdmi_state_helper_test.c
+@@ -441,7 +441,7 @@ static void drm_test_check_broadcast_rgb_auto_cea_mode_vic_1(struct kunit *test)
+       ctx = drm_kunit_helper_acquire_ctx_alloc(test);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx);
+-      mode = drm_display_mode_from_cea_vic(drm, 1);
++      mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
+       drm = &priv->drm;
+@@ -555,7 +555,7 @@ static void drm_test_check_broadcast_rgb_full_cea_mode_vic_1(struct kunit *test)
+       ctx = drm_kunit_helper_acquire_ctx_alloc(test);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx);
+-      mode = drm_display_mode_from_cea_vic(drm, 1);
++      mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
+       drm = &priv->drm;
+@@ -671,7 +671,7 @@ static void drm_test_check_broadcast_rgb_limited_cea_mode_vic_1(struct kunit *te
+       ctx = drm_kunit_helper_acquire_ctx_alloc(test);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx);
+-      mode = drm_display_mode_from_cea_vic(drm, 1);
++      mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
+       drm = &priv->drm;
+@@ -1263,7 +1263,7 @@ static void drm_test_check_output_bpc_format_vic_1(struct kunit *test)
+       ctx = drm_kunit_helper_acquire_ctx_alloc(test);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ctx);
+-      mode = drm_display_mode_from_cea_vic(drm, 1);
++      mode = drm_kunit_display_mode_from_cea_vic(test, drm, 1);
+       KUNIT_ASSERT_NOT_NULL(test, mode);
+       /*
+-- 
+2.43.0
+
diff --git a/queue-6.11/drm-tests-helpers-add-helper-for-drm_display_mode_fr.patch b/queue-6.11/drm-tests-helpers-add-helper-for-drm_display_mode_fr.patch
new file mode 100644 (file)
index 0000000..ea095a3
--- /dev/null
@@ -0,0 +1,102 @@
+From 7d8d06f477ce63209b81a74c217ce2ce4b92c282 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 30 Oct 2024 10:35:02 +0800
+Subject: drm/tests: helpers: Add helper for drm_display_mode_from_cea_vic()
+
+From: Jinjie Ruan <ruanjinjie@huawei.com>
+
+[ Upstream commit caa714f86699bcfb01aa2d698db12d91af7d0d81 ]
+
+As Maxime suggested, add a new helper
+drm_kunit_display_mode_from_cea_vic(), it can replace the direct call
+of drm_display_mode_from_cea_vic(), and it will help solving
+the `mode` memory leaks.
+
+Acked-by: Maxime Ripard <mripard@kernel.org>
+Suggested-by: Maxime Ripard <mripard@kernel.org>
+Signed-off-by: Jinjie Ruan <ruanjinjie@huawei.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241030023504.530425-2-ruanjinjie@huawei.com
+Signed-off-by: Maxime Ripard <mripard@kernel.org>
+Stable-dep-of: 926163342a2e ("drm/connector: hdmi: Fix memory leak in drm_display_mode_from_cea_vic()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/tests/drm_kunit_helpers.c | 42 +++++++++++++++++++++++
+ include/drm/drm_kunit_helpers.h           |  4 +++
+ 2 files changed, 46 insertions(+)
+
+diff --git a/drivers/gpu/drm/tests/drm_kunit_helpers.c b/drivers/gpu/drm/tests/drm_kunit_helpers.c
+index aa62719dab0e4..04a6b8cc62ac6 100644
+--- a/drivers/gpu/drm/tests/drm_kunit_helpers.c
++++ b/drivers/gpu/drm/tests/drm_kunit_helpers.c
+@@ -3,6 +3,7 @@
+ #include <drm/drm_atomic.h>
+ #include <drm/drm_atomic_helper.h>
+ #include <drm/drm_drv.h>
++#include <drm/drm_edid.h>
+ #include <drm/drm_fourcc.h>
+ #include <drm/drm_kunit_helpers.h>
+ #include <drm/drm_managed.h>
+@@ -311,6 +312,47 @@ drm_kunit_helper_create_crtc(struct kunit *test,
+ }
+ EXPORT_SYMBOL_GPL(drm_kunit_helper_create_crtc);
++static void kunit_action_drm_mode_destroy(void *ptr)
++{
++      struct drm_display_mode *mode = ptr;
++
++      drm_mode_destroy(NULL, mode);
++}
++
++/**
++ * drm_kunit_display_mode_from_cea_vic() - return a mode for CEA VIC
++                                         for a KUnit test
++ * @test: The test context object
++ * @dev: DRM device
++ * @video_code: CEA VIC of the mode
++ *
++ * Creates a new mode matching the specified CEA VIC for a KUnit test.
++ *
++ * Resources will be cleaned up automatically.
++ *
++ * Returns: A new drm_display_mode on success or NULL on failure
++ */
++struct drm_display_mode *
++drm_kunit_display_mode_from_cea_vic(struct kunit *test, struct drm_device *dev,
++                                  u8 video_code)
++{
++      struct drm_display_mode *mode;
++      int ret;
++
++      mode = drm_display_mode_from_cea_vic(dev, video_code);
++      if (!mode)
++              return NULL;
++
++      ret = kunit_add_action_or_reset(test,
++                                      kunit_action_drm_mode_destroy,
++                                      mode);
++      if (ret)
++              return NULL;
++
++      return mode;
++}
++EXPORT_SYMBOL_GPL(drm_kunit_display_mode_from_cea_vic);
++
+ MODULE_AUTHOR("Maxime Ripard <maxime@cerno.tech>");
+ MODULE_DESCRIPTION("KUnit test suite helper functions");
+ MODULE_LICENSE("GPL");
+diff --git a/include/drm/drm_kunit_helpers.h b/include/drm/drm_kunit_helpers.h
+index e7cc17ee4934a..afdd46ef04f70 100644
+--- a/include/drm/drm_kunit_helpers.h
++++ b/include/drm/drm_kunit_helpers.h
+@@ -120,4 +120,8 @@ drm_kunit_helper_create_crtc(struct kunit *test,
+                            const struct drm_crtc_funcs *funcs,
+                            const struct drm_crtc_helper_funcs *helper_funcs);
++struct drm_display_mode *
++drm_kunit_display_mode_from_cea_vic(struct kunit *test, struct drm_device *dev,
++                                  u8 video_code);
++
+ #endif // DRM_KUNIT_HELPERS_H_
+-- 
+2.43.0
+
diff --git a/queue-6.11/drm-xe-add-mmio-read-before-ggtt-invalidate.patch b/queue-6.11/drm-xe-add-mmio-read-before-ggtt-invalidate.patch
new file mode 100644 (file)
index 0000000..3fea0bb
--- /dev/null
@@ -0,0 +1,62 @@
+From ef77231ea76a679ce2c45b3633fc73d1968865ba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Oct 2024 15:12:00 -0700
+Subject: drm/xe: Add mmio read before GGTT invalidate
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Matthew Brost <matthew.brost@intel.com>
+
+[ Upstream commit 993ca0eccec65a2cacc3cefb15d35ffadc6f00fb ]
+
+On LNL without a mmio read before a GGTT invalidate the GuC can
+incorrectly read the GGTT scratch page upon next access leading to jobs
+not getting scheduled. A mmio read before a GGTT invalidate seems to fix
+this. Since a GGTT invalidate is not a hot code path, blindly do a mmio
+read before each GGTT invalidate.
+
+Cc: John Harrison <John.C.Harrison@Intel.com>
+Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
+Cc: Thomas Hellström <thomas.hellstrom@linux.intel.com>
+Cc: Lucas De Marchi <lucas.demarchi@intel.com>
+Cc: stable@vger.kernel.org
+Fixes: dd08ebf6c352 ("drm/xe: Introduce a new DRM driver for Intel GPUs")
+Reported-by: Paulo Zanoni <paulo.r.zanoni@intel.com>
+Closes: https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/3164
+Signed-off-by: Matthew Brost <matthew.brost@intel.com>
+Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241023221200.1797832-1-matthew.brost@intel.com
+Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
+(cherry picked from commit 5a710196883e0ac019ac6df2a6d79c16ad3c32fa)
+[ Fix conflict with mmio vs gt argument ]
+Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/xe/xe_ggtt.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
+index 0cdbc1296e885..226542bb1442e 100644
+--- a/drivers/gpu/drm/xe/xe_ggtt.c
++++ b/drivers/gpu/drm/xe/xe_ggtt.c
+@@ -309,6 +309,16 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
+ static void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
+ {
++      struct xe_device *xe = tile_to_xe(ggtt->tile);
++
++      /*
++       * XXX: Barrier for GGTT pages. Unsure exactly why this required but
++       * without this LNL is having issues with the GuC reading scratch page
++       * vs. correct GGTT page. Not particularly a hot code path so blindly
++       * do a mmio read here which results in GuC reading correct GGTT page.
++       */
++      xe_mmio_read32(xe_root_mmio_gt(xe), VF_CAP_REG);
++
+       /* Each GT in a tile has its own TLB to cache GGTT lookups */
+       ggtt_invalidate_gt_tlb(ggtt->tile->primary_gt);
+       ggtt_invalidate_gt_tlb(ggtt->tile->media_gt);
+-- 
+2.43.0
+
diff --git a/queue-6.11/drm-xe-don-t-short-circuit-tdr-on-jobs-not-started.patch b/queue-6.11/drm-xe-don-t-short-circuit-tdr-on-jobs-not-started.patch
new file mode 100644 (file)
index 0000000..1c4c35a
--- /dev/null
@@ -0,0 +1,83 @@
+From 133a844e2efa74fa38eb3be012c531cbefec816a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Oct 2024 14:43:29 -0700
+Subject: drm/xe: Don't short circuit TDR on jobs not started
+
+From: Matthew Brost <matthew.brost@intel.com>
+
+[ Upstream commit fe05cee4d9533892210e1ee90147175d87e7c053 ]
+
+Short circuiting TDR on jobs not started is an optimization which is not
+required. On LNL we are facing an issue where jobs do not get scheduled
+by the GuC if it misses a GGTT page update. When this occurs let the TDR
+fire, toggle the scheduling which may get the job unstuck, and print a
+warning message. If the TDR fires twice on job that hasn't started,
+timeout the job.
+
+v2:
+ - Add warning message (Paulo)
+ - Add fixes tag (Paulo)
+ - Timeout job which hasn't started after TDR firing twice
+v3:
+ - Include local change
+v4:
+ - Short circuit check_timeout on job not started
+ - use warn level rather than notice (Paulo)
+
+Fixes: 7ddb9403dd74 ("drm/xe: Sample ctx timestamp to determine if jobs have timed out")
+Cc: stable@vger.kernel.org
+Cc: Paulo Zanoni <paulo.r.zanoni@intel.com>
+Signed-off-by: Matthew Brost <matthew.brost@intel.com>
+Reviewed-by: Lucas De Marchi <lucas.demarchi@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20241025214330.2010521-2-matthew.brost@intel.com
+Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
+(cherry picked from commit 35d25a4a0012e690ef0cc4c5440231176db595cc)
+Signed-off-by: Lucas De Marchi <lucas.demarchi@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/xe/xe_guc_submit.c | 18 ++++++++++++------
+ 1 file changed, 12 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
+index dfd809e7bbd25..cbdd44567d107 100644
+--- a/drivers/gpu/drm/xe/xe_guc_submit.c
++++ b/drivers/gpu/drm/xe/xe_guc_submit.c
+@@ -989,12 +989,22 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
+ static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
+ {
+       struct xe_gt *gt = guc_to_gt(exec_queue_to_guc(q));
+-      u32 ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
+-      u32 ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
++      u32 ctx_timestamp, ctx_job_timestamp;
+       u32 timeout_ms = q->sched_props.job_timeout_ms;
+       u32 diff;
+       u64 running_time_ms;
++      if (!xe_sched_job_started(job)) {
++              xe_gt_warn(gt, "Check job timeout: seqno=%u, lrc_seqno=%u, guc_id=%d, not started",
++                         xe_sched_job_seqno(job), xe_sched_job_lrc_seqno(job),
++                         q->guc->id);
++
++              return xe_sched_invalidate_job(job, 2);
++      }
++
++      ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
++      ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
++
+       /*
+        * Counter wraps at ~223s at the usual 19.2MHz, be paranoid catch
+        * possible overflows with a high timeout.
+@@ -1120,10 +1130,6 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
+               exec_queue_killed_or_banned_or_wedged(q) ||
+               exec_queue_destroyed(q);
+-      /* Job hasn't started, can't be timed out */
+-      if (!skip_timeout_check && !xe_sched_job_started(job))
+-              goto rearm;
+-
+       /*
+        * XXX: Sampling timeout doesn't work in wedged mode as we have to
+        * modify scheduling state to read timestamp. We could read the
+-- 
+2.43.0
+
diff --git a/queue-6.11/drm-xe-fix-register-definition-order-in-xe_regs.h.patch b/queue-6.11/drm-xe-fix-register-definition-order-in-xe_regs.h.patch
new file mode 100644 (file)
index 0000000..d5414d6
--- /dev/null
@@ -0,0 +1,44 @@
+From 6a5c8533fc687e76f07c2c8014ca7f5fa9c9f106 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Jul 2024 20:37:02 +0200
+Subject: drm/xe: Fix register definition order in xe_regs.h
+
+From: Michal Wajdeczko <michal.wajdeczko@intel.com>
+
+[ Upstream commit 9dae9751c7b0086963f5cbb82424b5e4cf58f123 ]
+
+Swap XEHP_CLOCK_GATE_DIS(0x101014) with GU_DEBUG(x101018).
+
+Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
+Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
+Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240702183704.1022-2-michal.wajdeczko@intel.com
+Stable-dep-of: 993ca0eccec6 ("drm/xe: Add mmio read before GGTT invalidate")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/xe/regs/xe_regs.h | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
+index 23e33ec849022..23ecba38ed419 100644
+--- a/drivers/gpu/drm/xe/regs/xe_regs.h
++++ b/drivers/gpu/drm/xe/regs/xe_regs.h
+@@ -24,12 +24,12 @@
+ #define   LMEM_INIT                           REG_BIT(7)
+ #define   DRIVERFLR                           REG_BIT(31)
+-#define GU_DEBUG                              XE_REG(0x101018)
+-#define   DRIVERFLR_STATUS                    REG_BIT(31)
+-
+ #define XEHP_CLOCK_GATE_DIS                   XE_REG(0x101014)
+ #define   SGSI_SIDECLK_DIS                    REG_BIT(17)
++#define GU_DEBUG                              XE_REG(0x101018)
++#define   DRIVERFLR_STATUS                    REG_BIT(31)
++
+ #define XEHP_MTCFG_ADDR                               XE_REG(0x101800)
+ #define   TILE_COUNT                          REG_GENMASK(15, 8)
+-- 
+2.43.0
+
diff --git a/queue-6.11/drm-xe-kill-regs-xe_sriov_regs.h.patch b/queue-6.11/drm-xe-kill-regs-xe_sriov_regs.h.patch
new file mode 100644 (file)
index 0000000..10a4a89
--- /dev/null
@@ -0,0 +1,147 @@
+From c69ebdccd5fb1b3314bd863e30c4404e1e790b04 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Jul 2024 20:37:03 +0200
+Subject: drm/xe: Kill regs/xe_sriov_regs.h
+
+From: Michal Wajdeczko <michal.wajdeczko@intel.com>
+
+[ Upstream commit 466a6c3855cf00653c14a92a6e9f8ae50077b77d ]
+
+There is no real benefit to maintain a separate file. The register
+definitions related to SR-IOV can be placed in existing headers.
+
+Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
+Reviewed-by: Matt Roper <matthew.d.roper@intel.com>
+Reviewed-by: Himal Prasad Ghimiray <himal.prasad.ghimiray@intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20240702183704.1022-3-michal.wajdeczko@intel.com
+Stable-dep-of: 993ca0eccec6 ("drm/xe: Add mmio read before GGTT invalidate")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/xe/regs/xe_gt_regs.h    |  6 ++++++
+ drivers/gpu/drm/xe/regs/xe_regs.h       |  6 ++++++
+ drivers/gpu/drm/xe/regs/xe_sriov_regs.h | 23 -----------------------
+ drivers/gpu/drm/xe/xe_gt_sriov_pf.c     |  2 +-
+ drivers/gpu/drm/xe/xe_lmtt.c            |  2 +-
+ drivers/gpu/drm/xe/xe_sriov.c           |  2 +-
+ 6 files changed, 15 insertions(+), 26 deletions(-)
+ delete mode 100644 drivers/gpu/drm/xe/regs/xe_sriov_regs.h
+
+diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+index 3c28650400586..a8c4998384d68 100644
+--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
++++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+@@ -91,6 +91,8 @@
+ #define VE1_AUX_INV                           XE_REG(0x42b8)
+ #define   AUX_INV                             REG_BIT(0)
++#define XE2_LMEM_CFG                          XE_REG(0x48b0)
++
+ #define XEHP_TILE_ADDR_RANGE(_idx)            XE_REG_MCR(0x4900 + (_idx) * 4)
+ #define XEHP_FLAT_CCS_BASE_ADDR                       XE_REG_MCR(0x4910)
+ #define XEHP_FLAT_CCS_PTR                     REG_GENMASK(31, 8)
+@@ -403,6 +405,10 @@
+ #define   INVALIDATION_BROADCAST_MODE_DIS     REG_BIT(12)
+ #define   GLOBAL_INVALIDATION_MODE            REG_BIT(2)
++#define LMEM_CFG                              XE_REG(0xcf58)
++#define   LMEM_EN                             REG_BIT(31)
++#define   LMTT_DIR_PTR                                REG_GENMASK(30, 0) /* in multiples of 64KB */
++
+ #define HALF_SLICE_CHICKEN5                   XE_REG_MCR(0xe188, XE_REG_OPTION_MASKED)
+ #define   DISABLE_SAMPLE_G_PERFORMANCE                REG_BIT(0)
+diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
+index 23ecba38ed419..55bf47c990169 100644
+--- a/drivers/gpu/drm/xe/regs/xe_regs.h
++++ b/drivers/gpu/drm/xe/regs/xe_regs.h
+@@ -30,6 +30,9 @@
+ #define GU_DEBUG                              XE_REG(0x101018)
+ #define   DRIVERFLR_STATUS                    REG_BIT(31)
++#define VIRTUAL_CTRL_REG                      XE_REG(0x10108c)
++#define   GUEST_GTT_UPDATE_EN                 REG_BIT(8)
++
+ #define XEHP_MTCFG_ADDR                               XE_REG(0x101800)
+ #define   TILE_COUNT                          REG_GENMASK(15, 8)
+@@ -66,6 +69,9 @@
+ #define   DISPLAY_IRQ                         REG_BIT(16)
+ #define   GT_DW_IRQ(x)                                REG_BIT(x)
++#define VF_CAP_REG                            XE_REG(0x1901f8, XE_REG_OPTION_VF)
++#define   VF_CAP                              REG_BIT(0)
++
+ #define PVC_RP_STATE_CAP                      XE_REG(0x281014)
+ #endif
+diff --git a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h b/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
+deleted file mode 100644
+index 017b4ddd1ecf4..0000000000000
+--- a/drivers/gpu/drm/xe/regs/xe_sriov_regs.h
++++ /dev/null
+@@ -1,23 +0,0 @@
+-/* SPDX-License-Identifier: MIT */
+-/*
+- * Copyright Â© 2023 Intel Corporation
+- */
+-
+-#ifndef _REGS_XE_SRIOV_REGS_H_
+-#define _REGS_XE_SRIOV_REGS_H_
+-
+-#include "regs/xe_reg_defs.h"
+-
+-#define XE2_LMEM_CFG                  XE_REG(0x48b0)
+-
+-#define LMEM_CFG                      XE_REG(0xcf58)
+-#define   LMEM_EN                     REG_BIT(31)
+-#define   LMTT_DIR_PTR                        REG_GENMASK(30, 0) /* in multiples of 64KB */
+-
+-#define VIRTUAL_CTRL_REG              XE_REG(0x10108c)
+-#define   GUEST_GTT_UPDATE_EN         REG_BIT(8)
+-
+-#define VF_CAP_REG                    XE_REG(0x1901f8, XE_REG_OPTION_VF)
+-#define   VF_CAP                      REG_BIT(0)
+-
+-#endif
+diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
+index 9dbba9ab7a9ab..ef239440963ce 100644
+--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
++++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
+@@ -5,7 +5,7 @@
+ #include <drm/drm_managed.h>
+-#include "regs/xe_sriov_regs.h"
++#include "regs/xe_regs.h"
+ #include "xe_gt_sriov_pf.h"
+ #include "xe_gt_sriov_pf_config.h"
+diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
+index 418661a889183..c5fdb36b6d336 100644
+--- a/drivers/gpu/drm/xe/xe_lmtt.c
++++ b/drivers/gpu/drm/xe/xe_lmtt.c
+@@ -7,7 +7,7 @@
+ #include <drm/drm_managed.h>
+-#include "regs/xe_sriov_regs.h"
++#include "regs/xe_gt_regs.h"
+ #include "xe_assert.h"
+ #include "xe_bo.h"
+diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
+index a274a5fb14018..5a1d65e4f19f2 100644
+--- a/drivers/gpu/drm/xe/xe_sriov.c
++++ b/drivers/gpu/drm/xe/xe_sriov.c
+@@ -5,7 +5,7 @@
+ #include <drm/drm_managed.h>
+-#include "regs/xe_sriov_regs.h"
++#include "regs/xe_regs.h"
+ #include "xe_assert.h"
+ #include "xe_device.h"
+-- 
+2.43.0
+
diff --git a/queue-6.11/fork-do-not-invoke-uffd-on-fork-if-error-occurs.patch b/queue-6.11/fork-do-not-invoke-uffd-on-fork-if-error-occurs.patch
new file mode 100644 (file)
index 0000000..e96464d
--- /dev/null
@@ -0,0 +1,157 @@
+From 9a138c22afb6bcd6434157db0d6827b4424d6f13 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Oct 2024 18:56:05 +0100
+Subject: fork: do not invoke uffd on fork if error occurs
+
+From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+
+[ Upstream commit f64e67e5d3a45a4a04286c47afade4b518acd47b ]
+
+Patch series "fork: do not expose incomplete mm on fork".
+
+During fork we may place the virtual memory address space into an
+inconsistent state before the fork operation is complete.
+
+In addition, we may encounter an error during the fork operation that
+indicates that the virtual memory address space is invalidated.
+
+As a result, we should not be exposing it in any way to external machinery
+that might interact with the mm or VMAs, machinery that is not designed to
+deal with incomplete state.
+
+We specifically update the fork logic to defer khugepaged and ksm to the
+end of the operation and only to be invoked if no error arose, and
+disallow uffd from observing fork events should an error have occurred.
+
+This patch (of 2):
+
+Currently on fork we expose the virtual address space of a process to
+userland unconditionally if uffd is registered in VMAs, regardless of
+whether an error arose in the fork.
+
+This is performed in dup_userfaultfd_complete() which is invoked
+unconditionally, and performs two duties - invoking registered handlers
+for the UFFD_EVENT_FORK event via dup_fctx(), and clearing down
+userfaultfd_fork_ctx objects established in dup_userfaultfd().
+
+This is problematic, because the virtual address space may not yet be
+correctly initialised if an error arose.
+
+The change in commit d24062914837 ("fork: use __mt_dup() to duplicate
+maple tree in dup_mmap()") makes this more pertinent as we may be in a
+state where entries in the maple tree are not yet consistent.
+
+We address this by, on fork error, ensuring that we roll back state that
+we would otherwise expect to clean up through the event being handled by
+userland and perform the memory freeing duty otherwise performed by
+dup_userfaultfd_complete().
+
+We do this by implementing a new function, dup_userfaultfd_fail(), which
+performs the same loop, only decrementing reference counts.
+
+Note that we perform mmgrab() on the parent and child mm's, however
+userfaultfd_ctx_put() will mmdrop() this once the reference count drops to
+zero, so we will avoid memory leaks correctly here.
+
+Link: https://lkml.kernel.org/r/cover.1729014377.git.lorenzo.stoakes@oracle.com
+Link: https://lkml.kernel.org/r/d3691d58bb58712b6fb3df2be441d175bd3cdf07.1729014377.git.lorenzo.stoakes@oracle.com
+Fixes: d24062914837 ("fork: use __mt_dup() to duplicate maple tree in dup_mmap()")
+Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Reported-by: Jann Horn <jannh@google.com>
+Reviewed-by: Jann Horn <jannh@google.com>
+Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Linus Torvalds <torvalds@linuxfoundation.org>
+Cc: Vlastimil Babka <vbabka@suse.cz>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/userfaultfd.c              | 28 ++++++++++++++++++++++++++++
+ include/linux/userfaultfd_k.h |  5 +++++
+ kernel/fork.c                 |  5 ++++-
+ 3 files changed, 37 insertions(+), 1 deletion(-)
+
+diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
+index 27a3e9285fbf6..2f302da629cb4 100644
+--- a/fs/userfaultfd.c
++++ b/fs/userfaultfd.c
+@@ -731,6 +731,34 @@ void dup_userfaultfd_complete(struct list_head *fcs)
+       }
+ }
++void dup_userfaultfd_fail(struct list_head *fcs)
++{
++      struct userfaultfd_fork_ctx *fctx, *n;
++
++      /*
++       * An error has occurred on fork, we will tear memory down, but have
++       * allocated memory for fctx's and raised reference counts for both the
++       * original and child contexts (and on the mm for each as a result).
++       *
++       * These would ordinarily be taken care of by a user handling the event,
++       * but we are no longer doing so, so manually clean up here.
++       *
++       * mm tear down will take care of cleaning up VMA contexts.
++       */
++      list_for_each_entry_safe(fctx, n, fcs, list) {
++              struct userfaultfd_ctx *octx = fctx->orig;
++              struct userfaultfd_ctx *ctx = fctx->new;
++
++              atomic_dec(&octx->mmap_changing);
++              VM_BUG_ON(atomic_read(&octx->mmap_changing) < 0);
++              userfaultfd_ctx_put(octx);
++              userfaultfd_ctx_put(ctx);
++
++              list_del(&fctx->list);
++              kfree(fctx);
++      }
++}
++
+ void mremap_userfaultfd_prep(struct vm_area_struct *vma,
+                            struct vm_userfaultfd_ctx *vm_ctx)
+ {
+diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
+index a12bcf042551e..f4a45a37229ad 100644
+--- a/include/linux/userfaultfd_k.h
++++ b/include/linux/userfaultfd_k.h
+@@ -249,6 +249,7 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma,
+ extern int dup_userfaultfd(struct vm_area_struct *, struct list_head *);
+ extern void dup_userfaultfd_complete(struct list_head *);
++void dup_userfaultfd_fail(struct list_head *);
+ extern void mremap_userfaultfd_prep(struct vm_area_struct *,
+                                   struct vm_userfaultfd_ctx *);
+@@ -332,6 +333,10 @@ static inline void dup_userfaultfd_complete(struct list_head *l)
+ {
+ }
++static inline void dup_userfaultfd_fail(struct list_head *l)
++{
++}
++
+ static inline void mremap_userfaultfd_prep(struct vm_area_struct *vma,
+                                          struct vm_userfaultfd_ctx *ctx)
+ {
+diff --git a/kernel/fork.c b/kernel/fork.c
+index dbf3c5d81df3b..6423ce60b8f97 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -775,7 +775,10 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
+       mmap_write_unlock(mm);
+       flush_tlb_mm(oldmm);
+       mmap_write_unlock(oldmm);
+-      dup_userfaultfd_complete(&uf);
++      if (!retval)
++              dup_userfaultfd_complete(&uf);
++      else
++              dup_userfaultfd_fail(&uf);
+ fail_uprobe_end:
+       uprobe_end_dup_mmap();
+       return retval;
+-- 
+2.43.0
+
diff --git a/queue-6.11/fork-only-invoke-khugepaged-ksm-hooks-if-no-error.patch b/queue-6.11/fork-only-invoke-khugepaged-ksm-hooks-if-no-error.patch
new file mode 100644 (file)
index 0000000..c513ab2
--- /dev/null
@@ -0,0 +1,112 @@
+From 40ae57f840f266659ae573e60968ea68dc27fb90 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Oct 2024 18:56:06 +0100
+Subject: fork: only invoke khugepaged, ksm hooks if no error
+
+From: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+
+[ Upstream commit 985da552a98e27096444508ce5d853244019111f ]
+
+There is no reason to invoke these hooks early against an mm that is in an
+incomplete state.
+
+The change in commit d24062914837 ("fork: use __mt_dup() to duplicate
+maple tree in dup_mmap()") makes this more pertinent as we may be in a
+state where entries in the maple tree are not yet consistent.
+
+Their placement early in dup_mmap() only appears to have been meaningful
+for early error checking, and since functionally it'd require a very small
+allocation to fail (in practice 'too small to fail') that'd only occur in
+the most dire circumstances, meaning the fork would fail or be OOM'd in
+any case.
+
+Since both khugepaged and KSM tracking are there to provide optimisations
+to memory performance rather than critical functionality, it doesn't
+really matter all that much if, under such dire memory pressure, we fail
+to register an mm with these.
+
+As a result, we follow the example of commit d2081b2bf819 ("mm:
+khugepaged: make khugepaged_enter() void function") and make ksm_fork() a
+void function also.
+
+We only expose the mm to these functions once we are done with them and
+only if no error occurred in the fork operation.
+
+Link: https://lkml.kernel.org/r/e0cb8b840c9d1d5a6e84d4f8eff5f3f2022aa10c.1729014377.git.lorenzo.stoakes@oracle.com
+Fixes: d24062914837 ("fork: use __mt_dup() to duplicate maple tree in dup_mmap()")
+Signed-off-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Reported-by: Jann Horn <jannh@google.com>
+Reviewed-by: Liam R. Howlett <Liam.Howlett@Oracle.com>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Reviewed-by: Jann Horn <jannh@google.com>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Christian Brauner <brauner@kernel.org>
+Cc: Jan Kara <jack@suse.cz>
+Cc: Linus Torvalds <torvalds@linuxfoundation.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/ksm.h | 10 ++++------
+ kernel/fork.c       |  7 ++-----
+ 2 files changed, 6 insertions(+), 11 deletions(-)
+
+diff --git a/include/linux/ksm.h b/include/linux/ksm.h
+index 11690dacd9868..ec9c05044d4fe 100644
+--- a/include/linux/ksm.h
++++ b/include/linux/ksm.h
+@@ -54,12 +54,11 @@ static inline long mm_ksm_zero_pages(struct mm_struct *mm)
+       return atomic_long_read(&mm->ksm_zero_pages);
+ }
+-static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
++static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+ {
++      /* Adding mm to ksm is best effort on fork. */
+       if (test_bit(MMF_VM_MERGEABLE, &oldmm->flags))
+-              return __ksm_enter(mm);
+-
+-      return 0;
++              __ksm_enter(mm);
+ }
+ static inline int ksm_execve(struct mm_struct *mm)
+@@ -107,9 +106,8 @@ static inline int ksm_disable(struct mm_struct *mm)
+       return 0;
+ }
+-static inline int ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
++static inline void ksm_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+ {
+-      return 0;
+ }
+ static inline int ksm_execve(struct mm_struct *mm)
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 6423ce60b8f97..dc08a23747338 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -653,11 +653,6 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
+       mm->exec_vm = oldmm->exec_vm;
+       mm->stack_vm = oldmm->stack_vm;
+-      retval = ksm_fork(mm, oldmm);
+-      if (retval)
+-              goto out;
+-      khugepaged_fork(mm, oldmm);
+-
+       /* Use __mt_dup() to efficiently build an identical maple tree. */
+       retval = __mt_dup(&oldmm->mm_mt, &mm->mm_mt, GFP_KERNEL);
+       if (unlikely(retval))
+@@ -760,6 +755,8 @@ static __latent_entropy int dup_mmap(struct mm_struct *mm,
+       vma_iter_free(&vmi);
+       if (!retval) {
+               mt_set_in_rcu(vmi.mas.tree);
++              ksm_fork(mm, oldmm);
++              khugepaged_fork(mm, oldmm);
+       } else if (mpnt) {
+               /*
+                * The entire maple tree has already been duplicated. If the
+-- 
+2.43.0
+
diff --git a/queue-6.11/gpiolib-fix-debugfs-dangling-chip-separator.patch b/queue-6.11/gpiolib-fix-debugfs-dangling-chip-separator.patch
new file mode 100644 (file)
index 0000000..cdddb44
--- /dev/null
@@ -0,0 +1,39 @@
+From d622bb0ff1402d90cf7b5fc220887f64d4a6b2d1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Oct 2024 13:49:59 +0100
+Subject: gpiolib: fix debugfs dangling chip separator
+
+From: Johan Hovold <johan+linaro@kernel.org>
+
+[ Upstream commit 604888f8c3d01fddd9366161efc65cb3182831f1 ]
+
+Add the missing newline after entries for recently removed gpio chips
+so that the chip sections are separated by a newline as intended.
+
+Fixes: e348544f7994 ("gpio: protect the list of GPIO devices with SRCU")
+Cc: stable@vger.kernel.org     # 6.9
+Cc: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Link: https://lore.kernel.org/r/20241028125000.24051-3-johan+linaro@kernel.org
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpiolib.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
+index 1f522499c6fc5..337971080dfde 100644
+--- a/drivers/gpio/gpiolib.c
++++ b/drivers/gpio/gpiolib.c
+@@ -4879,7 +4879,7 @@ static int gpiolib_seq_show(struct seq_file *s, void *v)
+       gc = srcu_dereference(gdev->chip, &gdev->srcu);
+       if (!gc) {
+-              seq_printf(s, "%s%s: (dangling chip)",
++              seq_printf(s, "%s%s: (dangling chip)\n",
+                          priv->newline ? "\n" : "",
+                          dev_name(&gdev->dev));
+               return 0;
+-- 
+2.43.0
+
diff --git a/queue-6.11/gpiolib-fix-debugfs-newline-separators.patch b/queue-6.11/gpiolib-fix-debugfs-newline-separators.patch
new file mode 100644 (file)
index 0000000..c0b2c26
--- /dev/null
@@ -0,0 +1,47 @@
+From e36937357da8906fa4217d93f2830a8cbcddd910 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Oct 2024 13:49:58 +0100
+Subject: gpiolib: fix debugfs newline separators
+
+From: Johan Hovold <johan+linaro@kernel.org>
+
+[ Upstream commit 3e8b7238b427e05498034c240451af5f5495afda ]
+
+The gpiolib debugfs interface exports a list of all gpio chips in a
+system and the state of their pins.
+
+The gpio chip sections are supposed to be separated by a newline
+character, but a long-standing bug prevents the separator from
+being included when output is generated in multiple sessions, making the
+output inconsistent and hard to read.
+
+Make sure to only suppress the newline separator at the beginning of the
+file as intended.
+
+Fixes: f9c4a31f6150 ("gpiolib: Use seq_file's iterator interface")
+Cc: stable@vger.kernel.org     # 3.7
+Cc: Thierry Reding <treding@nvidia.com>
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Link: https://lore.kernel.org/r/20241028125000.24051-2-johan+linaro@kernel.org
+Signed-off-by: Bartosz Golaszewski <bartosz.golaszewski@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpio/gpiolib.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c
+index 148bcfbf98e02..1f522499c6fc5 100644
+--- a/drivers/gpio/gpiolib.c
++++ b/drivers/gpio/gpiolib.c
+@@ -4834,6 +4834,8 @@ static void *gpiolib_seq_start(struct seq_file *s, loff_t *pos)
+               return NULL;
+       s->private = priv;
++      if (*pos > 0)
++              priv->newline = true;
+       priv->idx = srcu_read_lock(&gpio_devices_srcu);
+       list_for_each_entry_srcu(gdev, &gpio_devices, list,
+-- 
+2.43.0
+
diff --git a/queue-6.11/iio-light-veml6030-fix-microlux-value-calculation.patch-18046 b/queue-6.11/iio-light-veml6030-fix-microlux-value-calculation.patch-18046
new file mode 100644 (file)
index 0000000..9e7eef7
--- /dev/null
@@ -0,0 +1,47 @@
+From 617e0a742fa1ec269638d280602c22c4ebd702b2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Oct 2024 19:04:31 +0200
+Subject: iio: light: veml6030: fix microlux value calculation
+
+From: Javier Carrasco <javier.carrasco.cruz@gmail.com>
+
+[ Upstream commit 63dd163cd61dda6f38343776b42331cc6b7e56e0 ]
+
+The raw value conversion to obtain a measurement in lux as
+INT_PLUS_MICRO does not calculate the decimal part properly to display
+it as micro (in this case microlux). It only calculates the module to
+obtain the decimal part from a resolution that is 10000 times the
+provided in the datasheet (0.5376 lux/cnt for the veml6030). The
+resulting value must still be multiplied by 100 to make it micro.
+
+This bug was introduced with the original implementation of the driver.
+
+Only the illuminance channel is fixed becuase the scale is non sensical
+for the intensity channels anyway.
+
+Cc: stable@vger.kernel.org
+Fixes: 7b779f573c48 ("iio: light: add driver for veml6030 ambient light sensor")
+Signed-off-by: Javier Carrasco <javier.carrasco.cruz@gmail.com>
+Link: https://patch.msgid.link/20241016-veml6030-fix-processed-micro-v1-1-4a5644796437@gmail.com
+Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iio/light/veml6030.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/iio/light/veml6030.c b/drivers/iio/light/veml6030.c
+index 621428885455c..8e76c828afddc 100644
+--- a/drivers/iio/light/veml6030.c
++++ b/drivers/iio/light/veml6030.c
+@@ -535,7 +535,7 @@ static int veml6030_read_raw(struct iio_dev *indio_dev,
+                       }
+                       if (mask == IIO_CHAN_INFO_PROCESSED) {
+                               *val = (reg * data->cur_resolution) / 10000;
+-                              *val2 = (reg * data->cur_resolution) % 10000;
++                              *val2 = (reg * data->cur_resolution) % 10000 * 100;
+                               return IIO_VAL_INT_PLUS_MICRO;
+                       }
+                       *val = reg;
+-- 
+2.43.0
+
diff --git a/queue-6.11/input-edt-ft5x06-fix-regmap-leak-when-probe-fails.patch b/queue-6.11/input-edt-ft5x06-fix-regmap-leak-when-probe-fails.patch
new file mode 100644 (file)
index 0000000..3794a02
--- /dev/null
@@ -0,0 +1,82 @@
+From 598aa32497b9cd1af523303a8bc56899807ebf31 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 18 Oct 2024 17:17:48 -0700
+Subject: Input: edt-ft5x06 - fix regmap leak when probe fails
+
+From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+
+[ Upstream commit bffdf9d7e51a7be8eeaac2ccf9e54a5fde01ff65 ]
+
+The driver neglects to free the instance of I2C regmap constructed at
+the beginning of the edt_ft5x06_ts_probe() method when probe fails.
+Additionally edt_ft5x06_ts_remove() is freeing the regmap too early,
+before the rest of the device resources that are managed by devm are
+released.
+
+Fix this by installing a custom devm action that will ensure that the
+regmap is released at the right time during normal teardown as well as
+in case of probe failure.
+
+Note that devm_regmap_init_i2c() could not be used because the driver
+may replace the original regmap with a regmap specific for M06 devices
+in the middle of the probe, and using devm_regmap_init_i2c() would
+result in releasing the M06 regmap too early.
+
+Reported-by: Li Zetao <lizetao1@huawei.com>
+Fixes: 9dfd9708ffba ("Input: edt-ft5x06 - convert to use regmap API")
+Cc: stable@vger.kernel.org
+Reviewed-by: Oliver Graute <oliver.graute@kococonnector.com>
+Link: https://lore.kernel.org/r/ZxL6rIlVlgsAu-Jv@google.com
+Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/input/touchscreen/edt-ft5x06.c | 19 ++++++++++++++++++-
+ 1 file changed, 18 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/input/touchscreen/edt-ft5x06.c b/drivers/input/touchscreen/edt-ft5x06.c
+index e70415f189a55..126b0ed85aa50 100644
+--- a/drivers/input/touchscreen/edt-ft5x06.c
++++ b/drivers/input/touchscreen/edt-ft5x06.c
+@@ -1121,6 +1121,14 @@ static void edt_ft5x06_ts_set_regs(struct edt_ft5x06_ts_data *tsdata)
+       }
+ }
++static void edt_ft5x06_exit_regmap(void *arg)
++{
++      struct edt_ft5x06_ts_data *data = arg;
++
++      if (!IS_ERR_OR_NULL(data->regmap))
++              regmap_exit(data->regmap);
++}
++
+ static void edt_ft5x06_disable_regulators(void *arg)
+ {
+       struct edt_ft5x06_ts_data *data = arg;
+@@ -1154,6 +1162,16 @@ static int edt_ft5x06_ts_probe(struct i2c_client *client)
+               return PTR_ERR(tsdata->regmap);
+       }
++      /*
++       * We are not using devm_regmap_init_i2c() and instead install a
++       * custom action because we may replace regmap with M06-specific one
++       * and we need to make sure that it will not be released too early.
++       */
++      error = devm_add_action_or_reset(&client->dev, edt_ft5x06_exit_regmap,
++                                       tsdata);
++      if (error)
++              return error;
++
+       chip_data = device_get_match_data(&client->dev);
+       if (!chip_data)
+               chip_data = (const struct edt_i2c_chip_data *)id->driver_data;
+@@ -1347,7 +1365,6 @@ static void edt_ft5x06_ts_remove(struct i2c_client *client)
+       struct edt_ft5x06_ts_data *tsdata = i2c_get_clientdata(client);
+       edt_ft5x06_ts_teardown_debugfs(tsdata);
+-      regmap_exit(tsdata->regmap);
+ }
+ static int edt_ft5x06_ts_suspend(struct device *dev)
+-- 
+2.43.0
+
diff --git a/queue-6.11/input-fix-regression-when-re-registering-input-handl.patch b/queue-6.11/input-fix-regression-when-re-registering-input-handl.patch
new file mode 100644 (file)
index 0000000..bf56486
--- /dev/null
@@ -0,0 +1,253 @@
+From 0284dac550ec179dc0ddc39ab67ba9f172876b4f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 27 Oct 2024 22:31:15 -0700
+Subject: Input: fix regression when re-registering input handlers
+
+From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+
+[ Upstream commit 071b24b54d2d05fbf39ddbb27dee08abd1d713f3 ]
+
+Commit d469647bafd9 ("Input: simplify event handling logic") introduced
+code that would set handler->events() method to either
+input_handler_events_filter() or input_handler_events_default() or
+input_handler_events_null(), depending on the kind of input handler
+(a filter or a regular one) we are dealing with. Unfortunately this
+breaks cases when we try to re-register the same filter (as is the case
+with sysrq handler): after initial registration the handler will have 2
+event handling methods defined, and will run afoul of the check in
+input_handler_check_methods():
+
+       input: input_handler_check_methods: only one event processing method can be defined (sysrq)
+       sysrq: Failed to register input handler, error -22
+
+Fix this by adding handle_events() method to input_handle structure and
+setting it up when registering a new input handle according to event
+handling methods defined in associated input_handler structure, thus
+avoiding modifying the input_handler structure.
+
+Reported-by: "Ned T. Crigler" <crigler@gmail.com>
+Reported-by: Christian Heusel <christian@heusel.eu>
+Tested-by: "Ned T. Crigler" <crigler@gmail.com>
+Tested-by: Peter Seiderer <ps.report@gmx.net>
+Fixes: d469647bafd9 ("Input: simplify event handling logic")
+Link: https://lore.kernel.org/r/Zx2iQp6csn42PJA7@xavtug
+Cc: stable@vger.kernel.org
+Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/input/input.c | 134 +++++++++++++++++++++++-------------------
+ include/linux/input.h |  10 +++-
+ 2 files changed, 82 insertions(+), 62 deletions(-)
+
+diff --git a/drivers/input/input.c b/drivers/input/input.c
+index 54c57b267b25f..865d3f8e97a66 100644
+--- a/drivers/input/input.c
++++ b/drivers/input/input.c
+@@ -119,12 +119,12 @@ static void input_pass_values(struct input_dev *dev,
+       handle = rcu_dereference(dev->grab);
+       if (handle) {
+-              count = handle->handler->events(handle, vals, count);
++              count = handle->handle_events(handle, vals, count);
+       } else {
+               list_for_each_entry_rcu(handle, &dev->h_list, d_node)
+                       if (handle->open) {
+-                              count = handle->handler->events(handle, vals,
+-                                                              count);
++                              count = handle->handle_events(handle, vals,
++                                                            count);
+                               if (!count)
+                                       break;
+                       }
+@@ -2537,57 +2537,6 @@ static int input_handler_check_methods(const struct input_handler *handler)
+       return 0;
+ }
+-/*
+- * An implementation of input_handler's events() method that simply
+- * invokes handler->event() method for each event one by one.
+- */
+-static unsigned int input_handler_events_default(struct input_handle *handle,
+-                                               struct input_value *vals,
+-                                               unsigned int count)
+-{
+-      struct input_handler *handler = handle->handler;
+-      struct input_value *v;
+-
+-      for (v = vals; v != vals + count; v++)
+-              handler->event(handle, v->type, v->code, v->value);
+-
+-      return count;
+-}
+-
+-/*
+- * An implementation of input_handler's events() method that invokes
+- * handler->filter() method for each event one by one and removes events
+- * that were filtered out from the "vals" array.
+- */
+-static unsigned int input_handler_events_filter(struct input_handle *handle,
+-                                              struct input_value *vals,
+-                                              unsigned int count)
+-{
+-      struct input_handler *handler = handle->handler;
+-      struct input_value *end = vals;
+-      struct input_value *v;
+-
+-      for (v = vals; v != vals + count; v++) {
+-              if (handler->filter(handle, v->type, v->code, v->value))
+-                      continue;
+-              if (end != v)
+-                      *end = *v;
+-              end++;
+-      }
+-
+-      return end - vals;
+-}
+-
+-/*
+- * An implementation of input_handler's events() method that does nothing.
+- */
+-static unsigned int input_handler_events_null(struct input_handle *handle,
+-                                            struct input_value *vals,
+-                                            unsigned int count)
+-{
+-      return count;
+-}
+-
+ /**
+  * input_register_handler - register a new input handler
+  * @handler: handler to be registered
+@@ -2607,13 +2556,6 @@ int input_register_handler(struct input_handler *handler)
+       INIT_LIST_HEAD(&handler->h_list);
+-      if (handler->filter)
+-              handler->events = input_handler_events_filter;
+-      else if (handler->event)
+-              handler->events = input_handler_events_default;
+-      else if (!handler->events)
+-              handler->events = input_handler_events_null;
+-
+       error = mutex_lock_interruptible(&input_mutex);
+       if (error)
+               return error;
+@@ -2687,6 +2629,75 @@ int input_handler_for_each_handle(struct input_handler *handler, void *data,
+ }
+ EXPORT_SYMBOL(input_handler_for_each_handle);
++/*
++ * An implementation of input_handle's handle_events() method that simply
++ * invokes handler->event() method for each event one by one.
++ */
++static unsigned int input_handle_events_default(struct input_handle *handle,
++                                              struct input_value *vals,
++                                              unsigned int count)
++{
++      struct input_handler *handler = handle->handler;
++      struct input_value *v;
++
++      for (v = vals; v != vals + count; v++)
++              handler->event(handle, v->type, v->code, v->value);
++
++      return count;
++}
++
++/*
++ * An implementation of input_handle's handle_events() method that invokes
++ * handler->filter() method for each event one by one and removes events
++ * that were filtered out from the "vals" array.
++ */
++static unsigned int input_handle_events_filter(struct input_handle *handle,
++                                             struct input_value *vals,
++                                             unsigned int count)
++{
++      struct input_handler *handler = handle->handler;
++      struct input_value *end = vals;
++      struct input_value *v;
++
++      for (v = vals; v != vals + count; v++) {
++              if (handler->filter(handle, v->type, v->code, v->value))
++                      continue;
++              if (end != v)
++                      *end = *v;
++              end++;
++      }
++
++      return end - vals;
++}
++
++/*
++ * An implementation of input_handle's handle_events() method that does nothing.
++ */
++static unsigned int input_handle_events_null(struct input_handle *handle,
++                                           struct input_value *vals,
++                                           unsigned int count)
++{
++      return count;
++}
++
++/*
++ * Sets up appropriate handle->event_handler based on the input_handler
++ * associated with the handle.
++ */
++static void input_handle_setup_event_handler(struct input_handle *handle)
++{
++      struct input_handler *handler = handle->handler;
++
++      if (handler->filter)
++              handle->handle_events = input_handle_events_filter;
++      else if (handler->event)
++              handle->handle_events = input_handle_events_default;
++      else if (handler->events)
++              handle->handle_events = handler->events;
++      else
++              handle->handle_events = input_handle_events_null;
++}
++
+ /**
+  * input_register_handle - register a new input handle
+  * @handle: handle to register
+@@ -2704,6 +2715,7 @@ int input_register_handle(struct input_handle *handle)
+       struct input_dev *dev = handle->dev;
+       int error;
++      input_handle_setup_event_handler(handle);
+       /*
+        * We take dev->mutex here to prevent race with
+        * input_release_device().
+diff --git a/include/linux/input.h b/include/linux/input.h
+index 89a0be6ee0e23..cd866b020a01d 100644
+--- a/include/linux/input.h
++++ b/include/linux/input.h
+@@ -339,12 +339,16 @@ struct input_handler {
+  * @name: name given to the handle by handler that created it
+  * @dev: input device the handle is attached to
+  * @handler: handler that works with the device through this handle
++ * @handle_events: event sequence handler. It is set up by the input core
++ *    according to event handling method specified in the @handler. See
++ *    input_handle_setup_event_handler().
++ *    This method is being called by the input core with interrupts disabled
++ *    and dev->event_lock spinlock held and so it may not sleep.
+  * @d_node: used to put the handle on device's list of attached handles
+  * @h_node: used to put the handle on handler's list of handles from which
+  *    it gets events
+  */
+ struct input_handle {
+-
+       void *private;
+       int open;
+@@ -353,6 +357,10 @@ struct input_handle {
+       struct input_dev *dev;
+       struct input_handler *handler;
++      unsigned int (*handle_events)(struct input_handle *handle,
++                                    struct input_value *vals,
++                                    unsigned int count);
++
+       struct list_head        d_node;
+       struct list_head        h_node;
+ };
+-- 
+2.43.0
+
diff --git a/queue-6.11/io_uring-rw-fix-missing-nowait-check-for-o_direct-st.patch b/queue-6.11/io_uring-rw-fix-missing-nowait-check-for-o_direct-st.patch
new file mode 100644 (file)
index 0000000..8235fb1
--- /dev/null
@@ -0,0 +1,121 @@
+From abf6e0b067d865cd89978d9399581fe76c39e449 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Oct 2024 08:05:44 -0600
+Subject: io_uring/rw: fix missing NOWAIT check for O_DIRECT start write
+
+From: Jens Axboe <axboe@kernel.dk>
+
+[ Upstream commit 1d60d74e852647255bd8e76f5a22dc42531e4389 ]
+
+When io_uring starts a write, it'll call kiocb_start_write() to bump the
+super block rwsem, preventing any freezes from happening while that
+write is in-flight. The freeze side will grab that rwsem for writing,
+excluding any new writers from happening and waiting for existing writes
+to finish. But io_uring unconditionally uses kiocb_start_write(), which
+will block if someone is currently attempting to freeze the mount point.
+This causes a deadlock where freeze is waiting for previous writes to
+complete, but the previous writes cannot complete, as the task that is
+supposed to complete them is blocked waiting on starting a new write.
+This results in the following stuck trace showing that dependency with
+the write blocked starting a new write:
+
+task:fio             state:D stack:0     pid:886   tgid:886   ppid:876
+Call trace:
+ __switch_to+0x1d8/0x348
+ __schedule+0x8e8/0x2248
+ schedule+0x110/0x3f0
+ percpu_rwsem_wait+0x1e8/0x3f8
+ __percpu_down_read+0xe8/0x500
+ io_write+0xbb8/0xff8
+ io_issue_sqe+0x10c/0x1020
+ io_submit_sqes+0x614/0x2110
+ __arm64_sys_io_uring_enter+0x524/0x1038
+ invoke_syscall+0x74/0x268
+ el0_svc_common.constprop.0+0x160/0x238
+ do_el0_svc+0x44/0x60
+ el0_svc+0x44/0xb0
+ el0t_64_sync_handler+0x118/0x128
+ el0t_64_sync+0x168/0x170
+INFO: task fsfreeze:7364 blocked for more than 15 seconds.
+      Not tainted 6.12.0-rc5-00063-g76aaf945701c #7963
+
+with the attempting freezer stuck trying to grab the rwsem:
+
+task:fsfreeze        state:D stack:0     pid:7364  tgid:7364  ppid:995
+Call trace:
+ __switch_to+0x1d8/0x348
+ __schedule+0x8e8/0x2248
+ schedule+0x110/0x3f0
+ percpu_down_write+0x2b0/0x680
+ freeze_super+0x248/0x8a8
+ do_vfs_ioctl+0x149c/0x1b18
+ __arm64_sys_ioctl+0xd0/0x1a0
+ invoke_syscall+0x74/0x268
+ el0_svc_common.constprop.0+0x160/0x238
+ do_el0_svc+0x44/0x60
+ el0_svc+0x44/0xb0
+ el0t_64_sync_handler+0x118/0x128
+ el0t_64_sync+0x168/0x170
+
+Fix this by having the io_uring side honor IOCB_NOWAIT, and only attempt a
+blocking grab of the super block rwsem if it isn't set. For normal issue
+where IOCB_NOWAIT would always be set, this returns -EAGAIN which will
+have io_uring core issue a blocking attempt of the write. That will in
+turn also get completions run, ensuring forward progress.
+
+Since freezing requires CAP_SYS_ADMIN in the first place, this isn't
+something that can be triggered by a regular user.
+
+Cc: stable@vger.kernel.org # 5.10+
+Reported-by: Peter Mann <peter.mann@sh.cz>
+Link: https://lore.kernel.org/io-uring/38c94aec-81c9-4f62-b44e-1d87f5597644@sh.cz
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ io_uring/rw.c | 23 +++++++++++++++++++++--
+ 1 file changed, 21 insertions(+), 2 deletions(-)
+
+diff --git a/io_uring/rw.c b/io_uring/rw.c
+index 6b3bc0876f7fe..19e2c1f9c4a21 100644
+--- a/io_uring/rw.c
++++ b/io_uring/rw.c
+@@ -1016,6 +1016,25 @@ int io_read_mshot(struct io_kiocb *req, unsigned int issue_flags)
+       return IOU_OK;
+ }
++static bool io_kiocb_start_write(struct io_kiocb *req, struct kiocb *kiocb)
++{
++      struct inode *inode;
++      bool ret;
++
++      if (!(req->flags & REQ_F_ISREG))
++              return true;
++      if (!(kiocb->ki_flags & IOCB_NOWAIT)) {
++              kiocb_start_write(kiocb);
++              return true;
++      }
++
++      inode = file_inode(kiocb->ki_filp);
++      ret = sb_start_write_trylock(inode->i_sb);
++      if (ret)
++              __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
++      return ret;
++}
++
+ int io_write(struct io_kiocb *req, unsigned int issue_flags)
+ {
+       bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
+@@ -1053,8 +1072,8 @@ int io_write(struct io_kiocb *req, unsigned int issue_flags)
+       if (unlikely(ret))
+               return ret;
+-      if (req->flags & REQ_F_ISREG)
+-              kiocb_start_write(kiocb);
++      if (unlikely(!io_kiocb_start_write(req, kiocb)))
++              return -EAGAIN;
+       kiocb->ki_flags |= IOCB_WRITE;
+       if (likely(req->file->f_op->write_iter))
+-- 
+2.43.0
+
diff --git a/queue-6.11/iov_iter-fix-copy_page_from_iter_atomic-if-kmap_loca.patch b/queue-6.11/iov_iter-fix-copy_page_from_iter_atomic-if-kmap_loca.patch
new file mode 100644 (file)
index 0000000..eae6384
--- /dev/null
@@ -0,0 +1,70 @@
+From 42c16d9696004979bb2213d9208be27c1c867551 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 27 Oct 2024 15:23:23 -0700
+Subject: iov_iter: fix copy_page_from_iter_atomic() if KMAP_LOCAL_FORCE_MAP
+
+From: Hugh Dickins <hughd@google.com>
+
+[ Upstream commit c749d9b7ebbc5716af7a95f7768634b30d9446ec ]
+
+generic/077 on x86_32 CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP=y with highmem,
+on huge=always tmpfs, issues a warning and then hangs (interruptibly):
+
+WARNING: CPU: 5 PID: 3517 at mm/highmem.c:622 kunmap_local_indexed+0x62/0xc9
+CPU: 5 UID: 0 PID: 3517 Comm: cp Not tainted 6.12.0-rc4 #2
+...
+copy_page_from_iter_atomic+0xa6/0x5ec
+generic_perform_write+0xf6/0x1b4
+shmem_file_write_iter+0x54/0x67
+
+Fix copy_page_from_iter_atomic() by limiting it in that case
+(include/linux/skbuff.h skb_frag_must_loop() does similar).
+
+But going forward, perhaps CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP is too
+surprising, has outlived its usefulness, and should just be removed?
+
+Fixes: 908a1ad89466 ("iov_iter: Handle compound highmem pages in copy_page_from_iter_atomic()")
+Signed-off-by: Hugh Dickins <hughd@google.com>
+Link: https://lore.kernel.org/r/dd5f0c89-186e-18e1-4f43-19a60f5a9774@google.com
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Cc: stable@vger.kernel.org
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ lib/iov_iter.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/lib/iov_iter.c b/lib/iov_iter.c
+index 4a6a9f419bd7e..b892894228b03 100644
+--- a/lib/iov_iter.c
++++ b/lib/iov_iter.c
+@@ -461,6 +461,8 @@ size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
+               size_t bytes, struct iov_iter *i)
+ {
+       size_t n, copied = 0;
++      bool uses_kmap = IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP) ||
++                       PageHighMem(page);
+       if (!page_copy_sane(page, offset, bytes))
+               return 0;
+@@ -471,7 +473,7 @@ size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
+               char *p;
+               n = bytes - copied;
+-              if (PageHighMem(page)) {
++              if (uses_kmap) {
+                       page += offset / PAGE_SIZE;
+                       offset %= PAGE_SIZE;
+                       n = min_t(size_t, n, PAGE_SIZE - offset);
+@@ -482,7 +484,7 @@ size_t copy_page_from_iter_atomic(struct page *page, size_t offset,
+               kunmap_atomic(p);
+               copied += n;
+               offset += n;
+-      } while (PageHighMem(page) && copied != bytes && n > 0);
++      } while (uses_kmap && copied != bytes && n > 0);
+       return copied;
+ }
+-- 
+2.43.0
+
diff --git a/queue-6.11/kasan-remove-vmalloc_percpu-test.patch b/queue-6.11/kasan-remove-vmalloc_percpu-test.patch
new file mode 100644 (file)
index 0000000..b9780e7
--- /dev/null
@@ -0,0 +1,87 @@
+From 649215b22f013e76187a53e7f19ed2f2a4480fd3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Oct 2024 18:07:06 +0200
+Subject: kasan: remove vmalloc_percpu test
+
+From: Andrey Konovalov <andreyknvl@gmail.com>
+
+[ Upstream commit 330d8df81f3673d6fb74550bbc9bb159d81b35f7 ]
+
+Commit 1a2473f0cbc0 ("kasan: improve vmalloc tests") added the
+vmalloc_percpu KASAN test with the assumption that __alloc_percpu always
+uses vmalloc internally, which is tagged by KASAN.
+
+However, __alloc_percpu might allocate memory from the first per-CPU
+chunk, which is not allocated via vmalloc().  As a result, the test might
+fail.
+
+Remove the test until proper KASAN annotation for the per-CPU allocated
+are added; tracked in https://bugzilla.kernel.org/show_bug.cgi?id=215019.
+
+Link: https://lkml.kernel.org/r/20241022160706.38943-1-andrey.konovalov@linux.dev
+Fixes: 1a2473f0cbc0 ("kasan: improve vmalloc tests")
+Signed-off-by: Andrey Konovalov <andreyknvl@gmail.com>
+Reported-by: Samuel Holland <samuel.holland@sifive.com>
+Link: https://lore.kernel.org/all/4a245fff-cc46-44d1-a5f9-fd2f1c3764ae@sifive.com/
+Reported-by: Sabyrzhan Tasbolatov <snovitoll@gmail.com>
+Link: https://lore.kernel.org/all/CACzwLxiWzNqPBp4C1VkaXZ2wDwvY3yZeetCi1TLGFipKW77drA@mail.gmail.com/
+Cc: Alexander Potapenko <glider@google.com>
+Cc: Andrey Ryabinin <ryabinin.a.a@gmail.com>
+Cc: Dmitry Vyukov <dvyukov@google.com>
+Cc: Marco Elver <elver@google.com>
+Cc: Sabyrzhan Tasbolatov <snovitoll@gmail.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/kasan/kasan_test.c | 27 ---------------------------
+ 1 file changed, 27 deletions(-)
+
+diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c
+index 7b32be2a3cf0e..9efde47f80698 100644
+--- a/mm/kasan/kasan_test.c
++++ b/mm/kasan/kasan_test.c
+@@ -1765,32 +1765,6 @@ static void vm_map_ram_tags(struct kunit *test)
+       free_pages((unsigned long)p_ptr, 1);
+ }
+-static void vmalloc_percpu(struct kunit *test)
+-{
+-      char __percpu *ptr;
+-      int cpu;
+-
+-      /*
+-       * This test is specifically crafted for the software tag-based mode,
+-       * the only tag-based mode that poisons percpu mappings.
+-       */
+-      KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_SW_TAGS);
+-
+-      ptr = __alloc_percpu(PAGE_SIZE, PAGE_SIZE);
+-
+-      for_each_possible_cpu(cpu) {
+-              char *c_ptr = per_cpu_ptr(ptr, cpu);
+-
+-              KUNIT_EXPECT_GE(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_MIN);
+-              KUNIT_EXPECT_LT(test, (u8)get_tag(c_ptr), (u8)KASAN_TAG_KERNEL);
+-
+-              /* Make sure that in-bounds accesses don't crash the kernel. */
+-              *c_ptr = 0;
+-      }
+-
+-      free_percpu(ptr);
+-}
+-
+ /*
+  * Check that the assigned pointer tag falls within the [KASAN_TAG_MIN,
+  * KASAN_TAG_KERNEL) range (note: excluding the match-all tag) for tag-based
+@@ -1967,7 +1941,6 @@ static struct kunit_case kasan_kunit_test_cases[] = {
+       KUNIT_CASE(vmalloc_oob),
+       KUNIT_CASE(vmap_tags),
+       KUNIT_CASE(vm_map_ram_tags),
+-      KUNIT_CASE(vmalloc_percpu),
+       KUNIT_CASE(match_all_not_assigned),
+       KUNIT_CASE(match_all_ptr_tag),
+       KUNIT_CASE(match_all_mem_tag),
+-- 
+2.43.0
+
diff --git a/queue-6.11/mctp-i2c-handle-null-header-address.patch b/queue-6.11/mctp-i2c-handle-null-header-address.patch
new file mode 100644 (file)
index 0000000..ca76de8
--- /dev/null
@@ -0,0 +1,44 @@
+From 3f2fafda72b327eddc56b97f4ef72a706ba040e7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 22 Oct 2024 18:25:14 +0800
+Subject: mctp i2c: handle NULL header address
+
+From: Matt Johnston <matt@codeconstruct.com.au>
+
+[ Upstream commit 01e215975fd80af81b5b79f009d49ddd35976c13 ]
+
+daddr can be NULL if there is no neighbour table entry present,
+in that case the tx packet should be dropped.
+
+saddr will usually be set by MCTP core, but check for NULL in case a
+packet is transmitted by a different protocol.
+
+Fixes: f5b8abf9fc3d ("mctp i2c: MCTP I2C binding driver")
+Cc: stable@vger.kernel.org
+Reported-by: Dung Cao <dung@os.amperecomputing.com>
+Signed-off-by: Matt Johnston <matt@codeconstruct.com.au>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20241022-mctp-i2c-null-dest-v3-1-e929709956c5@codeconstruct.com.au
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/mctp/mctp-i2c.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/net/mctp/mctp-i2c.c b/drivers/net/mctp/mctp-i2c.c
+index 4dc057c121f5d..e70fb66879941 100644
+--- a/drivers/net/mctp/mctp-i2c.c
++++ b/drivers/net/mctp/mctp-i2c.c
+@@ -588,6 +588,9 @@ static int mctp_i2c_header_create(struct sk_buff *skb, struct net_device *dev,
+       if (len > MCTP_I2C_MAXMTU)
+               return -EMSGSIZE;
++      if (!daddr || !saddr)
++              return -EINVAL;
++
+       lldst = *((u8 *)daddr);
+       llsrc = *((u8 *)saddr);
+-- 
+2.43.0
+
diff --git a/queue-6.11/mei-use-kvmalloc-for-read-buffer.patch b/queue-6.11/mei-use-kvmalloc-for-read-buffer.patch
new file mode 100644 (file)
index 0000000..a257b9e
--- /dev/null
@@ -0,0 +1,55 @@
+From 2d23e9505edce6c98d763ca56fa41049743e5a0f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 15 Oct 2024 15:31:57 +0300
+Subject: mei: use kvmalloc for read buffer
+
+From: Alexander Usyskin <alexander.usyskin@intel.com>
+
+[ Upstream commit 4adf613e01bf99e1739f6ff3e162ad5b7d578d1a ]
+
+Read buffer is allocated according to max message size, reported by
+the firmware and may reach 64K in systems with pxp client.
+Contiguous 64k allocation may fail under memory pressure.
+Read buffer is used as in-driver message storage and not required
+to be contiguous.
+Use kvmalloc to allow kernel to allocate non-contiguous memory.
+
+Fixes: 3030dc056459 ("mei: add wrapper for queuing control commands.")
+Cc: stable <stable@kernel.org>
+Reported-by: Rohit Agarwal <rohiagar@chromium.org>
+Closes: https://lore.kernel.org/all/20240813084542.2921300-1-rohiagar@chromium.org/
+Tested-by: Brian Geffon <bgeffon@google.com>
+Signed-off-by: Alexander Usyskin <alexander.usyskin@intel.com>
+Acked-by: Tomas Winkler <tomasw@gmail.com>
+Link: https://lore.kernel.org/r/20241015123157.2337026-1-alexander.usyskin@intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/misc/mei/client.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c
+index 9d090fa07516f..be011cef12e5d 100644
+--- a/drivers/misc/mei/client.c
++++ b/drivers/misc/mei/client.c
+@@ -321,7 +321,7 @@ void mei_io_cb_free(struct mei_cl_cb *cb)
+               return;
+       list_del(&cb->list);
+-      kfree(cb->buf.data);
++      kvfree(cb->buf.data);
+       kfree(cb->ext_hdr);
+       kfree(cb);
+ }
+@@ -497,7 +497,7 @@ struct mei_cl_cb *mei_cl_alloc_cb(struct mei_cl *cl, size_t length,
+       if (length == 0)
+               return cb;
+-      cb->buf.data = kmalloc(roundup(length, MEI_SLOT_SIZE), GFP_KERNEL);
++      cb->buf.data = kvmalloc(roundup(length, MEI_SLOT_SIZE), GFP_KERNEL);
+       if (!cb->buf.data) {
+               mei_io_cb_free(cb);
+               return NULL;
+-- 
+2.43.0
+
diff --git a/queue-6.11/mm-mmap-limit-thp-alignment-of-anonymous-mappings-to.patch b/queue-6.11/mm-mmap-limit-thp-alignment-of-anonymous-mappings-to.patch
new file mode 100644 (file)
index 0000000..9ca5438
--- /dev/null
@@ -0,0 +1,75 @@
+From 4947414760b1f675ae0cc4cd81ff2fc2e877fdb4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Oct 2024 17:12:29 +0200
+Subject: mm, mmap: limit THP alignment of anonymous mappings to PMD-aligned
+ sizes
+
+From: Vlastimil Babka <vbabka@suse.cz>
+
+[ Upstream commit d4148aeab412432bf928f311eca8a2ba52bb05df ]
+
+Since commit efa7df3e3bb5 ("mm: align larger anonymous mappings on THP
+boundaries") a mmap() of anonymous memory without a specific address hint
+and of at least PMD_SIZE will be aligned to PMD so that it can benefit
+from a THP backing page.
+
+However this change has been shown to regress some workloads
+significantly.  [1] reports regressions in various spec benchmarks, with
+up to 600% slowdown of the cactusBSSN benchmark on some platforms.  The
+benchmark seems to create many mappings of 4632kB, which would have merged
+to a large THP-backed area before commit efa7df3e3bb5 and now they are
+fragmented to multiple areas each aligned to PMD boundary with gaps
+between.  The regression then seems to be caused mainly due to the
+benchmark's memory access pattern suffering from TLB or cache aliasing due
+to the aligned boundaries of the individual areas.
+
+Another known regression bisected to commit efa7df3e3bb5 is darktable [2]
+[3] and early testing suggests this patch fixes the regression there as
+well.
+
+To fix the regression but still try to benefit from THP-friendly anonymous
+mapping alignment, add a condition that the size of the mapping must be a
+multiple of PMD size instead of at least PMD size.  In case of many
+odd-sized mapping like the cactusBSSN creates, those will stop being
+aligned and with gaps between, and instead naturally merge again.
+
+Link: https://lkml.kernel.org/r/20241024151228.101841-2-vbabka@suse.cz
+Fixes: efa7df3e3bb5 ("mm: align larger anonymous mappings on THP boundaries")
+Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
+Reported-by: Michael Matz <matz@suse.de>
+Debugged-by: Gabriel Krisman Bertazi <gabriel@krisman.be>
+Closes: https://bugzilla.suse.com/show_bug.cgi?id=1229012 [1]
+Reported-by: Matthias Bodenbinder <matthias@bodenbinder.de>
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219366 [2]
+Closes: https://lore.kernel.org/all/2050f0d4-57b0-481d-bab8-05e8d48fed0c@leemhuis.info/ [3]
+Reviewed-by: Lorenzo Stoakes <lorenzo.stoakes@oracle.com>
+Reviewed-by: Yang Shi <yang@os.amperecomputing.com>
+Cc: Rik van Riel <riel@surriel.com>
+Cc: Jann Horn <jannh@google.com>
+Cc: Liam R. Howlett <Liam.Howlett@Oracle.com>
+Cc: Petr Tesarik <ptesarik@suse.com>
+Cc: Thorsten Leemhuis <regressions@leemhuis.info>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/mmap.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/mm/mmap.c b/mm/mmap.c
+index 18fddcce03b85..8a04f29aa4230 100644
+--- a/mm/mmap.c
++++ b/mm/mmap.c
+@@ -1952,7 +1952,8 @@ __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len,
+       if (get_area) {
+               addr = get_area(file, addr, len, pgoff, flags);
+-      } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) {
++      } else if (IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)
++                 && IS_ALIGNED(len, PMD_SIZE)) {
+               /* Ensures that larger anonymous mappings are THP aligned. */
+               addr = thp_get_unmapped_area_vmflags(file, addr, len,
+                                                    pgoff, flags, vm_flags);
+-- 
+2.43.0
+
diff --git a/queue-6.11/mm-multi-gen-lru-ignore-non-leaf-pmd_young-for-force.patch b/queue-6.11/mm-multi-gen-lru-ignore-non-leaf-pmd_young-for-force.patch
new file mode 100644 (file)
index 0000000..09db9a5
--- /dev/null
@@ -0,0 +1,65 @@
+From 3d088fce5c0e855d74428f888cb999405935c393 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 13 Aug 2024 09:37:59 -0700
+Subject: mm: multi-gen LRU: ignore non-leaf pmd_young for force_scan=true
+
+From: Yuanchu Xie <yuanchu@google.com>
+
+[ Upstream commit bceeeaed4817ba7ad9013b4116c97220a60fcf7c ]
+
+When non-leaf pmd accessed bits are available, MGLRU page table walks can
+clear the non-leaf pmd accessed bit and ignore the accessed bit on the pte
+if it's on a different node, skipping a generation update as well.  If
+another scan occurs on the same node as said skipped pte.
+
+The non-leaf pmd accessed bit might remain cleared and the pte accessed
+bits won't be checked.  While this is sufficient for reclaim-driven aging,
+where the goal is to select a reasonably cold page, the access can be
+missed when aging proactively for workingset estimation of a node/memcg.
+
+In more detail, get_pfn_folio returns NULL if the folio's nid != node
+under scanning, so the page table walk skips processing of said pte.  Now
+the pmd_young flag on this pmd is cleared, and if none of the pte's are
+accessed before another scan occurs on the folio's node, the pmd_young
+check fails and the pte accessed bit is skipped.
+
+Since force_scan disables various other optimizations, we check force_scan
+to ignore the non-leaf pmd accessed bit.
+
+Link: https://lkml.kernel.org/r/20240813163759.742675-1-yuanchu@google.com
+Signed-off-by: Yuanchu Xie <yuanchu@google.com>
+Acked-by: Yu Zhao <yuzhao@google.com>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: Lance Yang <ioworker0@gmail.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: ddd6d8e975b1 ("mm: multi-gen LRU: remove MM_LEAF_OLD and MM_NONLEAF_TOTAL stats")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/vmscan.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index 128f307da6eea..b1f88638c5ab4 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -3456,7 +3456,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area
+                       goto next;
+               if (!pmd_trans_huge(pmd[i])) {
+-                      if (should_clear_pmd_young())
++                      if (!walk->force_scan && should_clear_pmd_young())
+                               pmdp_test_and_clear_young(vma, addr, pmd + i);
+                       goto next;
+               }
+@@ -3543,7 +3543,7 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
+               walk->mm_stats[MM_NONLEAF_TOTAL]++;
+-              if (should_clear_pmd_young()) {
++              if (!walk->force_scan && should_clear_pmd_young()) {
+                       if (!pmd_young(val))
+                               continue;
+-- 
+2.43.0
+
diff --git a/queue-6.11/mm-multi-gen-lru-remove-mm_leaf_old-and-mm_nonleaf_t.patch b/queue-6.11/mm-multi-gen-lru-remove-mm_leaf_old-and-mm_nonleaf_t.patch
new file mode 100644 (file)
index 0000000..f75c6d6
--- /dev/null
@@ -0,0 +1,153 @@
+From fd4ecc890b00d30a50f93c6fa9e07fdf95eddb1b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 19 Oct 2024 01:29:38 +0000
+Subject: mm: multi-gen LRU: remove MM_LEAF_OLD and MM_NONLEAF_TOTAL stats
+
+From: Yu Zhao <yuzhao@google.com>
+
+[ Upstream commit ddd6d8e975b171ea3f63a011a75820883ff0d479 ]
+
+Patch series "mm: multi-gen LRU: Have secondary MMUs participate in
+MM_WALK".
+
+Today, the MM_WALK capability causes MGLRU to clear the young bit from
+PMDs and PTEs during the page table walk before eviction, but MGLRU does
+not call the clear_young() MMU notifier in this case.  By not calling this
+notifier, the MM walk takes less time/CPU, but it causes pages that are
+accessed mostly through KVM / secondary MMUs to appear younger than they
+should be.
+
+We do call the clear_young() notifier today, but only when attempting to
+evict the page, so we end up clearing young/accessed information less
+frequently for secondary MMUs than for mm PTEs, and therefore they appear
+younger and are less likely to be evicted.  Therefore, memory that is
+*not* being accessed mostly by KVM will be evicted *more* frequently,
+worsening performance.
+
+ChromeOS observed a tab-open latency regression when enabling MGLRU with a
+setup that involved running a VM:
+
+               Tab-open latency histogram (ms)
+Version                p50     mean    p95     p99     max
+base           1315    1198    2347    3454    10319
+mglru          2559    1311    7399    12060   43758
+fix            1119    926     2470    4211    6947
+
+This series replaces the final non-selftest patches from this series[1],
+which introduced a similar change (and a new MMU notifier) with KVM
+optimizations.  I'll send a separate series (to Sean and Paolo) for the
+KVM optimizations.
+
+This series also makes proactive reclaim with MGLRU possible for KVM
+memory.  I have verified that this functions correctly with the selftest
+from [1], but given that that test is a KVM selftest, I'll send it with
+the rest of the KVM optimizations later.  Andrew, let me know if you'd
+like to take the test now anyway.
+
+[1]: https://lore.kernel.org/linux-mm/20240926013506.860253-18-jthoughton@google.com/
+
+This patch (of 2):
+
+The removed stats, MM_LEAF_OLD and MM_NONLEAF_TOTAL, are not very helpful
+and become more complicated to properly compute when adding
+test/clear_young() notifiers in MGLRU's mm walk.
+
+Link: https://lkml.kernel.org/r/20241019012940.3656292-1-jthoughton@google.com
+Link: https://lkml.kernel.org/r/20241019012940.3656292-2-jthoughton@google.com
+Fixes: bd74fdaea146 ("mm: multi-gen LRU: support page table walks")
+Signed-off-by: Yu Zhao <yuzhao@google.com>
+Signed-off-by: James Houghton <jthoughton@google.com>
+Cc: Axel Rasmussen <axelrasmussen@google.com>
+Cc: David Matlack <dmatlack@google.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: David Stevens <stevensd@google.com>
+Cc: Oliver Upton <oliver.upton@linux.dev>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Sean Christopherson <seanjc@google.com>
+Cc: Wei Xu <weixugc@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/mmzone.h |  2 --
+ mm/vmscan.c            | 14 +++++---------
+ 2 files changed, 5 insertions(+), 11 deletions(-)
+
+diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
+index 1dc6248feb832..5f44d24ed9ffe 100644
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -458,9 +458,7 @@ struct lru_gen_folio {
+ enum {
+       MM_LEAF_TOTAL,          /* total leaf entries */
+-      MM_LEAF_OLD,            /* old leaf entries */
+       MM_LEAF_YOUNG,          /* young leaf entries */
+-      MM_NONLEAF_TOTAL,       /* total non-leaf entries */
+       MM_NONLEAF_FOUND,       /* non-leaf entries found in Bloom filters */
+       MM_NONLEAF_ADDED,       /* non-leaf entries added to Bloom filters */
+       NR_MM_STATS
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index b1f88638c5ab4..c6d9f5f4f6002 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -3376,7 +3376,6 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
+                       continue;
+               if (!pte_young(ptent)) {
+-                      walk->mm_stats[MM_LEAF_OLD]++;
+                       continue;
+               }
+@@ -3529,7 +3528,6 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
+                       walk->mm_stats[MM_LEAF_TOTAL]++;
+                       if (!pmd_young(val)) {
+-                              walk->mm_stats[MM_LEAF_OLD]++;
+                               continue;
+                       }
+@@ -3541,8 +3539,6 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
+                       continue;
+               }
+-              walk->mm_stats[MM_NONLEAF_TOTAL]++;
+-
+               if (!walk->force_scan && should_clear_pmd_young()) {
+                       if (!pmd_young(val))
+                               continue;
+@@ -5231,11 +5227,11 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
+       for (tier = 0; tier < MAX_NR_TIERS; tier++) {
+               seq_printf(m, "            %10d", tier);
+               for (type = 0; type < ANON_AND_FILE; type++) {
+-                      const char *s = "   ";
++                      const char *s = "xxx";
+                       unsigned long n[3] = {};
+                       if (seq == max_seq) {
+-                              s = "RT ";
++                              s = "RTx";
+                               n[0] = READ_ONCE(lrugen->avg_refaulted[type][tier]);
+                               n[1] = READ_ONCE(lrugen->avg_total[type][tier]);
+                       } else if (seq == min_seq[type] || NR_HIST_GENS > 1) {
+@@ -5257,14 +5253,14 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec,
+       seq_puts(m, "                      ");
+       for (i = 0; i < NR_MM_STATS; i++) {
+-              const char *s = "      ";
++              const char *s = "xxxx";
+               unsigned long n = 0;
+               if (seq == max_seq && NR_HIST_GENS == 1) {
+-                      s = "LOYNFA";
++                      s = "TYFA";
+                       n = READ_ONCE(mm_state->stats[hist][i]);
+               } else if (seq != max_seq && NR_HIST_GENS > 1) {
+-                      s = "loynfa";
++                      s = "tyfa";
+                       n = READ_ONCE(mm_state->stats[hist][i]);
+               }
+-- 
+2.43.0
+
diff --git a/queue-6.11/mm-multi-gen-lru-use-ptep-pmdp-_clear_young_notify.patch b/queue-6.11/mm-multi-gen-lru-use-ptep-pmdp-_clear_young_notify.patch
new file mode 100644 (file)
index 0000000..67d6071
--- /dev/null
@@ -0,0 +1,329 @@
+From 802dda2dc6f15bd992a84ed1e03135b99cc3d808 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 19 Oct 2024 01:29:39 +0000
+Subject: mm: multi-gen LRU: use {ptep,pmdp}_clear_young_notify()
+
+From: Yu Zhao <yuzhao@google.com>
+
+[ Upstream commit 1d4832becdc2cdb2cffe2a6050c9d9fd8ff1c58c ]
+
+When the MM_WALK capability is enabled, memory that is mostly accessed by
+a VM appears younger than it really is, therefore this memory will be less
+likely to be evicted.  Therefore, the presence of a running VM can
+significantly increase swap-outs for non-VM memory, regressing the
+performance for the rest of the system.
+
+Fix this regression by always calling {ptep,pmdp}_clear_young_notify()
+whenever we clear the young bits on PMDs/PTEs.
+
+[jthoughton@google.com: fix link-time error]
+Link: https://lkml.kernel.org/r/20241019012940.3656292-3-jthoughton@google.com
+Fixes: bd74fdaea146 ("mm: multi-gen LRU: support page table walks")
+Signed-off-by: Yu Zhao <yuzhao@google.com>
+Signed-off-by: James Houghton <jthoughton@google.com>
+Reported-by: David Stevens <stevensd@google.com>
+Cc: Axel Rasmussen <axelrasmussen@google.com>
+Cc: David Matlack <dmatlack@google.com>
+Cc: David Rientjes <rientjes@google.com>
+Cc: Oliver Upton <oliver.upton@linux.dev>
+Cc: Paolo Bonzini <pbonzini@redhat.com>
+Cc: Sean Christopherson <seanjc@google.com>
+Cc: Wei Xu <weixugc@google.com>
+Cc: <stable@vger.kernel.org>
+Cc: kernel test robot <lkp@intel.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/mmzone.h |  5 ++-
+ mm/rmap.c              |  9 ++---
+ mm/vmscan.c            | 88 +++++++++++++++++++++++-------------------
+ 3 files changed, 55 insertions(+), 47 deletions(-)
+
+diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
+index 5f44d24ed9ffe..fd04c8e942250 100644
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -555,7 +555,7 @@ struct lru_gen_memcg {
+ void lru_gen_init_pgdat(struct pglist_data *pgdat);
+ void lru_gen_init_lruvec(struct lruvec *lruvec);
+-void lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
++bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw);
+ void lru_gen_init_memcg(struct mem_cgroup *memcg);
+ void lru_gen_exit_memcg(struct mem_cgroup *memcg);
+@@ -574,8 +574,9 @@ static inline void lru_gen_init_lruvec(struct lruvec *lruvec)
+ {
+ }
+-static inline void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
++static inline bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+ {
++      return false;
+ }
+ static inline void lru_gen_init_memcg(struct mem_cgroup *memcg)
+diff --git a/mm/rmap.c b/mm/rmap.c
+index 2630bde38640c..3d89847f01dad 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -885,13 +885,10 @@ static bool folio_referenced_one(struct folio *folio,
+                       return false;
+               }
+-              if (pvmw.pte) {
+-                      if (lru_gen_enabled() &&
+-                          pte_young(ptep_get(pvmw.pte))) {
+-                              lru_gen_look_around(&pvmw);
++              if (lru_gen_enabled() && pvmw.pte) {
++                      if (lru_gen_look_around(&pvmw))
+                               referenced++;
+-                      }
+-
++              } else if (pvmw.pte) {
+                       if (ptep_clear_flush_young_notify(vma, address,
+                                               pvmw.pte))
+                               referenced++;
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index a2ad17092abdf..f5bcd08527ae0 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -56,6 +56,7 @@
+ #include <linux/khugepaged.h>
+ #include <linux/rculist_nulls.h>
+ #include <linux/random.h>
++#include <linux/mmu_notifier.h>
+ #include <asm/tlbflush.h>
+ #include <asm/div64.h>
+@@ -3276,7 +3277,8 @@ static bool get_next_vma(unsigned long mask, unsigned long size, struct mm_walk
+       return false;
+ }
+-static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr)
++static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned long addr,
++                               struct pglist_data *pgdat)
+ {
+       unsigned long pfn = pte_pfn(pte);
+@@ -3288,13 +3290,20 @@ static unsigned long get_pte_pfn(pte_t pte, struct vm_area_struct *vma, unsigned
+       if (WARN_ON_ONCE(pte_devmap(pte) || pte_special(pte)))
+               return -1;
++      if (!pte_young(pte) && !mm_has_notifiers(vma->vm_mm))
++              return -1;
++
+       if (WARN_ON_ONCE(!pfn_valid(pfn)))
+               return -1;
++      if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
++              return -1;
++
+       return pfn;
+ }
+-static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr)
++static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned long addr,
++                               struct pglist_data *pgdat)
+ {
+       unsigned long pfn = pmd_pfn(pmd);
+@@ -3306,9 +3315,15 @@ static unsigned long get_pmd_pfn(pmd_t pmd, struct vm_area_struct *vma, unsigned
+       if (WARN_ON_ONCE(pmd_devmap(pmd)))
+               return -1;
++      if (!pmd_young(pmd) && !mm_has_notifiers(vma->vm_mm))
++              return -1;
++
+       if (WARN_ON_ONCE(!pfn_valid(pfn)))
+               return -1;
++      if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
++              return -1;
++
+       return pfn;
+ }
+@@ -3317,10 +3332,6 @@ static struct folio *get_pfn_folio(unsigned long pfn, struct mem_cgroup *memcg,
+ {
+       struct folio *folio;
+-      /* try to avoid unnecessary memory loads */
+-      if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
+-              return NULL;
+-
+       folio = pfn_folio(pfn);
+       if (folio_nid(folio) != pgdat->node_id)
+               return NULL;
+@@ -3376,20 +3387,16 @@ static bool walk_pte_range(pmd_t *pmd, unsigned long start, unsigned long end,
+               total++;
+               walk->mm_stats[MM_LEAF_TOTAL]++;
+-              pfn = get_pte_pfn(ptent, args->vma, addr);
++              pfn = get_pte_pfn(ptent, args->vma, addr, pgdat);
+               if (pfn == -1)
+                       continue;
+-              if (!pte_young(ptent)) {
+-                      continue;
+-              }
+-
+               folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
+               if (!folio)
+                       continue;
+-              if (!ptep_test_and_clear_young(args->vma, addr, pte + i))
+-                      VM_WARN_ON_ONCE(true);
++              if (!ptep_clear_young_notify(args->vma, addr, pte + i))
++                      continue;
+               young++;
+               walk->mm_stats[MM_LEAF_YOUNG]++;
+@@ -3455,21 +3462,25 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area
+               /* don't round down the first address */
+               addr = i ? (*first & PMD_MASK) + i * PMD_SIZE : *first;
+-              pfn = get_pmd_pfn(pmd[i], vma, addr);
+-              if (pfn == -1)
++              if (!pmd_present(pmd[i]))
+                       goto next;
+               if (!pmd_trans_huge(pmd[i])) {
+-                      if (!walk->force_scan && should_clear_pmd_young())
++                      if (!walk->force_scan && should_clear_pmd_young() &&
++                          !mm_has_notifiers(args->mm))
+                               pmdp_test_and_clear_young(vma, addr, pmd + i);
+                       goto next;
+               }
++              pfn = get_pmd_pfn(pmd[i], vma, addr, pgdat);
++              if (pfn == -1)
++                      goto next;
++
+               folio = get_pfn_folio(pfn, memcg, pgdat, walk->can_swap);
+               if (!folio)
+                       goto next;
+-              if (!pmdp_test_and_clear_young(vma, addr, pmd + i))
++              if (!pmdp_clear_young_notify(vma, addr, pmd + i))
+                       goto next;
+               walk->mm_stats[MM_LEAF_YOUNG]++;
+@@ -3527,24 +3538,18 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end,
+               }
+               if (pmd_trans_huge(val)) {
+-                      unsigned long pfn = pmd_pfn(val);
+                       struct pglist_data *pgdat = lruvec_pgdat(walk->lruvec);
++                      unsigned long pfn = get_pmd_pfn(val, vma, addr, pgdat);
+                       walk->mm_stats[MM_LEAF_TOTAL]++;
+-                      if (!pmd_young(val)) {
+-                              continue;
+-                      }
+-
+-                      /* try to avoid unnecessary memory loads */
+-                      if (pfn < pgdat->node_start_pfn || pfn >= pgdat_end_pfn(pgdat))
+-                              continue;
+-
+-                      walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
++                      if (pfn != -1)
++                              walk_pmd_range_locked(pud, addr, vma, args, bitmap, &first);
+                       continue;
+               }
+-              if (!walk->force_scan && should_clear_pmd_young()) {
++              if (!walk->force_scan && should_clear_pmd_young() &&
++                  !mm_has_notifiers(args->mm)) {
+                       if (!pmd_young(val))
+                               continue;
+@@ -4018,13 +4023,13 @@ static void lru_gen_age_node(struct pglist_data *pgdat, struct scan_control *sc)
+  * the PTE table to the Bloom filter. This forms a feedback loop between the
+  * eviction and the aging.
+  */
+-void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
++bool lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+ {
+       int i;
+       unsigned long start;
+       unsigned long end;
+       struct lru_gen_mm_walk *walk;
+-      int young = 0;
++      int young = 1;
+       pte_t *pte = pvmw->pte;
+       unsigned long addr = pvmw->address;
+       struct vm_area_struct *vma = pvmw->vma;
+@@ -4040,12 +4045,15 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+       lockdep_assert_held(pvmw->ptl);
+       VM_WARN_ON_ONCE_FOLIO(folio_test_lru(folio), folio);
++      if (!ptep_clear_young_notify(vma, addr, pte))
++              return false;
++
+       if (spin_is_contended(pvmw->ptl))
+-              return;
++              return true;
+       /* exclude special VMAs containing anon pages from COW */
+       if (vma->vm_flags & VM_SPECIAL)
+-              return;
++              return true;
+       /* avoid taking the LRU lock under the PTL when possible */
+       walk = current->reclaim_state ? current->reclaim_state->mm_walk : NULL;
+@@ -4053,6 +4061,9 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+       start = max(addr & PMD_MASK, vma->vm_start);
+       end = min(addr | ~PMD_MASK, vma->vm_end - 1) + 1;
++      if (end - start == PAGE_SIZE)
++              return true;
++
+       if (end - start > MIN_LRU_BATCH * PAGE_SIZE) {
+               if (addr - start < MIN_LRU_BATCH * PAGE_SIZE / 2)
+                       end = start + MIN_LRU_BATCH * PAGE_SIZE;
+@@ -4066,7 +4077,7 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+       /* folio_update_gen() requires stable folio_memcg() */
+       if (!mem_cgroup_trylock_pages(memcg))
+-              return;
++              return true;
+       arch_enter_lazy_mmu_mode();
+@@ -4076,19 +4087,16 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+               unsigned long pfn;
+               pte_t ptent = ptep_get(pte + i);
+-              pfn = get_pte_pfn(ptent, vma, addr);
++              pfn = get_pte_pfn(ptent, vma, addr, pgdat);
+               if (pfn == -1)
+                       continue;
+-              if (!pte_young(ptent))
+-                      continue;
+-
+               folio = get_pfn_folio(pfn, memcg, pgdat, can_swap);
+               if (!folio)
+                       continue;
+-              if (!ptep_test_and_clear_young(vma, addr, pte + i))
+-                      VM_WARN_ON_ONCE(true);
++              if (!ptep_clear_young_notify(vma, addr, pte + i))
++                      continue;
+               young++;
+@@ -4118,6 +4126,8 @@ void lru_gen_look_around(struct page_vma_mapped_walk *pvmw)
+       /* feedback from rmap walkers to page table walkers */
+       if (mm_state && suitable_to_scan(i, young))
+               update_bloom_filter(mm_state, max_seq, pvmw->pmd);
++
++      return true;
+ }
+ /******************************************************************************
+-- 
+2.43.0
+
diff --git a/queue-6.11/mm-page_alloc-let-gfp_atomic-order-0-allocs-access-h.patch b/queue-6.11/mm-page_alloc-let-gfp_atomic-order-0-allocs-access-h.patch
new file mode 100644 (file)
index 0000000..991847d
--- /dev/null
@@ -0,0 +1,88 @@
+From 4006d5c15746fc70b8b7ede4c29128a2be296aae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 11 Oct 2024 13:07:37 +0100
+Subject: mm/page_alloc: let GFP_ATOMIC order-0 allocs access highatomic
+ reserves
+
+From: Matt Fleming <mfleming@cloudflare.com>
+
+[ Upstream commit 281dd25c1a018261a04d1b8bf41a0674000bfe38 ]
+
+Under memory pressure it's possible for GFP_ATOMIC order-0 allocations to
+fail even though free pages are available in the highatomic reserves.
+GFP_ATOMIC allocations cannot trigger unreserve_highatomic_pageblock()
+since it's only run from reclaim.
+
+Given that such allocations will pass the watermarks in
+__zone_watermark_unusable_free(), it makes sense to fallback to highatomic
+reserves the same way that ALLOC_OOM can.
+
+This fixes order-0 page allocation failures observed on Cloudflare's fleet
+when handling network packets:
+
+  kswapd1: page allocation failure: order:0, mode:0x820(GFP_ATOMIC),
+  nodemask=(null),cpuset=/,mems_allowed=0-7
+  CPU: 10 PID: 696 Comm: kswapd1 Kdump: loaded Tainted: G           O 6.6.43-CUSTOM #1
+  Hardware name: MACHINE
+  Call Trace:
+   <IRQ>
+   dump_stack_lvl+0x3c/0x50
+   warn_alloc+0x13a/0x1c0
+   __alloc_pages_slowpath.constprop.0+0xc9d/0xd10
+   __alloc_pages+0x327/0x340
+   __napi_alloc_skb+0x16d/0x1f0
+   bnxt_rx_page_skb+0x96/0x1b0 [bnxt_en]
+   bnxt_rx_pkt+0x201/0x15e0 [bnxt_en]
+   __bnxt_poll_work+0x156/0x2b0 [bnxt_en]
+   bnxt_poll+0xd9/0x1c0 [bnxt_en]
+   __napi_poll+0x2b/0x1b0
+   bpf_trampoline_6442524138+0x7d/0x1000
+   __napi_poll+0x5/0x1b0
+   net_rx_action+0x342/0x740
+   handle_softirqs+0xcf/0x2b0
+   irq_exit_rcu+0x6c/0x90
+   sysvec_apic_timer_interrupt+0x72/0x90
+   </IRQ>
+
+[mfleming@cloudflare.com: update comment]
+  Link: https://lkml.kernel.org/r/20241015125158.3597702-1-matt@readmodwrite.com
+Link: https://lkml.kernel.org/r/20241011120737.3300370-1-matt@readmodwrite.com
+Link: https://lore.kernel.org/all/CAGis_TWzSu=P7QJmjD58WWiu3zjMTVKSzdOwWE8ORaGytzWJwQ@mail.gmail.com/
+Fixes: 1d91df85f399 ("mm/page_alloc: handle a missing case for memalloc_nocma_{save/restore} APIs")
+Signed-off-by: Matt Fleming <mfleming@cloudflare.com>
+Suggested-by: Vlastimil Babka <vbabka@suse.cz>
+Reviewed-by: Vlastimil Babka <vbabka@suse.cz>
+Cc: Mel Gorman <mgorman@techsingularity.net>
+Cc: Michal Hocko <mhocko@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/page_alloc.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/mm/page_alloc.c b/mm/page_alloc.c
+index 91ace8ca97e21..ec459522c2934 100644
+--- a/mm/page_alloc.c
++++ b/mm/page_alloc.c
+@@ -2874,12 +2874,12 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone,
+                       page = __rmqueue(zone, order, migratetype, alloc_flags);
+                       /*
+-                       * If the allocation fails, allow OOM handling access
+-                       * to HIGHATOMIC reserves as failing now is worse than
+-                       * failing a high-order atomic allocation in the
+-                       * future.
++                       * If the allocation fails, allow OOM handling and
++                       * order-0 (atomic) allocs access to HIGHATOMIC
++                       * reserves as failing now is worse than failing a
++                       * high-order atomic allocation in the future.
+                        */
+-                      if (!page && (alloc_flags & ALLOC_OOM))
++                      if (!page && (alloc_flags & (ALLOC_OOM|ALLOC_NON_BLOCK)))
+                               page = __rmqueue_smallest(zone, order, MIGRATE_HIGHATOMIC);
+                       if (!page) {
+-- 
+2.43.0
+
diff --git a/queue-6.11/mm-shrink-skip-folio-mapped-by-an-exiting-process.patch b/queue-6.11/mm-shrink-skip-folio-mapped-by-an-exiting-process.patch
new file mode 100644 (file)
index 0000000..a2ed2f4
--- /dev/null
@@ -0,0 +1,98 @@
+From 145f20a56a59e3098e1c51e0c1c3341de4018532 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 Jul 2024 16:36:41 +0800
+Subject: mm: shrink skip folio mapped by an exiting process
+
+From: Zhiguo Jiang <justinjiang@vivo.com>
+
+[ Upstream commit c495b97624d0c059b0403e26dadb166d69918409 ]
+
+The releasing process of the non-shared anonymous folio mapped solely by
+an exiting process may go through two flows: 1) the anonymous folio is
+firstly is swaped-out into swapspace and transformed into a swp_entry in
+shrink_folio_list; 2) then the swp_entry is released in the process
+exiting flow.  This will result in the high cpu load of releasing a
+non-shared anonymous folio mapped solely by an exiting process.
+
+When the low system memory and the exiting process exist at the same time,
+it will be likely to happen, because the non-shared anonymous folio mapped
+solely by an exiting process may be reclaimed by shrink_folio_list.
+
+This patch is that shrink skips the non-shared anonymous folio solely
+mapped by an exting process and this folio is only released directly in
+the process exiting flow, which will save swap-out time and alleviate the
+load of the process exiting.
+
+Barry provided some effectiveness testing in [1].  "I observed that
+this patch effectively skipped 6114 folios (either 4KB or 64KB mTHP),
+potentially reducing the swap-out by up to 92MB (97,300,480 bytes)
+during the process exit.  The working set size is 256MB."
+
+Link: https://lkml.kernel.org/r/20240710083641.546-1-justinjiang@vivo.com
+Link: https://lore.kernel.org/linux-mm/20240710033212.36497-1-21cnbao@gmail.com/ [1]
+Signed-off-by: Zhiguo Jiang <justinjiang@vivo.com>
+Acked-by: Barry Song <baohua@kernel.org>
+Cc: David Hildenbrand <david@redhat.com>
+Cc: Matthew Wilcox <willy@infradead.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Stable-dep-of: 1d4832becdc2 ("mm: multi-gen LRU: use {ptep,pmdp}_clear_young_notify()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/rmap.c   | 15 +++++++++++++++
+ mm/vmscan.c |  7 ++++++-
+ 2 files changed, 21 insertions(+), 1 deletion(-)
+
+diff --git a/mm/rmap.c b/mm/rmap.c
+index 2490e727e2dcb..2630bde38640c 100644
+--- a/mm/rmap.c
++++ b/mm/rmap.c
+@@ -75,6 +75,7 @@
+ #include <linux/memremap.h>
+ #include <linux/userfaultfd_k.h>
+ #include <linux/mm_inline.h>
++#include <linux/oom.h>
+ #include <asm/tlbflush.h>
+@@ -870,6 +871,20 @@ static bool folio_referenced_one(struct folio *folio,
+                       continue;
+               }
++              /*
++               * Skip the non-shared swapbacked folio mapped solely by
++               * the exiting or OOM-reaped process. This avoids redundant
++               * swap-out followed by an immediate unmap.
++               */
++              if ((!atomic_read(&vma->vm_mm->mm_users) ||
++                  check_stable_address_space(vma->vm_mm)) &&
++                  folio_test_anon(folio) && folio_test_swapbacked(folio) &&
++                  !folio_likely_mapped_shared(folio)) {
++                      pra->referenced = -1;
++                      page_vma_mapped_walk_done(&pvmw);
++                      return false;
++              }
++
+               if (pvmw.pte) {
+                       if (lru_gen_enabled() &&
+                           pte_young(ptep_get(pvmw.pte))) {
+diff --git a/mm/vmscan.c b/mm/vmscan.c
+index c6d9f5f4f6002..a2ad17092abdf 100644
+--- a/mm/vmscan.c
++++ b/mm/vmscan.c
+@@ -863,7 +863,12 @@ static enum folio_references folio_check_references(struct folio *folio,
+       if (vm_flags & VM_LOCKED)
+               return FOLIOREF_ACTIVATE;
+-      /* rmap lock contention: rotate */
++      /*
++       * There are two cases to consider.
++       * 1) Rmap lock contention: rotate.
++       * 2) Skip the non-shared swapbacked folio mapped solely by
++       *    the exiting or OOM-reaped process.
++       */
+       if (referenced_ptes == -1)
+               return FOLIOREF_KEEP;
+-- 
+2.43.0
+
diff --git a/queue-6.11/mptcp-init-protect-sched-with-rcu_read_lock.patch b/queue-6.11/mptcp-init-protect-sched-with-rcu_read_lock.patch
new file mode 100644 (file)
index 0000000..c657b5f
--- /dev/null
@@ -0,0 +1,79 @@
+From 5ff8dc7ec233f1cad08d9df82748819a7e9a0b6f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Oct 2024 12:25:26 +0200
+Subject: mptcp: init: protect sched with rcu_read_lock
+
+From: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+
+[ Upstream commit 3deb12c788c385e17142ce6ec50f769852fcec65 ]
+
+Enabling CONFIG_PROVE_RCU_LIST with its dependence CONFIG_RCU_EXPERT
+creates this splat when an MPTCP socket is created:
+
+  =============================
+  WARNING: suspicious RCU usage
+  6.12.0-rc2+ #11 Not tainted
+  -----------------------------
+  net/mptcp/sched.c:44 RCU-list traversed in non-reader section!!
+
+  other info that might help us debug this:
+
+  rcu_scheduler_active = 2, debug_locks = 1
+  no locks held by mptcp_connect/176.
+
+  stack backtrace:
+  CPU: 0 UID: 0 PID: 176 Comm: mptcp_connect Not tainted 6.12.0-rc2+ #11
+  Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011
+  Call Trace:
+   <TASK>
+   dump_stack_lvl (lib/dump_stack.c:123)
+   lockdep_rcu_suspicious (kernel/locking/lockdep.c:6822)
+   mptcp_sched_find (net/mptcp/sched.c:44 (discriminator 7))
+   mptcp_init_sock (net/mptcp/protocol.c:2867 (discriminator 1))
+   ? sock_init_data_uid (arch/x86/include/asm/atomic.h:28)
+   inet_create.part.0.constprop.0 (net/ipv4/af_inet.c:386)
+   ? __sock_create (include/linux/rcupdate.h:347 (discriminator 1))
+   __sock_create (net/socket.c:1576)
+   __sys_socket (net/socket.c:1671)
+   ? __pfx___sys_socket (net/socket.c:1712)
+   ? do_user_addr_fault (arch/x86/mm/fault.c:1419 (discriminator 1))
+   __x64_sys_socket (net/socket.c:1728)
+   do_syscall_64 (arch/x86/entry/common.c:52 (discriminator 1))
+   entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:130)
+
+That's because when the socket is initialised, rcu_read_lock() is not
+used despite the explicit comment written above the declaration of
+mptcp_sched_find() in sched.c. Adding the missing lock/unlock avoids the
+warning.
+
+Fixes: 1730b2b2c5a5 ("mptcp: add sched in mptcp_sock")
+Cc: stable@vger.kernel.org
+Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/523
+Reviewed-by: Geliang Tang <geliang@kernel.org>
+Signed-off-by: Matthieu Baerts (NGI0) <matttbe@kernel.org>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20241021-net-mptcp-sched-lock-v1-1-637759cf061c@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/mptcp/protocol.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
+index d4b3bc46cdaaf..ec87b36f0d451 100644
+--- a/net/mptcp/protocol.c
++++ b/net/mptcp/protocol.c
+@@ -2864,8 +2864,10 @@ static int mptcp_init_sock(struct sock *sk)
+       if (unlikely(!net->mib.mptcp_statistics) && !mptcp_mib_alloc(net))
+               return -ENOMEM;
++      rcu_read_lock();
+       ret = mptcp_init_sched(mptcp_sk(sk),
+                              mptcp_sched_find(mptcp_get_scheduler(net)));
++      rcu_read_unlock();
+       if (ret)
+               return ret;
+-- 
+2.43.0
+
diff --git a/queue-6.11/nvme-re-fix-error-handling-for-io_uring-nvme-passthr.patch b/queue-6.11/nvme-re-fix-error-handling-for-io_uring-nvme-passthr.patch
new file mode 100644 (file)
index 0000000..a2cc491
--- /dev/null
@@ -0,0 +1,48 @@
+From 5ef1d4567243e77f8f9b35e036672034bc7ae1b7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Oct 2024 13:45:46 -0700
+Subject: nvme: re-fix error-handling for io_uring nvme-passthrough
+
+From: Keith Busch <kbusch@kernel.org>
+
+[ Upstream commit 5eed4fb274cd6579f2fb4190b11c4c86c553cd06 ]
+
+This was previously fixed with commit 1147dd0503564fa0e0348
+("nvme: fix error-handling for io_uring nvme-passthrough"), but the
+change was mistakenly undone in a later commit.
+
+Fixes: d6aacee9255e7f ("nvme: use bio_integrity_map_user")
+Cc: stable@vger.kernel.org
+Reported-by: Jens Axboe <axboe@kernel.dk>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Anuj Gupta <anuj20.g@samsung.com>
+Reviewed-by: Kanchan Joshi <joshi.k@samsung.com>
+Signed-off-by: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/host/ioctl.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/nvme/host/ioctl.c b/drivers/nvme/host/ioctl.c
+index 15c93ce07e263..2cb35c4528a93 100644
+--- a/drivers/nvme/host/ioctl.c
++++ b/drivers/nvme/host/ioctl.c
+@@ -423,10 +423,13 @@ static enum rq_end_io_ret nvme_uring_cmd_end_io(struct request *req,
+       struct io_uring_cmd *ioucmd = req->end_io_data;
+       struct nvme_uring_cmd_pdu *pdu = nvme_uring_cmd_pdu(ioucmd);
+-      if (nvme_req(req)->flags & NVME_REQ_CANCELLED)
++      if (nvme_req(req)->flags & NVME_REQ_CANCELLED) {
+               pdu->status = -EINTR;
+-      else
++      } else {
+               pdu->status = nvme_req(req)->status;
++              if (!pdu->status)
++                      pdu->status = blk_status_to_errno(err);
++      }
+       pdu->result = le64_to_cpu(nvme_req(req)->result.u64);
+       /*
+-- 
+2.43.0
+
diff --git a/queue-6.11/nvmet-auth-assign-dh_key-to-null-after-kfree_sensiti.patch b/queue-6.11/nvmet-auth-assign-dh_key-to-null-after-kfree_sensiti.patch
new file mode 100644 (file)
index 0000000..9f9262c
--- /dev/null
@@ -0,0 +1,41 @@
+From 24d4b379165fb7dc6b55d71277da5e190dc9243f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 16 Sep 2024 22:41:37 +0500
+Subject: nvmet-auth: assign dh_key to NULL after kfree_sensitive
+
+From: Vitaliy Shevtsov <v.shevtsov@maxima.ru>
+
+[ Upstream commit d2f551b1f72b4c508ab9298419f6feadc3b5d791 ]
+
+ctrl->dh_key might be used across multiple calls to nvmet_setup_dhgroup()
+for the same controller. So it's better to nullify it after release on
+error path in order to avoid double free later in nvmet_destroy_auth().
+
+Found by Linux Verification Center (linuxtesting.org) with Svace.
+
+Fixes: 7a277c37d352 ("nvmet-auth: Diffie-Hellman key exchange support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Vitaliy Shevtsov <v.shevtsov@maxima.ru>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: Hannes Reinecke <hare@suse.de>
+Signed-off-by: Keith Busch <kbusch@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/nvme/target/auth.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/nvme/target/auth.c b/drivers/nvme/target/auth.c
+index 8bc3f431c77f6..8c41a47dfed17 100644
+--- a/drivers/nvme/target/auth.c
++++ b/drivers/nvme/target/auth.c
+@@ -103,6 +103,7 @@ int nvmet_setup_dhgroup(struct nvmet_ctrl *ctrl, u8 dhgroup_id)
+                       pr_debug("%s: ctrl %d failed to generate private key, err %d\n",
+                                __func__, ctrl->cntlid, ret);
+                       kfree_sensitive(ctrl->dh_key);
++                      ctrl->dh_key = NULL;
+                       return ret;
+               }
+               ctrl->dh_keysize = crypto_kpp_maxsize(ctrl->dh_tfm);
+-- 
+2.43.0
+
diff --git a/queue-6.11/ocfs2-pass-u64-to-ocfs2_truncate_inline-maybe-overfl.patch b/queue-6.11/ocfs2-pass-u64-to-ocfs2_truncate_inline-maybe-overfl.patch
new file mode 100644 (file)
index 0000000..79b075a
--- /dev/null
@@ -0,0 +1,60 @@
+From dbfb2ead9ec9755ada44cb33ed7a48e82347cd98 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Oct 2024 19:43:47 +0800
+Subject: ocfs2: pass u64 to ocfs2_truncate_inline maybe overflow
+
+From: Edward Adam Davis <eadavis@qq.com>
+
+[ Upstream commit bc0a2f3a73fcdac651fca64df39306d1e5ebe3b0 ]
+
+Syzbot reported a kernel BUG in ocfs2_truncate_inline.  There are two
+reasons for this: first, the parameter value passed is greater than
+ocfs2_max_inline_data_with_xattr, second, the start and end parameters of
+ocfs2_truncate_inline are "unsigned int".
+
+So, we need to add a sanity check for byte_start and byte_len right before
+ocfs2_truncate_inline() in ocfs2_remove_inode_range(), if they are greater
+than ocfs2_max_inline_data_with_xattr return -EINVAL.
+
+Link: https://lkml.kernel.org/r/tencent_D48DB5122ADDAEDDD11918CFB68D93258C07@qq.com
+Fixes: 1afc32b95233 ("ocfs2: Write support for inline data")
+Signed-off-by: Edward Adam Davis <eadavis@qq.com>
+Reported-by: syzbot+81092778aac03460d6b7@syzkaller.appspotmail.com
+Closes: https://syzkaller.appspot.com/bug?extid=81092778aac03460d6b7
+Reviewed-by: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Joel Becker <jlbec@evilplan.org>
+Cc: Joseph Qi <joseph.qi@linux.alibaba.com>
+Cc: Mark Fasheh <mark@fasheh.com>
+Cc: Junxiao Bi <junxiao.bi@oracle.com>
+Cc: Changwei Ge <gechangwei@live.cn>
+Cc: Gang He <ghe@suse.com>
+Cc: Jun Piao <piaojun@huawei.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ocfs2/file.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
+index ccc57038a9779..02d2beb7ddb95 100644
+--- a/fs/ocfs2/file.c
++++ b/fs/ocfs2/file.c
+@@ -1783,6 +1783,14 @@ int ocfs2_remove_inode_range(struct inode *inode,
+               return 0;
+       if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
++              int id_count = ocfs2_max_inline_data_with_xattr(inode->i_sb, di);
++
++              if (byte_start > id_count || byte_start + byte_len > id_count) {
++                      ret = -EINVAL;
++                      mlog_errno(ret);
++                      goto out;
++              }
++
+               ret = ocfs2_truncate_inline(inode, di_bh, byte_start,
+                                           byte_start + byte_len, 0);
+               if (ret) {
+-- 
+2.43.0
+
diff --git a/queue-6.11/phy-freescale-imx8m-pcie-do-cmn_rst-just-before-phy-.patch b/queue-6.11/phy-freescale-imx8m-pcie-do-cmn_rst-just-before-phy-.patch
new file mode 100644 (file)
index 0000000..8face5d
--- /dev/null
@@ -0,0 +1,97 @@
+From fe97ed0862384798e6288056ef6aef29d8a3383d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 21 Oct 2024 11:52:41 -0400
+Subject: phy: freescale: imx8m-pcie: Do CMN_RST just before PHY PLL lock check
+
+From: Richard Zhu <hongxing.zhu@nxp.com>
+
+[ Upstream commit f89263b69731e0144d275fff777ee0dd92069200 ]
+
+When enable initcall_debug together with higher debug level below.
+CONFIG_CONSOLE_LOGLEVEL_DEFAULT=9
+CONFIG_CONSOLE_LOGLEVEL_QUIET=9
+CONFIG_MESSAGE_LOGLEVEL_DEFAULT=7
+
+The initialization of i.MX8MP PCIe PHY might be timeout failed randomly.
+To fix this issue, adjust the sequence of the resets refer to the power
+up sequence listed below.
+
+i.MX8MP PCIe PHY power up sequence:
+                          /---------------------------------------------
+1.8v supply     ---------/
+                    /---------------------------------------------------
+0.8v supply     ---/
+
+                ---\ /--------------------------------------------------
+                    X        REFCLK Valid
+Reference Clock ---/ \--------------------------------------------------
+                             -------------------------------------------
+                             |
+i_init_restn    --------------
+                                    ------------------------------------
+                                    |
+i_cmn_rstn      ---------------------
+                                         -------------------------------
+                                         |
+o_pll_lock_done --------------------------
+
+Logs:
+imx6q-pcie 33800000.pcie: host bridge /soc@0/pcie@33800000 ranges:
+imx6q-pcie 33800000.pcie:       IO 0x001ff80000..0x001ff8ffff -> 0x0000000000
+imx6q-pcie 33800000.pcie:      MEM 0x0018000000..0x001fefffff -> 0x0018000000
+probe of clk_imx8mp_audiomix.reset.0 returned 0 after 1052 usecs
+probe of 30e20000.clock-controller returned 0 after 32971 usecs
+phy phy-32f00000.pcie-phy.4: phy poweron failed --> -110
+probe of 30e10000.dma-controller returned 0 after 10235 usecs
+imx6q-pcie 33800000.pcie: waiting for PHY ready timeout!
+dwhdmi-imx 32fd8000.hdmi: Detected HDMI TX controller v2.13a with HDCP (samsung_dw_hdmi_phy2)
+imx6q-pcie 33800000.pcie: probe with driver imx6q-pcie failed with error -110
+
+Fixes: dce9edff16ee ("phy: freescale: imx8m-pcie: Add i.MX8MP PCIe PHY support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Richard Zhu <hongxing.zhu@nxp.com>
+Signed-off-by: Frank Li <Frank.Li@nxp.com>
+
+v2 changes:
+- Rebase to latest fixes branch of linux-phy git repo.
+- Richard's environment has a problem and he cannot send out the patch,
+so I am helping to post this fix.
+
+Link: https://lore.kernel.org/r/20241021155241.943665-1-Frank.Li@nxp.com
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/phy/freescale/phy-fsl-imx8m-pcie.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
+index 11fcb1867118c..e98361dcdeadf 100644
+--- a/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
++++ b/drivers/phy/freescale/phy-fsl-imx8m-pcie.c
+@@ -141,11 +141,6 @@ static int imx8_pcie_phy_power_on(struct phy *phy)
+                          IMX8MM_GPR_PCIE_REF_CLK_PLL);
+       usleep_range(100, 200);
+-      /* Do the PHY common block reset */
+-      regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
+-                         IMX8MM_GPR_PCIE_CMN_RST,
+-                         IMX8MM_GPR_PCIE_CMN_RST);
+-
+       switch (imx8_phy->drvdata->variant) {
+       case IMX8MP:
+               reset_control_deassert(imx8_phy->perst);
+@@ -156,6 +151,11 @@ static int imx8_pcie_phy_power_on(struct phy *phy)
+               break;
+       }
++      /* Do the PHY common block reset */
++      regmap_update_bits(imx8_phy->iomuxc_gpr, IOMUXC_GPR14,
++                         IMX8MM_GPR_PCIE_CMN_RST,
++                         IMX8MM_GPR_PCIE_CMN_RST);
++
+       /* Polling to check the phy is ready or not. */
+       ret = readl_poll_timeout(imx8_phy->base + IMX8MM_PCIE_PHY_CMN_REG075,
+                                val, val == ANA_PLL_DONE, 10, 20000);
+-- 
+2.43.0
+
diff --git a/queue-6.11/posix-cpu-timers-clear-tick_dep_bit_posix_timer-on-c.patch b/queue-6.11/posix-cpu-timers-clear-tick_dep_bit_posix_timer-on-c.patch
new file mode 100644 (file)
index 0000000..4ab257a
--- /dev/null
@@ -0,0 +1,92 @@
+From c528ea9e359867bcd3f91efc07cb58749992a3fc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Oct 2024 18:35:35 -0700
+Subject: posix-cpu-timers: Clear TICK_DEP_BIT_POSIX_TIMER on clone
+
+From: Benjamin Segall <bsegall@google.com>
+
+[ Upstream commit b5413156bad91dc2995a5c4eab1b05e56914638a ]
+
+When cloning a new thread, its posix_cputimers are not inherited, and
+are cleared by posix_cputimers_init(). However, this does not clear the
+tick dependency it creates in tsk->tick_dep_mask, and the handler does
+not reach the code to clear the dependency if there were no timers to
+begin with.
+
+Thus if a thread has a cputimer running before clone/fork, all
+descendants will prevent nohz_full unless they create a cputimer of
+their own.
+
+Fix this by entirely clearing the tick_dep_mask in copy_process().
+(There is currently no inherited state that needs a tick dependency)
+
+Process-wide timers do not have this problem because fork does not copy
+signal_struct as a baseline, it creates one from scratch.
+
+Fixes: b78783000d5c ("posix-cpu-timers: Migrate to use new tick dependency mask model")
+Signed-off-by: Ben Segall <bsegall@google.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Frederic Weisbecker <frederic@kernel.org>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/xm26o737bq8o.fsf@google.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/tick.h | 8 ++++++++
+ kernel/fork.c        | 2 ++
+ 2 files changed, 10 insertions(+)
+
+diff --git a/include/linux/tick.h b/include/linux/tick.h
+index 72744638c5b0f..99c9c5a7252aa 100644
+--- a/include/linux/tick.h
++++ b/include/linux/tick.h
+@@ -251,12 +251,19 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
+       if (tick_nohz_full_enabled())
+               tick_nohz_dep_set_task(tsk, bit);
+ }
++
+ static inline void tick_dep_clear_task(struct task_struct *tsk,
+                                      enum tick_dep_bits bit)
+ {
+       if (tick_nohz_full_enabled())
+               tick_nohz_dep_clear_task(tsk, bit);
+ }
++
++static inline void tick_dep_init_task(struct task_struct *tsk)
++{
++      atomic_set(&tsk->tick_dep_mask, 0);
++}
++
+ static inline void tick_dep_set_signal(struct task_struct *tsk,
+                                      enum tick_dep_bits bit)
+ {
+@@ -290,6 +297,7 @@ static inline void tick_dep_set_task(struct task_struct *tsk,
+                                    enum tick_dep_bits bit) { }
+ static inline void tick_dep_clear_task(struct task_struct *tsk,
+                                      enum tick_dep_bits bit) { }
++static inline void tick_dep_init_task(struct task_struct *tsk) { }
+ static inline void tick_dep_set_signal(struct task_struct *tsk,
+                                      enum tick_dep_bits bit) { }
+ static inline void tick_dep_clear_signal(struct signal_struct *signal,
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 6b97fb2ac4af5..dbf3c5d81df3b 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -104,6 +104,7 @@
+ #include <linux/rseq.h>
+ #include <uapi/linux/pidfd.h>
+ #include <linux/pidfs.h>
++#include <linux/tick.h>
+ #include <asm/pgalloc.h>
+ #include <linux/uaccess.h>
+@@ -2290,6 +2291,7 @@ __latent_entropy struct task_struct *copy_process(
+       acct_clear_integrals(p);
+       posix_cputimers_init(&p->posix_cputimers);
++      tick_dep_init_task(p);
+       p->io_context = NULL;
+       audit_set_context(p, NULL);
+-- 
+2.43.0
+
diff --git a/queue-6.11/resource-kexec-walk_system_ram_res_rev-must-retain-r.patch b/queue-6.11/resource-kexec-walk_system_ram_res_rev-must-retain-r.patch
new file mode 100644 (file)
index 0000000..2544ea1
--- /dev/null
@@ -0,0 +1,121 @@
+From 3c8fde3e4e19afe65a67f0b0aefa9a5a627ebc85 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Oct 2024 15:03:47 -0400
+Subject: resource,kexec: walk_system_ram_res_rev must retain resource flags
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Gregory Price <gourry@gourry.net>
+
+[ Upstream commit b125a0def25a082ae944c9615208bf359abdb61c ]
+
+walk_system_ram_res_rev() erroneously discards resource flags when passing
+the information to the callback.
+
+This causes systems with IORESOURCE_SYSRAM_DRIVER_MANAGED memory to have
+these resources selected during kexec to store kexec buffers if that
+memory happens to be at placed above normal system ram.
+
+This leads to undefined behavior after reboot.  If the kexec buffer is
+never touched, nothing happens.  If the kexec buffer is touched, it could
+lead to a crash (like below) or undefined behavior.
+
+Tested on a system with CXL memory expanders with driver managed memory,
+TPM enabled, and CONFIG_IMA_KEXEC=y.  Adding printk's showed the flags
+were being discarded and as a result the check for
+IORESOURCE_SYSRAM_DRIVER_MANAGED passes.
+
+find_next_iomem_res: name(System RAM (kmem))
+                    start(10000000000)
+                    end(1034fffffff)
+                    flags(83000200)
+
+locate_mem_hole_top_down: start(10000000000) end(1034fffffff) flags(0)
+
+[.] BUG: unable to handle page fault for address: ffff89834ffff000
+[.] #PF: supervisor read access in kernel mode
+[.] #PF: error_code(0x0000) - not-present page
+[.] PGD c04c8bf067 P4D c04c8bf067 PUD c04c8be067 PMD 0
+[.] Oops: 0000 [#1] SMP
+[.] RIP: 0010:ima_restore_measurement_list+0x95/0x4b0
+[.] RSP: 0018:ffffc900000d3a80 EFLAGS: 00010286
+[.] RAX: 0000000000001000 RBX: 0000000000000000 RCX: ffff89834ffff000
+[.] RDX: 0000000000000018 RSI: ffff89834ffff000 RDI: ffff89834ffff018
+[.] RBP: ffffc900000d3ba0 R08: 0000000000000020 R09: ffff888132b8a900
+[.] R10: 4000000000000000 R11: 000000003a616d69 R12: 0000000000000000
+[.] R13: ffffffff8404ac28 R14: 0000000000000000 R15: ffff89834ffff000
+[.] FS:  0000000000000000(0000) GS:ffff893d44640000(0000) knlGS:0000000000000000
+[.] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[.] ata5: SATA link down (SStatus 0 SControl 300)
+[.] CR2: ffff89834ffff000 CR3: 000001034d00f001 CR4: 0000000000770ef0
+[.] PKRU: 55555554
+[.] Call Trace:
+[.]  <TASK>
+[.]  ? __die+0x78/0xc0
+[.]  ? page_fault_oops+0x2a8/0x3a0
+[.]  ? exc_page_fault+0x84/0x130
+[.]  ? asm_exc_page_fault+0x22/0x30
+[.]  ? ima_restore_measurement_list+0x95/0x4b0
+[.]  ? template_desc_init_fields+0x317/0x410
+[.]  ? crypto_alloc_tfm_node+0x9c/0xc0
+[.]  ? init_ima_lsm+0x30/0x30
+[.]  ima_load_kexec_buffer+0x72/0xa0
+[.]  ima_init+0x44/0xa0
+[.]  __initstub__kmod_ima__373_1201_init_ima7+0x1e/0xb0
+[.]  ? init_ima_lsm+0x30/0x30
+[.]  do_one_initcall+0xad/0x200
+[.]  ? idr_alloc_cyclic+0xaa/0x110
+[.]  ? new_slab+0x12c/0x420
+[.]  ? new_slab+0x12c/0x420
+[.]  ? number+0x12a/0x430
+[.]  ? sysvec_apic_timer_interrupt+0xa/0x80
+[.]  ? asm_sysvec_apic_timer_interrupt+0x16/0x20
+[.]  ? parse_args+0xd4/0x380
+[.]  ? parse_args+0x14b/0x380
+[.]  kernel_init_freeable+0x1c1/0x2b0
+[.]  ? rest_init+0xb0/0xb0
+[.]  kernel_init+0x16/0x1a0
+[.]  ret_from_fork+0x2f/0x40
+[.]  ? rest_init+0xb0/0xb0
+[.]  ret_from_fork_asm+0x11/0x20
+[.]  </TASK>
+
+Link: https://lore.kernel.org/all/20231114091658.228030-1-bhe@redhat.com/
+Link: https://lkml.kernel.org/r/20241017190347.5578-1-gourry@gourry.net
+Fixes: 7acf164b259d ("resource: add walk_system_ram_res_rev()")
+Signed-off-by: Gregory Price <gourry@gourry.net>
+Reviewed-by: Dan Williams <dan.j.williams@intel.com>
+Acked-by: Baoquan He <bhe@redhat.com>
+Cc: AKASHI Takahiro <takahiro.akashi@linaro.org>
+Cc: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Cc: Bjorn Helgaas <bhelgaas@google.com>
+Cc: "Huang, Ying" <ying.huang@intel.com>
+Cc: Ilpo Järvinen <ilpo.jarvinen@linux.intel.com>
+Cc: Mika Westerberg <mika.westerberg@linux.intel.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/resource.c | 4 +---
+ 1 file changed, 1 insertion(+), 3 deletions(-)
+
+diff --git a/kernel/resource.c b/kernel/resource.c
+index 1681ab5012e12..4f3df25176caa 100644
+--- a/kernel/resource.c
++++ b/kernel/resource.c
+@@ -460,9 +460,7 @@ int walk_system_ram_res_rev(u64 start, u64 end, void *arg,
+                       rams_size += 16;
+               }
+-              rams[i].start = res.start;
+-              rams[i++].end = res.end;
+-
++              rams[i++] = res;
+               start = res.end + 1;
+       }
+-- 
+2.43.0
+
diff --git a/queue-6.11/riscv-efi-set-nx-compat-flag-in-pe-coff-header.patch b/queue-6.11/riscv-efi-set-nx-compat-flag-in-pe-coff-header.patch
new file mode 100644 (file)
index 0000000..580d7d5
--- /dev/null
@@ -0,0 +1,48 @@
+From 5a9b486cb221960cd9a9636cced3d4cae1cb45c7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 29 Sep 2024 16:02:33 +0200
+Subject: riscv: efi: Set NX compat flag in PE/COFF header
+
+From: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
+
+[ Upstream commit d41373a4b910961df5a5e3527d7bde6ad45ca438 ]
+
+The IMAGE_DLLCHARACTERISTICS_NX_COMPAT informs the firmware that the
+EFI binary does not rely on pages that are both executable and
+writable.
+
+The flag is used by some distro versions of GRUB to decide if the EFI
+binary may be executed.
+
+As the Linux kernel neither has RWX sections nor needs RWX pages for
+relocation we should set the flag.
+
+Cc: Ard Biesheuvel <ardb@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Heinrich Schuchardt <heinrich.schuchardt@canonical.com>
+Reviewed-by: Emil Renner Berthing <emil.renner.berthing@canonical.com>
+Fixes: cb7d2dd5612a ("RISC-V: Add PE/COFF header for EFI stub")
+Acked-by: Ard Biesheuvel <ardb@kernel.org>
+Link: https://lore.kernel.org/r/20240929140233.211800-1-heinrich.schuchardt@canonical.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/efi-header.S | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/riscv/kernel/efi-header.S b/arch/riscv/kernel/efi-header.S
+index 515b2dfbca75b..c5f17c2710b58 100644
+--- a/arch/riscv/kernel/efi-header.S
++++ b/arch/riscv/kernel/efi-header.S
+@@ -64,7 +64,7 @@ extra_header_fields:
+       .long   efi_header_end - _start                 // SizeOfHeaders
+       .long   0                                       // CheckSum
+       .short  IMAGE_SUBSYSTEM_EFI_APPLICATION         // Subsystem
+-      .short  0                                       // DllCharacteristics
++      .short  IMAGE_DLL_CHARACTERISTICS_NX_COMPAT     // DllCharacteristics
+       .quad   0                                       // SizeOfStackReserve
+       .quad   0                                       // SizeOfStackCommit
+       .quad   0                                       // SizeOfHeapReserve
+-- 
+2.43.0
+
diff --git a/queue-6.11/riscv-prevent-a-bad-reference-count-on-cpu-nodes.patch b/queue-6.11/riscv-prevent-a-bad-reference-count-on-cpu-nodes.patch
new file mode 100644 (file)
index 0000000..7a71274
--- /dev/null
@@ -0,0 +1,66 @@
+From 4431ae6ccacaf738fc8dca807771a7725985ae15 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 Sep 2024 10:00:52 +0200
+Subject: riscv: Prevent a bad reference count on CPU nodes
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Miquel Sabaté Solà <mikisabate@gmail.com>
+
+[ Upstream commit 37233169a6ea912020c572f870075a63293b786a ]
+
+When populating cache leaves we previously fetched the CPU device node
+at the very beginning. But when ACPI is enabled we go through a
+specific branch which returns early and does not call 'of_node_put' for
+the node that was acquired.
+
+Since we are not using a CPU device node for the ACPI code anyways, we
+can simply move the initialization of it just past the ACPI block, and
+we are guaranteed to have an 'of_node_put' call for the acquired node.
+This prevents a bad reference count of the CPU device node.
+
+Moreover, the previous function did not check for errors when acquiring
+the device node, so a return -ENOENT has been added for that case.
+
+Signed-off-by: Miquel Sabaté Solà <mikisabate@gmail.com>
+Reviewed-by: Sudeep Holla <sudeep.holla@arm.com>
+Reviewed-by: Sunil V L <sunilvl@ventanamicro.com>
+Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Fixes: 604f32ea6909 ("riscv: cacheinfo: initialize cacheinfo's level and  type from ACPI PPTT")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20240913080053.36636-1-mikisabate@gmail.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/cacheinfo.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c
+index d6c108c50cba9..d32dfdba083e1 100644
+--- a/arch/riscv/kernel/cacheinfo.c
++++ b/arch/riscv/kernel/cacheinfo.c
+@@ -75,8 +75,7 @@ int populate_cache_leaves(unsigned int cpu)
+ {
+       struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu);
+       struct cacheinfo *this_leaf = this_cpu_ci->info_list;
+-      struct device_node *np = of_cpu_device_node_get(cpu);
+-      struct device_node *prev = NULL;
++      struct device_node *np, *prev;
+       int levels = 1, level = 1;
+       if (!acpi_disabled) {
+@@ -100,6 +99,10 @@ int populate_cache_leaves(unsigned int cpu)
+               return 0;
+       }
++      np = of_cpu_device_node_get(cpu);
++      if (!np)
++              return -ENOENT;
++
+       if (of_property_read_bool(np, "cache-size"))
+               ci_leaf_init(this_leaf++, CACHE_TYPE_UNIFIED, level);
+       if (of_property_read_bool(np, "i-cache-size"))
+-- 
+2.43.0
+
diff --git a/queue-6.11/riscv-remove-duplicated-get_rm.patch b/queue-6.11/riscv-remove-duplicated-get_rm.patch
new file mode 100644 (file)
index 0000000..1130c9f
--- /dev/null
@@ -0,0 +1,38 @@
+From b8bca16f3281116851f7e9b59e2a8c3eefac5af5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Oct 2024 17:41:39 +0800
+Subject: riscv: Remove duplicated GET_RM
+
+From: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+
+[ Upstream commit 164f66de6bb6ef454893f193c898dc8f1da6d18b ]
+
+The macro GET_RM is defined twice in this file; one definition can be removed.
+
+Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+Fixes: 956d705dd279 ("riscv: Unaligned load/store handling for M_MODE")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/20241008094141.549248-3-zhangchunyan@iscas.ac.cn
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/traps_misaligned.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/arch/riscv/kernel/traps_misaligned.c b/arch/riscv/kernel/traps_misaligned.c
+index d4fd8af7aaf5a..1b9867136b610 100644
+--- a/arch/riscv/kernel/traps_misaligned.c
++++ b/arch/riscv/kernel/traps_misaligned.c
+@@ -136,8 +136,6 @@
+ #define REG_PTR(insn, pos, regs)      \
+       (ulong *)((ulong)(regs) + REG_OFFSET(insn, pos))
+-#define GET_RM(insn)                  (((insn) >> 12) & 7)
+-
+ #define GET_RS1(insn, regs)           (*REG_PTR(insn, SH_RS1, regs))
+ #define GET_RS2(insn, regs)           (*REG_PTR(insn, SH_RS2, regs))
+ #define GET_RS1S(insn, regs)          (*REG_PTR(RVC_RS1S(insn), 0, regs))
+-- 
+2.43.0
+
diff --git a/queue-6.11/riscv-remove-unused-generating_asm_offsets.patch b/queue-6.11/riscv-remove-unused-generating_asm_offsets.patch
new file mode 100644 (file)
index 0000000..7adb676
--- /dev/null
@@ -0,0 +1,44 @@
+From a7393a0a42f66bb6aca48a9e82d700cdf30aac48 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Oct 2024 17:41:38 +0800
+Subject: riscv: Remove unused GENERATING_ASM_OFFSETS
+
+From: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+
+[ Upstream commit 46d4e5ac6f2f801f97bcd0ec82365969197dc9b1 ]
+
+The macro is not used in the current version of the kernel; it looks like
+it can be removed to avoid a build warning:
+
+../arch/riscv/kernel/asm-offsets.c: At top level:
+../arch/riscv/kernel/asm-offsets.c:7: warning: macro "GENERATING_ASM_OFFSETS" is not used [-Wunused-macros]
+    7 | #define GENERATING_ASM_OFFSETS
+
+Fixes: 9639a44394b9 ("RISC-V: Provide a cleaner raw_smp_processor_id()")
+Cc: stable@vger.kernel.org
+Reviewed-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Tested-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Signed-off-by: Chunyan Zhang <zhangchunyan@iscas.ac.cn>
+Link: https://lore.kernel.org/r/20241008094141.549248-2-zhangchunyan@iscas.ac.cn
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/asm-offsets.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/arch/riscv/kernel/asm-offsets.c b/arch/riscv/kernel/asm-offsets.c
+index b09ca5f944f77..cb09f0c4f62c7 100644
+--- a/arch/riscv/kernel/asm-offsets.c
++++ b/arch/riscv/kernel/asm-offsets.c
+@@ -4,8 +4,6 @@
+  * Copyright (C) 2017 SiFive
+  */
+-#define GENERATING_ASM_OFFSETS
+-
+ #include <linux/kbuild.h>
+ #include <linux/mm.h>
+ #include <linux/sched.h>
+-- 
+2.43.0
+
diff --git a/queue-6.11/riscv-use-u-to-format-the-output-of-cpu.patch b/queue-6.11/riscv-use-u-to-format-the-output-of-cpu.patch
new file mode 100644 (file)
index 0000000..75c298c
--- /dev/null
@@ -0,0 +1,43 @@
+From 139afa4d70e25bea309c492ae9f4d68364ac9029 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 17 Oct 2024 11:20:10 +0800
+Subject: riscv: Use '%u' to format the output of 'cpu'
+
+From: WangYuli <wangyuli@uniontech.com>
+
+[ Upstream commit e0872ab72630dada3ae055bfa410bf463ff1d1e0 ]
+
+'cpu' is an unsigned integer, so its conversion specifier should
+be %u, not %d.
+
+Suggested-by: Wentao Guan <guanwentao@uniontech.com>
+Suggested-by: Maciej W. Rozycki <macro@orcam.me.uk>
+Link: https://lore.kernel.org/all/alpine.DEB.2.21.2409122309090.40372@angie.orcam.me.uk/
+Signed-off-by: WangYuli <wangyuli@uniontech.com>
+Reviewed-by: Charlie Jenkins <charlie@rivosinc.com>
+Tested-by: Charlie Jenkins <charlie@rivosinc.com>
+Fixes: f1e58583b9c7 ("RISC-V: Support cpu hotplug")
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/r/4C127DEECDA287C8+20241017032010.96772-1-wangyuli@uniontech.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/cpu-hotplug.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/riscv/kernel/cpu-hotplug.c b/arch/riscv/kernel/cpu-hotplug.c
+index 28b58fc5ad199..a1e38ecfc8be2 100644
+--- a/arch/riscv/kernel/cpu-hotplug.c
++++ b/arch/riscv/kernel/cpu-hotplug.c
+@@ -58,7 +58,7 @@ void arch_cpuhp_cleanup_dead_cpu(unsigned int cpu)
+       if (cpu_ops->cpu_is_stopped)
+               ret = cpu_ops->cpu_is_stopped(cpu);
+       if (ret)
+-              pr_warn("CPU%d may not have stopped: %d\n", cpu, ret);
++              pr_warn("CPU%u may not have stopped: %d\n", cpu, ret);
+ }
+ /*
+-- 
+2.43.0
+
diff --git a/queue-6.11/riscv-vdso-prevent-the-compiler-from-inserting-calls.patch b/queue-6.11/riscv-vdso-prevent-the-compiler-from-inserting-calls.patch
new file mode 100644 (file)
index 0000000..1a0b1ef
--- /dev/null
@@ -0,0 +1,40 @@
+From 8db7792ec8fbce816af7a6506cc3fe226f54365b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Oct 2024 10:36:24 +0200
+Subject: riscv: vdso: Prevent the compiler from inserting calls to memset()
+
+From: Alexandre Ghiti <alexghiti@rivosinc.com>
+
+[ Upstream commit bf40167d54d55d4b54d0103713d86a8638fb9290 ]
+
+The compiler is smart enough to insert a call to memset() in
+riscv_vdso_get_cpus(), which generates a dynamic relocation.
+
+So prevent this by using -fno-builtin option.
+
+Fixes: e2c0cdfba7f6 ("RISC-V: User-facing API")
+Cc: stable@vger.kernel.org
+Signed-off-by: Alexandre Ghiti <alexghiti@rivosinc.com>
+Reviewed-by: Guo Ren <guoren@kernel.org>
+Link: https://lore.kernel.org/r/20241016083625.136311-2-alexghiti@rivosinc.com
+Signed-off-by: Palmer Dabbelt <palmer@rivosinc.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/riscv/kernel/vdso/Makefile | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile
+index f7ef8ad9b550d..54a7fec25d5f8 100644
+--- a/arch/riscv/kernel/vdso/Makefile
++++ b/arch/riscv/kernel/vdso/Makefile
+@@ -18,6 +18,7 @@ obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
+ ccflags-y := -fno-stack-protector
+ ccflags-y += -DDISABLE_BRANCH_PROFILING
++ccflags-y += -fno-builtin
+ ifneq ($(c-gettimeofday-y),)
+   CFLAGS_vgettimeofday.o += -fPIC -include $(c-gettimeofday-y)
+-- 
+2.43.0
+
diff --git a/queue-6.11/sched-numa-fix-the-potential-null-pointer-dereferenc.patch b/queue-6.11/sched-numa-fix-the-potential-null-pointer-dereferenc.patch
new file mode 100644 (file)
index 0000000..6914525
--- /dev/null
@@ -0,0 +1,90 @@
+From cbf6ae0a8b90bb28788732c8ad9bd6784ef1c7c3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Oct 2024 10:22:08 +0800
+Subject: sched/numa: Fix the potential null pointer dereference in
+ task_numa_work()
+
+From: Shawn Wang <shawnwang@linux.alibaba.com>
+
+[ Upstream commit 9c70b2a33cd2aa6a5a59c5523ef053bd42265209 ]
+
+When running stress-ng-vm-segv test, we found a null pointer dereference
+error in task_numa_work(). Here is the backtrace:
+
+  [323676.066985] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000020
+  ......
+  [323676.067108] CPU: 35 PID: 2694524 Comm: stress-ng-vm-se
+  ......
+  [323676.067113] pstate: 23401009 (nzCv daif +PAN -UAO +TCO +DIT +SSBS BTYPE=--)
+  [323676.067115] pc : vma_migratable+0x1c/0xd0
+  [323676.067122] lr : task_numa_work+0x1ec/0x4e0
+  [323676.067127] sp : ffff8000ada73d20
+  [323676.067128] x29: ffff8000ada73d20 x28: 0000000000000000 x27: 000000003e89f010
+  [323676.067130] x26: 0000000000080000 x25: ffff800081b5c0d8 x24: ffff800081b27000
+  [323676.067133] x23: 0000000000010000 x22: 0000000104d18cc0 x21: ffff0009f7158000
+  [323676.067135] x20: 0000000000000000 x19: 0000000000000000 x18: ffff8000ada73db8
+  [323676.067138] x17: 0001400000000000 x16: ffff800080df40b0 x15: 0000000000000035
+  [323676.067140] x14: ffff8000ada73cc8 x13: 1fffe0017cc72001 x12: ffff8000ada73cc8
+  [323676.067142] x11: ffff80008001160c x10: ffff000be639000c x9 : ffff8000800f4ba4
+  [323676.067145] x8 : ffff000810375000 x7 : ffff8000ada73974 x6 : 0000000000000001
+  [323676.067147] x5 : 0068000b33e26707 x4 : 0000000000000001 x3 : ffff0009f7158000
+  [323676.067149] x2 : 0000000000000041 x1 : 0000000000004400 x0 : 0000000000000000
+  [323676.067152] Call trace:
+  [323676.067153]  vma_migratable+0x1c/0xd0
+  [323676.067155]  task_numa_work+0x1ec/0x4e0
+  [323676.067157]  task_work_run+0x78/0xd8
+  [323676.067161]  do_notify_resume+0x1ec/0x290
+  [323676.067163]  el0_svc+0x150/0x160
+  [323676.067167]  el0t_64_sync_handler+0xf8/0x128
+  [323676.067170]  el0t_64_sync+0x17c/0x180
+  [323676.067173] Code: d2888001 910003fd f9000bf3 aa0003f3 (f9401000)
+  [323676.067177] SMP: stopping secondary CPUs
+  [323676.070184] Starting crashdump kernel...
+
+stress-ng-vm-segv in stress-ng is used to stress test the SIGSEGV error
+handling function of the system, which tries to cause a SIGSEGV error on
+return from unmapping the whole address space of the child process.
+
+Normally this program will not cause kernel crashes. But before the
+munmap system call returns to user mode, a potential task_numa_work()
+for numa balancing could be added and executed. In this scenario, since the
+child process has no vma after munmap, the vma_next() in task_numa_work()
+will return a null pointer even if the vma iterator restarts from 0.
+
+Recheck the vma pointer before dereferencing it in task_numa_work().
+
+Fixes: 214dbc428137 ("sched: convert to vma iterator")
+Signed-off-by: Shawn Wang <shawnwang@linux.alibaba.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: stable@vger.kernel.org # v6.2+
+Link: https://lkml.kernel.org/r/20241025022208.125527-1-shawnwang@linux.alibaba.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 1d2cbdb162a67..425348b8d9eb3 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -3289,7 +3289,7 @@ static void task_numa_work(struct callback_head *work)
+               vma = vma_next(&vmi);
+       }
+-      do {
++      for (; vma; vma = vma_next(&vmi)) {
+               if (!vma_migratable(vma) || !vma_policy_mof(vma) ||
+                       is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_MIXEDMAP)) {
+                       trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_UNSUITABLE);
+@@ -3411,7 +3411,7 @@ static void task_numa_work(struct callback_head *work)
+                */
+               if (vma_pids_forced)
+                       break;
+-      } for_each_vma(vmi, vma);
++      }
+       /*
+        * If no VMAs are remaining and VMAs were skipped due to the PID
+-- 
+2.43.0
+
diff --git a/queue-6.11/scsi-ufs-core-fix-another-deadlock-during-rtc-update.patch b/queue-6.11/scsi-ufs-core-fix-another-deadlock-during-rtc-update.patch
new file mode 100644 (file)
index 0000000..41a2f7e
--- /dev/null
@@ -0,0 +1,43 @@
+From 571a3552681bc8f34155b1e4b0a905eaf0f43359 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 24 Oct 2024 09:54:53 +0800
+Subject: scsi: ufs: core: Fix another deadlock during RTC update
+
+From: Peter Wang <peter.wang@mediatek.com>
+
+[ Upstream commit cb7e509c4e0197f63717fee54fb41c4990ba8d3a ]
+
+If ufshcd_rtc_work calls ufshcd_rpm_put_sync() and the pm's usage_count
+is 0, we will enter the runtime suspend callback.  However, the runtime
+suspend callback will wait to flush ufshcd_rtc_work, causing a deadlock.
+
+Replace ufshcd_rpm_put_sync() with ufshcd_rpm_put() to avoid the
+deadlock.
+
+Fixes: 6bf999e0eb41 ("scsi: ufs: core: Add UFS RTC support")
+Cc: stable@vger.kernel.org #6.11.x
+Signed-off-by: Peter Wang <peter.wang@mediatek.com>
+Link: https://lore.kernel.org/r/20241024015453.21684-1-peter.wang@mediatek.com
+Reviewed-by: Bart Van Assche <bvanassche@acm.org>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/ufs/core/ufshcd.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c
+index 09408642a6efb..83567388a7b58 100644
+--- a/drivers/ufs/core/ufshcd.c
++++ b/drivers/ufs/core/ufshcd.c
+@@ -8224,7 +8224,7 @@ static void ufshcd_update_rtc(struct ufs_hba *hba)
+       err = ufshcd_query_attr(hba, UPIU_QUERY_OPCODE_WRITE_ATTR, QUERY_ATTR_IDN_SECONDS_PASSED,
+                               0, 0, &val);
+-      ufshcd_rpm_put_sync(hba);
++      ufshcd_rpm_put(hba);
+       if (err)
+               dev_err(hba->dev, "%s: Failed to update rtc %d\n", __func__, err);
+-- 
+2.43.0
+
index fb277f98db5ef8b6354e5eaca8449abd09c5dc68..ea753d29c01d14d023ac6e7dffe8400dcdd0a72a 100644 (file)
@@ -144,3 +144,66 @@ cxl-port-fix-use-after-free-permit-out-of-order-decoder-shutdown.patch
 cxl-port-fix-cxl-port-initialization-order-when-the-subsystem-is-built-in.patch
 mmc-sdhci-pci-gli-gl9767-fix-low-power-mode-on-the-set-clock-function.patch
 mmc-sdhci-pci-gli-gl9767-fix-low-power-mode-in-the-sd-express-process.patch
+block-fix-sanity-checks-in-blk_rq_map_user_bvec.patch
+cgroup-bpf-use-a-dedicated-workqueue-for-cgroup-bpf-.patch
+phy-freescale-imx8m-pcie-do-cmn_rst-just-before-phy-.patch
+btrfs-merge-btrfs_orig_bbio_end_io-into-btrfs_bio_en.patch
+btrfs-fix-error-propagation-of-split-bios.patch
+spi-spi-fsl-dspi-fix-crash-when-not-using-gpio-chip-.patch
+iio-light-veml6030-fix-microlux-value-calculation.patch-18046
+riscv-vdso-prevent-the-compiler-from-inserting-calls.patch
+input-edt-ft5x06-fix-regmap-leak-when-probe-fails.patch
+alsa-hda-realtek-limit-internal-mic-boost-on-dell-pl.patch
+riscv-efi-set-nx-compat-flag-in-pe-coff-header.patch
+riscv-prevent-a-bad-reference-count-on-cpu-nodes.patch
+riscv-use-u-to-format-the-output-of-cpu.patch
+riscv-remove-unused-generating_asm_offsets.patch
+riscv-remove-duplicated-get_rm.patch
+scsi-ufs-core-fix-another-deadlock-during-rtc-update.patch
+cxl-port-fix-cxl_bus_rescan-vs-bus_rescan_devices.patch
+cxl-acpi-ensure-ports-ready-at-cxl_acpi_probe-return.patch
+sched-numa-fix-the-potential-null-pointer-dereferenc.patch
+posix-cpu-timers-clear-tick_dep_bit_posix_timer-on-c.patch
+iov_iter-fix-copy_page_from_iter_atomic-if-kmap_loca.patch
+tpm-return-tpm2_sessions_init-when-null-key-creation.patch
+tpm-rollback-tpm2_load_null.patch
+drm-amd-pm-vangogh-fix-kernel-memory-out-of-bounds-w.patch
+drm-amdgpu-smu13-fix-profile-reporting.patch
+tpm-lazily-flush-the-auth-session.patch
+mptcp-init-protect-sched-with-rcu_read_lock.patch
+mei-use-kvmalloc-for-read-buffer.patch
+fork-do-not-invoke-uffd-on-fork-if-error-occurs.patch
+fork-only-invoke-khugepaged-ksm-hooks-if-no-error.patch
+mm-page_alloc-let-gfp_atomic-order-0-allocs-access-h.patch
+x86-traps-enable-ubsan-traps-on-x86.patch
+x86-traps-move-kmsan-check-after-instrumentation_beg.patch
+ocfs2-pass-u64-to-ocfs2_truncate_inline-maybe-overfl.patch
+resource-kexec-walk_system_ram_res_rev-must-retain-r.patch
+mctp-i2c-handle-null-header-address.patch
+btrfs-fix-use-after-free-of-block-device-file-in-__b.patch
+accel-ivpu-fix-noc-firewall-interrupt-handling.patch
+xfs-fix-finding-a-last-resort-ag-in-xfs_filestream_p.patch
+alsa-hda-realtek-fix-headset-mic-on-tuxedo-gemini-17.patch
+alsa-hda-realtek-fix-headset-mic-on-tuxedo-stellaris.patch
+nvmet-auth-assign-dh_key-to-null-after-kfree_sensiti.patch
+nvme-re-fix-error-handling-for-io_uring-nvme-passthr.patch
+kasan-remove-vmalloc_percpu-test.patch
+drm-tests-helpers-add-helper-for-drm_display_mode_fr.patch
+drm-connector-hdmi-fix-memory-leak-in-drm_display_mo.patch
+drm-tests-hdmi-fix-memory-leaks-in-drm_display_mode_.patch
+drm-xe-fix-register-definition-order-in-xe_regs.h.patch
+drm-xe-kill-regs-xe_sriov_regs.h.patch
+drm-xe-add-mmio-read-before-ggtt-invalidate.patch
+drm-xe-don-t-short-circuit-tdr-on-jobs-not-started.patch
+io_uring-rw-fix-missing-nowait-check-for-o_direct-st.patch
+btrfs-fix-extent-map-merging-not-happening-for-adjac.patch
+btrfs-fix-defrag-not-merging-contiguous-extents-due-.patch
+gpiolib-fix-debugfs-newline-separators.patch
+gpiolib-fix-debugfs-dangling-chip-separator.patch
+vmscan-migrate-fix-page-count-imbalance-on-node-stat.patch
+mm-mmap-limit-thp-alignment-of-anonymous-mappings-to.patch
+input-fix-regression-when-re-registering-input-handl.patch
+mm-multi-gen-lru-ignore-non-leaf-pmd_young-for-force.patch
+mm-multi-gen-lru-remove-mm_leaf_old-and-mm_nonleaf_t.patch
+mm-shrink-skip-folio-mapped-by-an-exiting-process.patch
+mm-multi-gen-lru-use-ptep-pmdp-_clear_young_notify.patch
diff --git a/queue-6.11/spi-spi-fsl-dspi-fix-crash-when-not-using-gpio-chip-.patch b/queue-6.11/spi-spi-fsl-dspi-fix-crash-when-not-using-gpio-chip-.patch
new file mode 100644 (file)
index 0000000..2f611a0
--- /dev/null
@@ -0,0 +1,86 @@
+From dd51986062472b24ac98f9a7b4a8c313bb640b5d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Oct 2024 16:30:32 -0400
+Subject: spi: spi-fsl-dspi: Fix crash when not using GPIO chip select
+
+From: Frank Li <Frank.Li@nxp.com>
+
+[ Upstream commit 25f00a13dccf8e45441265768de46c8bf58e08f6 ]
+
+Add check for the return value of spi_get_csgpiod() to avoid passing a NULL
+pointer to gpiod_direction_output(), preventing a crash when GPIO chip
+select is not used.
+
+Fix below crash:
+[    4.251960] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000000
+[    4.260762] Mem abort info:
+[    4.263556]   ESR = 0x0000000096000004
+[    4.267308]   EC = 0x25: DABT (current EL), IL = 32 bits
+[    4.272624]   SET = 0, FnV = 0
+[    4.275681]   EA = 0, S1PTW = 0
+[    4.278822]   FSC = 0x04: level 0 translation fault
+[    4.283704] Data abort info:
+[    4.286583]   ISV = 0, ISS = 0x00000004, ISS2 = 0x00000000
+[    4.292074]   CM = 0, WnR = 0, TnD = 0, TagAccess = 0
+[    4.297130]   GCS = 0, Overlay = 0, DirtyBit = 0, Xs = 0
+[    4.302445] [0000000000000000] user address but active_mm is swapper
+[    4.308805] Internal error: Oops: 0000000096000004 [#1] PREEMPT SMP
+[    4.315072] Modules linked in:
+[    4.318124] CPU: 2 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.12.0-rc4-next-20241023-00008-ga20ec42c5fc1 #359
+[    4.328130] Hardware name: LS1046A QDS Board (DT)
+[    4.332832] pstate: 40000005 (nZcv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+[    4.339794] pc : gpiod_direction_output+0x34/0x5c
+[    4.344505] lr : gpiod_direction_output+0x18/0x5c
+[    4.349208] sp : ffff80008003b8f0
+[    4.352517] x29: ffff80008003b8f0 x28: 0000000000000000 x27: ffffc96bcc7e9068
+[    4.359659] x26: ffffc96bcc6e00b0 x25: ffffc96bcc598398 x24: ffff447400132810
+[    4.366800] x23: 0000000000000000 x22: 0000000011e1a300 x21: 0000000000020002
+[    4.373940] x20: 0000000000000000 x19: 0000000000000000 x18: ffffffffffffffff
+[    4.381081] x17: ffff44740016e600 x16: 0000000500000003 x15: 0000000000000007
+[    4.388221] x14: 0000000000989680 x13: 0000000000020000 x12: 000000000000001e
+[    4.395362] x11: 0044b82fa09b5a53 x10: 0000000000000019 x9 : 0000000000000008
+[    4.402502] x8 : 0000000000000002 x7 : 0000000000000007 x6 : 0000000000000000
+[    4.409641] x5 : 0000000000000200 x4 : 0000000002000000 x3 : 0000000000000000
+[    4.416781] x2 : 0000000000022202 x1 : 0000000000000000 x0 : 0000000000000000
+[    4.423921] Call trace:
+[    4.426362]  gpiod_direction_output+0x34/0x5c (P)
+[    4.431067]  gpiod_direction_output+0x18/0x5c (L)
+[    4.435771]  dspi_setup+0x220/0x334
+
+Fixes: 9e264f3f85a5 ("spi: Replace all spi->chip_select and spi->cs_gpiod references with function call")
+Cc: stable@vger.kernel.org
+Signed-off-by: Frank Li <Frank.Li@nxp.com>
+Link: https://patch.msgid.link/20241023203032.1388491-1-Frank.Li@nxp.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/spi/spi-fsl-dspi.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/spi/spi-fsl-dspi.c b/drivers/spi/spi-fsl-dspi.c
+index 191de1917f831..3fa990fb59c78 100644
+--- a/drivers/spi/spi-fsl-dspi.c
++++ b/drivers/spi/spi-fsl-dspi.c
+@@ -1003,6 +1003,7 @@ static int dspi_setup(struct spi_device *spi)
+       u32 cs_sck_delay = 0, sck_cs_delay = 0;
+       struct fsl_dspi_platform_data *pdata;
+       unsigned char pasc = 0, asc = 0;
++      struct gpio_desc *gpio_cs;
+       struct chip_data *chip;
+       unsigned long clkrate;
+       bool cs = true;
+@@ -1077,7 +1078,10 @@ static int dspi_setup(struct spi_device *spi)
+                       chip->ctar_val |= SPI_CTAR_LSBFE;
+       }
+-      gpiod_direction_output(spi_get_csgpiod(spi, 0), false);
++      gpio_cs = spi_get_csgpiod(spi, 0);
++      if (gpio_cs)
++              gpiod_direction_output(gpio_cs, false);
++
+       dspi_deassert_cs(spi, &cs);
+       spi_set_ctldata(spi, chip);
+-- 
+2.43.0
+
diff --git a/queue-6.11/tpm-lazily-flush-the-auth-session.patch b/queue-6.11/tpm-lazily-flush-the-auth-session.patch
new file mode 100644 (file)
index 0000000..7f11134
--- /dev/null
@@ -0,0 +1,214 @@
+From 514d33f63cd15cd23c6cd3d505cf3e8223283770 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Oct 2024 07:50:01 +0200
+Subject: tpm: Lazily flush the auth session
+
+From: Jarkko Sakkinen <jarkko@kernel.org>
+
+[ Upstream commit df745e25098dcb2f706399c0d06dd8d1bab6b6ec ]
+
+Move the allocation of chip->auth to tpm2_start_auth_session() so that this
+field can be used as flag to tell whether auth session is active or not.
+
+Instead of flushing and reloading the auth session for every transaction
+separately, keep the session open unless /dev/tpm0 is used.
+
+Reported-by: Pengyu Ma <mapengyu@gmail.com>
+Closes: https://bugzilla.kernel.org/show_bug.cgi?id=219229
+Cc: stable@vger.kernel.org # v6.10+
+Fixes: 7ca110f2679b ("tpm: Address !chip->auth in tpm_buf_append_hmac_session*()")
+Tested-by: Pengyu Ma <mapengyu@gmail.com>
+Tested-by: Stefan Berger <stefanb@linux.ibm.com>
+Reviewed-by: Stefan Berger <stefanb@linux.ibm.com>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/char/tpm/tpm-chip.c       | 10 +++++++
+ drivers/char/tpm/tpm-dev-common.c |  3 +++
+ drivers/char/tpm/tpm-interface.c  |  6 +++--
+ drivers/char/tpm/tpm2-sessions.c  | 45 ++++++++++++++++++-------------
+ 4 files changed, 44 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/char/tpm/tpm-chip.c b/drivers/char/tpm/tpm-chip.c
+index 854546000c92b..1ff99a7091bbb 100644
+--- a/drivers/char/tpm/tpm-chip.c
++++ b/drivers/char/tpm/tpm-chip.c
+@@ -674,6 +674,16 @@ EXPORT_SYMBOL_GPL(tpm_chip_register);
+  */
+ void tpm_chip_unregister(struct tpm_chip *chip)
+ {
++#ifdef CONFIG_TCG_TPM2_HMAC
++      int rc;
++
++      rc = tpm_try_get_ops(chip);
++      if (!rc) {
++              tpm2_end_auth_session(chip);
++              tpm_put_ops(chip);
++      }
++#endif
++
+       tpm_del_legacy_sysfs(chip);
+       if (tpm_is_hwrng_enabled(chip))
+               hwrng_unregister(&chip->hwrng);
+diff --git a/drivers/char/tpm/tpm-dev-common.c b/drivers/char/tpm/tpm-dev-common.c
+index c3fbbf4d3db79..48ff87444f851 100644
+--- a/drivers/char/tpm/tpm-dev-common.c
++++ b/drivers/char/tpm/tpm-dev-common.c
+@@ -27,6 +27,9 @@ static ssize_t tpm_dev_transmit(struct tpm_chip *chip, struct tpm_space *space,
+       struct tpm_header *header = (void *)buf;
+       ssize_t ret, len;
++      if (chip->flags & TPM_CHIP_FLAG_TPM2)
++              tpm2_end_auth_session(chip);
++
+       ret = tpm2_prepare_space(chip, space, buf, bufsiz);
+       /* If the command is not implemented by the TPM, synthesize a
+        * response with a TPM2_RC_COMMAND_CODE return for user-space.
+diff --git a/drivers/char/tpm/tpm-interface.c b/drivers/char/tpm/tpm-interface.c
+index 5da134f12c9a4..8134f002b121f 100644
+--- a/drivers/char/tpm/tpm-interface.c
++++ b/drivers/char/tpm/tpm-interface.c
+@@ -379,10 +379,12 @@ int tpm_pm_suspend(struct device *dev)
+       rc = tpm_try_get_ops(chip);
+       if (!rc) {
+-              if (chip->flags & TPM_CHIP_FLAG_TPM2)
++              if (chip->flags & TPM_CHIP_FLAG_TPM2) {
++                      tpm2_end_auth_session(chip);
+                       tpm2_shutdown(chip, TPM2_SU_STATE);
+-              else
++              } else {
+                       rc = tpm1_pm_suspend(chip, tpm_suspend_pcr);
++              }
+               tpm_put_ops(chip);
+       }
+diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c
+index a194535619929..c8fdfe901dfb7 100644
+--- a/drivers/char/tpm/tpm2-sessions.c
++++ b/drivers/char/tpm/tpm2-sessions.c
+@@ -333,6 +333,9 @@ void tpm_buf_append_hmac_session(struct tpm_chip *chip, struct tpm_buf *buf,
+       }
+ #ifdef CONFIG_TCG_TPM2_HMAC
++      /* The first write to /dev/tpm{rm0} will flush the session. */
++      attributes |= TPM2_SA_CONTINUE_SESSION;
++
+       /*
+        * The Architecture Guide requires us to strip trailing zeros
+        * before computing the HMAC
+@@ -484,7 +487,8 @@ static void tpm2_KDFe(u8 z[EC_PT_SZ], const char *str, u8 *pt_u, u8 *pt_v,
+       sha256_final(&sctx, out);
+ }
+-static void tpm_buf_append_salt(struct tpm_buf *buf, struct tpm_chip *chip)
++static void tpm_buf_append_salt(struct tpm_buf *buf, struct tpm_chip *chip,
++                              struct tpm2_auth *auth)
+ {
+       struct crypto_kpp *kpp;
+       struct kpp_request *req;
+@@ -543,7 +547,7 @@ static void tpm_buf_append_salt(struct tpm_buf *buf, struct tpm_chip *chip)
+       sg_set_buf(&s[0], chip->null_ec_key_x, EC_PT_SZ);
+       sg_set_buf(&s[1], chip->null_ec_key_y, EC_PT_SZ);
+       kpp_request_set_input(req, s, EC_PT_SZ*2);
+-      sg_init_one(d, chip->auth->salt, EC_PT_SZ);
++      sg_init_one(d, auth->salt, EC_PT_SZ);
+       kpp_request_set_output(req, d, EC_PT_SZ);
+       crypto_kpp_compute_shared_secret(req);
+       kpp_request_free(req);
+@@ -554,8 +558,7 @@ static void tpm_buf_append_salt(struct tpm_buf *buf, struct tpm_chip *chip)
+        * This works because KDFe fully consumes the secret before it
+        * writes the salt
+        */
+-      tpm2_KDFe(chip->auth->salt, "SECRET", x, chip->null_ec_key_x,
+-                chip->auth->salt);
++      tpm2_KDFe(auth->salt, "SECRET", x, chip->null_ec_key_x, auth->salt);
+  out:
+       crypto_free_kpp(kpp);
+@@ -853,7 +856,9 @@ int tpm_buf_check_hmac_response(struct tpm_chip *chip, struct tpm_buf *buf,
+               if (rc)
+                       /* manually close the session if it wasn't consumed */
+                       tpm2_flush_context(chip, auth->handle);
+-              memzero_explicit(auth, sizeof(*auth));
++
++              kfree_sensitive(auth);
++              chip->auth = NULL;
+       } else {
+               /* reset for next use  */
+               auth->session = TPM_HEADER_SIZE;
+@@ -881,7 +886,8 @@ void tpm2_end_auth_session(struct tpm_chip *chip)
+               return;
+       tpm2_flush_context(chip, auth->handle);
+-      memzero_explicit(auth, sizeof(*auth));
++      kfree_sensitive(auth);
++      chip->auth = NULL;
+ }
+ EXPORT_SYMBOL(tpm2_end_auth_session);
+@@ -962,16 +968,20 @@ static int tpm2_load_null(struct tpm_chip *chip, u32 *null_key)
+  */
+ int tpm2_start_auth_session(struct tpm_chip *chip)
+ {
++      struct tpm2_auth *auth;
+       struct tpm_buf buf;
+-      struct tpm2_auth *auth = chip->auth;
+-      int rc;
+       u32 null_key;
++      int rc;
+-      if (!auth) {
+-              dev_warn_once(&chip->dev, "auth session is not active\n");
++      if (chip->auth) {
++              dev_warn_once(&chip->dev, "auth session is active\n");
+               return 0;
+       }
++      auth = kzalloc(sizeof(*auth), GFP_KERNEL);
++      if (!auth)
++              return -ENOMEM;
++
+       rc = tpm2_load_null(chip, &null_key);
+       if (rc)
+               goto out;
+@@ -992,7 +1002,7 @@ int tpm2_start_auth_session(struct tpm_chip *chip)
+       tpm_buf_append(&buf, auth->our_nonce, sizeof(auth->our_nonce));
+       /* append encrypted salt and squirrel away unencrypted in auth */
+-      tpm_buf_append_salt(&buf, chip);
++      tpm_buf_append_salt(&buf, chip, auth);
+       /* session type (HMAC, audit or policy) */
+       tpm_buf_append_u8(&buf, TPM2_SE_HMAC);
+@@ -1014,10 +1024,13 @@ int tpm2_start_auth_session(struct tpm_chip *chip)
+       tpm_buf_destroy(&buf);
+-      if (rc)
+-              goto out;
++      if (rc == TPM2_RC_SUCCESS) {
++              chip->auth = auth;
++              return 0;
++      }
+- out:
++out:
++      kfree_sensitive(auth);
+       return rc;
+ }
+ EXPORT_SYMBOL(tpm2_start_auth_session);
+@@ -1367,10 +1380,6 @@ int tpm2_sessions_init(struct tpm_chip *chip)
+               return rc;
+       }
+-      chip->auth = kmalloc(sizeof(*chip->auth), GFP_KERNEL);
+-      if (!chip->auth)
+-              return -ENOMEM;
+-
+       return rc;
+ }
+ EXPORT_SYMBOL(tpm2_sessions_init);
+-- 
+2.43.0
+
diff --git a/queue-6.11/tpm-return-tpm2_sessions_init-when-null-key-creation.patch b/queue-6.11/tpm-return-tpm2_sessions_init-when-null-key-creation.patch
new file mode 100644 (file)
index 0000000..727c124
--- /dev/null
@@ -0,0 +1,52 @@
+From 96f0ec2253f3ed749a64b9681a5a7564cb0a35f5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Oct 2024 07:49:59 +0200
+Subject: tpm: Return tpm2_sessions_init() when null key creation fails
+
+From: Jarkko Sakkinen <jarkko@kernel.org>
+
+[ Upstream commit d658d59471ed80c4a8aaf082ccc3e83cdf5ae4c1 ]
+
+Do not continue tpm2_sessions_init() further if the null key pair creation
+fails.
+
+Cc: stable@vger.kernel.org # v6.10+
+Fixes: d2add27cf2b8 ("tpm: Add NULL primary creation")
+Reviewed-by: Stefan Berger <stefanb@linux.ibm.com>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/char/tpm/tpm2-sessions.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c
+index 44f60730cff44..9551eeca6d691 100644
+--- a/drivers/char/tpm/tpm2-sessions.c
++++ b/drivers/char/tpm/tpm2-sessions.c
+@@ -1347,14 +1347,21 @@ static int tpm2_create_null_primary(struct tpm_chip *chip)
+  *
+  * Derive and context save the null primary and allocate memory in the
+  * struct tpm_chip for the authorizations.
++ *
++ * Return:
++ * * 0                - OK
++ * * -errno   - A system error
++ * * TPM_RC   - A TPM error
+  */
+ int tpm2_sessions_init(struct tpm_chip *chip)
+ {
+       int rc;
+       rc = tpm2_create_null_primary(chip);
+-      if (rc)
+-              dev_err(&chip->dev, "TPM: security failed (NULL seed derivation): %d\n", rc);
++      if (rc) {
++              dev_err(&chip->dev, "null key creation failed with %d\n", rc);
++              return rc;
++      }
+       chip->auth = kmalloc(sizeof(*chip->auth), GFP_KERNEL);
+       if (!chip->auth)
+-- 
+2.43.0
+
diff --git a/queue-6.11/tpm-rollback-tpm2_load_null.patch b/queue-6.11/tpm-rollback-tpm2_load_null.patch
new file mode 100644 (file)
index 0000000..52924b2
--- /dev/null
@@ -0,0 +1,85 @@
+From 28f4491938d4191efbed1eb498343d7c602362e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 28 Oct 2024 07:50:00 +0200
+Subject: tpm: Rollback tpm2_load_null()
+
+From: Jarkko Sakkinen <jarkko@kernel.org>
+
+[ Upstream commit cc7d8594342a25693d40fe96f97e5c6c29ee609c ]
+
+Do not continue on tpm2_create_primary() failure in tpm2_load_null().
+
+Cc: stable@vger.kernel.org # v6.10+
+Fixes: eb24c9788cd9 ("tpm: disable the TPM if NULL name changes")
+Reviewed-by: Stefan Berger <stefanb@linux.ibm.com>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/char/tpm/tpm2-sessions.c | 44 +++++++++++++++++---------------
+ 1 file changed, 24 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/char/tpm/tpm2-sessions.c b/drivers/char/tpm/tpm2-sessions.c
+index 9551eeca6d691..a194535619929 100644
+--- a/drivers/char/tpm/tpm2-sessions.c
++++ b/drivers/char/tpm/tpm2-sessions.c
+@@ -915,33 +915,37 @@ static int tpm2_parse_start_auth_session(struct tpm2_auth *auth,
+ static int tpm2_load_null(struct tpm_chip *chip, u32 *null_key)
+ {
+-      int rc;
+       unsigned int offset = 0; /* dummy offset for null seed context */
+       u8 name[SHA256_DIGEST_SIZE + 2];
++      u32 tmp_null_key;
++      int rc;
+       rc = tpm2_load_context(chip, chip->null_key_context, &offset,
+-                             null_key);
+-      if (rc != -EINVAL)
+-              return rc;
++                             &tmp_null_key);
++      if (rc != -EINVAL) {
++              if (!rc)
++                      *null_key = tmp_null_key;
++              goto err;
++      }
+-      /* an integrity failure may mean the TPM has been reset */
+-      dev_err(&chip->dev, "NULL key integrity failure!\n");
+-      /* check the null name against what we know */
+-      tpm2_create_primary(chip, TPM2_RH_NULL, NULL, name);
+-      if (memcmp(name, chip->null_key_name, sizeof(name)) == 0)
+-              /* name unchanged, assume transient integrity failure */
+-              return rc;
+-      /*
+-       * Fatal TPM failure: the NULL seed has actually changed, so
+-       * the TPM must have been illegally reset.  All in-kernel TPM
+-       * operations will fail because the NULL primary can't be
+-       * loaded to salt the sessions, but disable the TPM anyway so
+-       * userspace programmes can't be compromised by it.
+-       */
+-      dev_err(&chip->dev, "NULL name has changed, disabling TPM due to interference\n");
++      /* Try to re-create null key, given the integrity failure: */
++      rc = tpm2_create_primary(chip, TPM2_RH_NULL, &tmp_null_key, name);
++      if (rc)
++              goto err;
++
++      /* Return null key if the name has not been changed: */
++      if (!memcmp(name, chip->null_key_name, sizeof(name))) {
++              *null_key = tmp_null_key;
++              return 0;
++      }
++
++      /* Deduce from the name change TPM interference: */
++      dev_err(&chip->dev, "null key integrity check failed\n");
++      tpm2_flush_context(chip, tmp_null_key);
+       chip->flags |= TPM_CHIP_FLAG_DISABLE;
+-      return rc;
++err:
++      return rc ? -ENODEV : 0;
+ }
+ /**
+-- 
+2.43.0
+
diff --git a/queue-6.11/vmscan-migrate-fix-page-count-imbalance-on-node-stat.patch b/queue-6.11/vmscan-migrate-fix-page-count-imbalance-on-node-stat.patch
new file mode 100644 (file)
index 0000000..a7effad
--- /dev/null
@@ -0,0 +1,75 @@
+From f529757c93ed2d2ae478c3405bfd882a15c72cec Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 25 Oct 2024 10:17:24 -0400
+Subject: vmscan,migrate: fix page count imbalance on node stats when demoting
+ pages
+
+From: Gregory Price <gourry@gourry.net>
+
+[ Upstream commit 35e41024c4c2b02ef8207f61b9004f6956cf037b ]
+
+When numa balancing is enabled with demotion, vmscan will call
+migrate_pages when shrinking LRUs.  migrate_pages will decrement the
+the node's isolated page count, leading to an imbalanced count when
+invoked from (MG)LRU code.
+
+The result is dmesg output like such:
+
+$ cat /proc/sys/vm/stat_refresh
+
+[77383.088417] vmstat_refresh: nr_isolated_anon -103212
+[77383.088417] vmstat_refresh: nr_isolated_file -899642
+
+This negative value may impact compaction and reclaim throttling.
+
+The following path produces the decrement:
+
+shrink_folio_list
+  demote_folio_list
+    migrate_pages
+      migrate_pages_batch
+        migrate_folio_move
+          migrate_folio_done
+            mod_node_page_state(-ve) <- decrement
+
+This path happens for SUCCESSFUL migrations, not failures.  Typically
+callers to migrate_pages are required to handle putback/accounting for
+failures, but this is already handled in the shrink code.
+
+When accounting for migrations, instead do not decrement the count when
+the migration reason is MR_DEMOTION.  As of v6.11, this demotion logic
+is the only source of MR_DEMOTION.
+
+Link: https://lkml.kernel.org/r/20241025141724.17927-1-gourry@gourry.net
+Fixes: 26aa2d199d6f ("mm/migrate: demote pages during reclaim")
+Signed-off-by: Gregory Price <gourry@gourry.net>
+Reviewed-by: Yang Shi <shy828301@gmail.com>
+Reviewed-by: Davidlohr Bueso <dave@stgolabs.net>
+Reviewed-by: Shakeel Butt <shakeel.butt@linux.dev>
+Reviewed-by: "Huang, Ying" <ying.huang@intel.com>
+Reviewed-by: Oscar Salvador <osalvador@suse.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Wei Xu <weixugc@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/migrate.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/mm/migrate.c b/mm/migrate.c
+index 368ab3878fa6e..75b858bd6aa58 100644
+--- a/mm/migrate.c
++++ b/mm/migrate.c
+@@ -1099,7 +1099,7 @@ static void migrate_folio_done(struct folio *src,
+        * not accounted to NR_ISOLATED_*. They can be recognized
+        * as __folio_test_movable
+        */
+-      if (likely(!__folio_test_movable(src)))
++      if (likely(!__folio_test_movable(src)) && reason != MR_DEMOTION)
+               mod_node_page_state(folio_pgdat(src), NR_ISOLATED_ANON +
+                                   folio_is_file_lru(src), -folio_nr_pages(src));
+-- 
+2.43.0
+
diff --git a/queue-6.11/x86-traps-enable-ubsan-traps-on-x86.patch b/queue-6.11/x86-traps-enable-ubsan-traps-on-x86.patch
new file mode 100644 (file)
index 0000000..d22fa79
--- /dev/null
@@ -0,0 +1,195 @@
+From 7ac38556c9a20e6ee1d3397f728262ec03fe2368 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 24 Jul 2024 00:01:55 +0000
+Subject: x86/traps: Enable UBSAN traps on x86
+
+From: Gatlin Newhouse <gatlin.newhouse@gmail.com>
+
+[ Upstream commit 7424fc6b86c8980a87169e005f5cd4438d18efe6 ]
+
+Currently ARM64 extracts which specific sanitizer has caused a trap via
+encoded data in the trap instruction. Clang on x86 currently encodes the
+same data in the UD1 instruction but x86 handle_bug() and
+is_valid_bugaddr() currently only look at UD2.
+
+Bring x86 to parity with ARM64, similar to commit 25b84002afb9 ("arm64:
+Support Clang UBSAN trap codes for better reporting"). See the llvm
+links for information about the code generation.
+
+Enable the reporting of UBSAN sanitizer details on x86 compiled with clang
+when CONFIG_UBSAN_TRAP=y by analysing UD1 and retrieving the type immediate
+which is encoded by the compiler after the UD1.
+
+[ tglx: Simplified it by moving the printk() into handle_bug() ]
+
+Signed-off-by: Gatlin Newhouse <gatlin.newhouse@gmail.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Cc: Kees Cook <keescook@chromium.org>
+Link: https://lore.kernel.org/all/20240724000206.451425-1-gatlin.newhouse@gmail.com
+Link: https://github.com/llvm/llvm-project/commit/c5978f42ec8e9#diff-bb68d7cd885f41cfc35843998b0f9f534adb60b415f647109e597ce448e92d9f
+Link: https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/X86/X86InstrSystem.td#L27
+Stable-dep-of: 1db272864ff2 ("x86/traps: move kmsan check after instrumentation_begin")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/bug.h | 12 ++++++++
+ arch/x86/kernel/traps.c    | 59 ++++++++++++++++++++++++++++++++++----
+ include/linux/ubsan.h      |  5 ++++
+ lib/Kconfig.ubsan          |  4 +--
+ 4 files changed, 73 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/include/asm/bug.h b/arch/x86/include/asm/bug.h
+index a3ec87d198ac8..806649c7f23dc 100644
+--- a/arch/x86/include/asm/bug.h
++++ b/arch/x86/include/asm/bug.h
+@@ -13,6 +13,18 @@
+ #define INSN_UD2      0x0b0f
+ #define LEN_UD2               2
++/*
++ * In clang we have UD1s reporting UBSAN failures on X86, 64 and 32bit.
++ */
++#define INSN_ASOP             0x67
++#define OPCODE_ESCAPE         0x0f
++#define SECOND_BYTE_OPCODE_UD1        0xb9
++#define SECOND_BYTE_OPCODE_UD2        0x0b
++
++#define BUG_NONE              0xffff
++#define BUG_UD1                       0xfffe
++#define BUG_UD2                       0xfffd
++
+ #ifdef CONFIG_GENERIC_BUG
+ #ifdef CONFIG_X86_32
+diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
+index 4fa0b17e5043a..415881607c5df 100644
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -42,6 +42,7 @@
+ #include <linux/hardirq.h>
+ #include <linux/atomic.h>
+ #include <linux/iommu.h>
++#include <linux/ubsan.h>
+ #include <asm/stacktrace.h>
+ #include <asm/processor.h>
+@@ -91,6 +92,47 @@ __always_inline int is_valid_bugaddr(unsigned long addr)
+       return *(unsigned short *)addr == INSN_UD2;
+ }
++/*
++ * Check for UD1 or UD2, accounting for Address Size Override Prefixes.
++ * If it's a UD1, get the ModRM byte to pass along to UBSan.
++ */
++__always_inline int decode_bug(unsigned long addr, u32 *imm)
++{
++      u8 v;
++
++      if (addr < TASK_SIZE_MAX)
++              return BUG_NONE;
++
++      v = *(u8 *)(addr++);
++      if (v == INSN_ASOP)
++              v = *(u8 *)(addr++);
++      if (v != OPCODE_ESCAPE)
++              return BUG_NONE;
++
++      v = *(u8 *)(addr++);
++      if (v == SECOND_BYTE_OPCODE_UD2)
++              return BUG_UD2;
++
++      if (!IS_ENABLED(CONFIG_UBSAN_TRAP) || v != SECOND_BYTE_OPCODE_UD1)
++              return BUG_NONE;
++
++      /* Retrieve the immediate (type value) for the UBSAN UD1 */
++      v = *(u8 *)(addr++);
++      if (X86_MODRM_RM(v) == 4)
++              addr++;
++
++      *imm = 0;
++      if (X86_MODRM_MOD(v) == 1)
++              *imm = *(u8 *)addr;
++      else if (X86_MODRM_MOD(v) == 2)
++              *imm = *(u32 *)addr;
++      else
++              WARN_ONCE(1, "Unexpected MODRM_MOD: %u\n", X86_MODRM_MOD(v));
++
++      return BUG_UD1;
++}
++
++
+ static nokprobe_inline int
+ do_trap_no_signal(struct task_struct *tsk, int trapnr, const char *str,
+                 struct pt_regs *regs, long error_code)
+@@ -216,6 +258,8 @@ static inline void handle_invalid_op(struct pt_regs *regs)
+ static noinstr bool handle_bug(struct pt_regs *regs)
+ {
+       bool handled = false;
++      int ud_type;
++      u32 imm;
+       /*
+        * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug()
+@@ -223,7 +267,8 @@ static noinstr bool handle_bug(struct pt_regs *regs)
+        * irqentry_enter().
+        */
+       kmsan_unpoison_entry_regs(regs);
+-      if (!is_valid_bugaddr(regs->ip))
++      ud_type = decode_bug(regs->ip, &imm);
++      if (ud_type == BUG_NONE)
+               return handled;
+       /*
+@@ -236,10 +281,14 @@ static noinstr bool handle_bug(struct pt_regs *regs)
+        */
+       if (regs->flags & X86_EFLAGS_IF)
+               raw_local_irq_enable();
+-      if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
+-          handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
+-              regs->ip += LEN_UD2;
+-              handled = true;
++      if (ud_type == BUG_UD2) {
++              if (report_bug(regs->ip, regs) == BUG_TRAP_TYPE_WARN ||
++                  handle_cfi_failure(regs) == BUG_TRAP_TYPE_WARN) {
++                      regs->ip += LEN_UD2;
++                      handled = true;
++              }
++      } else if (IS_ENABLED(CONFIG_UBSAN_TRAP)) {
++              pr_crit("%s at %pS\n", report_ubsan_failure(regs, imm), (void *)regs->ip);
+       }
+       if (regs->flags & X86_EFLAGS_IF)
+               raw_local_irq_disable();
+diff --git a/include/linux/ubsan.h b/include/linux/ubsan.h
+index bff7445498ded..d8219cbe09ff8 100644
+--- a/include/linux/ubsan.h
++++ b/include/linux/ubsan.h
+@@ -4,6 +4,11 @@
+ #ifdef CONFIG_UBSAN_TRAP
+ const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type);
++#else
++static inline const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type)
++{
++      return NULL;
++}
+ #endif
+ #endif
+diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
+index bdda600f8dfbe..1d4aa7a83b3a5 100644
+--- a/lib/Kconfig.ubsan
++++ b/lib/Kconfig.ubsan
+@@ -29,8 +29,8 @@ config UBSAN_TRAP
+         Also note that selecting Y will cause your kernel to Oops
+         with an "illegal instruction" error with no further details
+-        when a UBSAN violation occurs. (Except on arm64, which will
+-        report which Sanitizer failed.) This may make it hard to
++        when a UBSAN violation occurs. (Except on arm64 and x86, which
++        will report which Sanitizer failed.) This may make it hard to
+         determine whether an Oops was caused by UBSAN or to figure
+         out the details of a UBSAN violation. It makes the kernel log
+         output less useful for bug reports.
+-- 
+2.43.0
+
diff --git a/queue-6.11/x86-traps-move-kmsan-check-after-instrumentation_beg.patch b/queue-6.11/x86-traps-move-kmsan-check-after-instrumentation_beg.patch
new file mode 100644 (file)
index 0000000..0dc9fed
--- /dev/null
@@ -0,0 +1,78 @@
+From 2831a9462bb651fdd49c33fc8be9e64c2dc7212a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 16 Oct 2024 20:24:07 +0500
+Subject: x86/traps: move kmsan check after instrumentation_begin
+
+From: Sabyrzhan Tasbolatov <snovitoll@gmail.com>
+
+[ Upstream commit 1db272864ff250b5e607283eaec819e1186c8e26 ]
+
+During x86_64 kernel build with CONFIG_KMSAN, the objtool warns following:
+
+  AR      built-in.a
+  AR      vmlinux.a
+  LD      vmlinux.o
+vmlinux.o: warning: objtool: handle_bug+0x4: call to
+    kmsan_unpoison_entry_regs() leaves .noinstr.text section
+  OBJCOPY modules.builtin.modinfo
+  GEN     modules.builtin
+  MODPOST Module.symvers
+  CC      .vmlinux.export.o
+
+Moving kmsan_unpoison_entry_regs() _after_ instrumentation_begin() fixes
+the warning.
+
+There is decode_bug(regs->ip, &imm) is left before KMSAN unpoisoining, but
+it has the return condition and if we include it after
+instrumentation_begin() it results the warning "return with
+instrumentation enabled", hence, I'm concerned that regs will not be KMSAN
+unpoisoned if `ud_type == BUG_NONE` is true.
+
+Link: https://lkml.kernel.org/r/20241016152407.3149001-1-snovitoll@gmail.com
+Fixes: ba54d194f8da ("x86/traps: avoid KMSAN bugs originating from handle_bug()")
+Signed-off-by: Sabyrzhan Tasbolatov <snovitoll@gmail.com>
+Reviewed-by: Alexander Potapenko <glider@google.com>
+Cc: Borislav Petkov (AMD) <bp@alien8.de>
+Cc: Dave Hansen <dave.hansen@linux.intel.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/traps.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
+index 415881607c5df..29ec49209ae01 100644
+--- a/arch/x86/kernel/traps.c
++++ b/arch/x86/kernel/traps.c
+@@ -261,12 +261,6 @@ static noinstr bool handle_bug(struct pt_regs *regs)
+       int ud_type;
+       u32 imm;
+-      /*
+-       * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug()
+-       * is a rare case that uses @regs without passing them to
+-       * irqentry_enter().
+-       */
+-      kmsan_unpoison_entry_regs(regs);
+       ud_type = decode_bug(regs->ip, &imm);
+       if (ud_type == BUG_NONE)
+               return handled;
+@@ -275,6 +269,12 @@ static noinstr bool handle_bug(struct pt_regs *regs)
+        * All lies, just get the WARN/BUG out.
+        */
+       instrumentation_begin();
++      /*
++       * Normally @regs are unpoisoned by irqentry_enter(), but handle_bug()
++       * is a rare case that uses @regs without passing them to
++       * irqentry_enter().
++       */
++      kmsan_unpoison_entry_regs(regs);
+       /*
+        * Since we're emulating a CALL with exceptions, restore the interrupt
+        * state to what it was at the exception site.
+-- 
+2.43.0
+
diff --git a/queue-6.11/xfs-fix-finding-a-last-resort-ag-in-xfs_filestream_p.patch b/queue-6.11/xfs-fix-finding-a-last-resort-ag-in-xfs_filestream_p.patch
new file mode 100644 (file)
index 0000000..c26048c
--- /dev/null
@@ -0,0 +1,121 @@
+From 2d7f5f36f1ba0597997cee4a9832384ed862b05e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 23 Oct 2024 15:37:22 +0200
+Subject: xfs: fix finding a last resort AG in xfs_filestream_pick_ag
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit dc60992ce76fbc2f71c2674f435ff6bde2108028 ]
+
+When the main loop in xfs_filestream_pick_ag fails to find a suitable
+AG it tries to just pick the online AG.  But the loop for that uses
+args->pag as loop iterator while the later code expects pag to be
+set.  Fix this by reusing the max_pag case for this last resort, and
+also add a check for impossible case of no AG just to make sure that
+the uninitialized pag doesn't even escape in theory.
+
+Reported-by: syzbot+4125a3c514e3436a02e6@syzkaller.appspotmail.com
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Tested-by: syzbot+4125a3c514e3436a02e6@syzkaller.appspotmail.com
+Fixes: f8f1ed1ab3baba ("xfs: return a referenced perag from filestreams allocator")
+Cc: <stable@vger.kernel.org> # v6.3
+Reviewed-by: Darrick J. Wong <djwong@kernel.org>
+Signed-off-by: Carlos Maiolino <cem@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/xfs/xfs_filestream.c | 23 ++++++++++++-----------
+ fs/xfs/xfs_trace.h      | 15 +++++----------
+ 2 files changed, 17 insertions(+), 21 deletions(-)
+
+diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
+index e3aaa05555978..88bd23ce74cde 100644
+--- a/fs/xfs/xfs_filestream.c
++++ b/fs/xfs/xfs_filestream.c
+@@ -64,7 +64,7 @@ xfs_filestream_pick_ag(
+       struct xfs_perag        *pag;
+       struct xfs_perag        *max_pag = NULL;
+       xfs_extlen_t            minlen = *longest;
+-      xfs_extlen_t            free = 0, minfree, maxfree = 0;
++      xfs_extlen_t            minfree, maxfree = 0;
+       xfs_agnumber_t          agno;
+       bool                    first_pass = true;
+       int                     err;
+@@ -107,7 +107,6 @@ xfs_filestream_pick_ag(
+                            !(flags & XFS_PICK_USERDATA) ||
+                            (flags & XFS_PICK_LOWSPACE))) {
+                               /* Break out, retaining the reference on the AG. */
+-                              free = pag->pagf_freeblks;
+                               break;
+                       }
+               }
+@@ -150,23 +149,25 @@ xfs_filestream_pick_ag(
+                * grab.
+                */
+               if (!max_pag) {
+-                      for_each_perag_wrap(args->mp, 0, start_agno, args->pag)
++                      for_each_perag_wrap(args->mp, 0, start_agno, pag) {
++                              max_pag = pag;
+                               break;
+-                      atomic_inc(&args->pag->pagf_fstrms);
+-                      *longest = 0;
+-              } else {
+-                      pag = max_pag;
+-                      free = maxfree;
+-                      atomic_inc(&pag->pagf_fstrms);
++                      }
++
++                      /* Bail if there are no AGs at all to select from. */
++                      if (!max_pag)
++                              return -ENOSPC;
+               }
++
++              pag = max_pag;
++              atomic_inc(&pag->pagf_fstrms);
+       } else if (max_pag) {
+               xfs_perag_rele(max_pag);
+       }
+-      trace_xfs_filestream_pick(pag, pino, free);
++      trace_xfs_filestream_pick(pag, pino);
+       args->pag = pag;
+       return 0;
+-
+ }
+ static struct xfs_inode *
+diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
+index 180ce697305a9..f681a195a7441 100644
+--- a/fs/xfs/xfs_trace.h
++++ b/fs/xfs/xfs_trace.h
+@@ -691,8 +691,8 @@ DEFINE_FILESTREAM_EVENT(xfs_filestream_lookup);
+ DEFINE_FILESTREAM_EVENT(xfs_filestream_scan);
+ TRACE_EVENT(xfs_filestream_pick,
+-      TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino, xfs_extlen_t free),
+-      TP_ARGS(pag, ino, free),
++      TP_PROTO(struct xfs_perag *pag, xfs_ino_t ino),
++      TP_ARGS(pag, ino),
+       TP_STRUCT__entry(
+               __field(dev_t, dev)
+               __field(xfs_ino_t, ino)
+@@ -703,14 +703,9 @@ TRACE_EVENT(xfs_filestream_pick,
+       TP_fast_assign(
+               __entry->dev = pag->pag_mount->m_super->s_dev;
+               __entry->ino = ino;
+-              if (pag) {
+-                      __entry->agno = pag->pag_agno;
+-                      __entry->streams = atomic_read(&pag->pagf_fstrms);
+-              } else {
+-                      __entry->agno = NULLAGNUMBER;
+-                      __entry->streams = 0;
+-              }
+-              __entry->free = free;
++              __entry->agno = pag->pag_agno;
++              __entry->streams = atomic_read(&pag->pagf_fstrms);
++              __entry->free = pag->pagf_freeblks;
+       ),
+       TP_printk("dev %d:%d ino 0x%llx agno 0x%x streams %d free %d",
+                 MAJOR(__entry->dev), MINOR(__entry->dev),
+-- 
+2.43.0
+