]> git.ipfire.org Git - thirdparty/kernel/stable-queue.git/commitdiff
Fixes for 5.19
authorSasha Levin <sashal@kernel.org>
Mon, 15 Aug 2022 05:56:00 +0000 (01:56 -0400)
committerSasha Levin <sashal@kernel.org>
Mon, 15 Aug 2022 05:56:00 +0000 (01:56 -0400)
Signed-off-by: Sasha Levin <sashal@kernel.org>
196 files changed:
queue-5.19/__follow_mount_rcu-verify-that-mount_lock-remains-un.patch [new file with mode: 0644]
queue-5.19/acpi-cppc-do-not-prevent-cppc-from-working-in-the-fu.patch [new file with mode: 0644]
queue-5.19/alsa-bcd2000-fix-a-uaf-bug-on-the-error-path-of-prob.patch [new file with mode: 0644]
queue-5.19/alsa-hda-realtek-add-quirk-for-clevo-nv45pz.patch-15916 [new file with mode: 0644]
queue-5.19/alsa-hda-realtek-add-quirk-for-hp-spectre-x360-15-eb.patch [new file with mode: 0644]
queue-5.19/alsa-usb-audio-add-quirk-for-behringer-umc202hd.patch-24063 [new file with mode: 0644]
queue-5.19/arm-dts-uniphier-fix-usb-interrupts-for-pxs2-soc.patch-2243 [new file with mode: 0644]
queue-5.19/arm-marvell-update-pcie-fixup.patch [new file with mode: 0644]
queue-5.19/arm64-dts-uniphier-fix-usb-interrupts-for-pxs3-soc.patch-8226 [new file with mode: 0644]
queue-5.19/batman-adv-tracing-use-the-new-__vstring-helper.patch [new file with mode: 0644]
queue-5.19/block-add-bdev_max_segments-helper.patch [new file with mode: 0644]
queue-5.19/block-don-t-allow-the-same-type-rq_qos-add-more-than.patch [new file with mode: 0644]
queue-5.19/bpf-fix-kasan-use-after-free-read-in-compute_effecti.patch [new file with mode: 0644]
queue-5.19/btrfs-convert-count_max_extents-to-use-fs_info-max_e.patch [new file with mode: 0644]
queue-5.19/btrfs-ensure-pages-are-unlocked-on-cow_file_range-fa.patch [new file with mode: 0644]
queue-5.19/btrfs-fix-error-handling-of-fallback-uncompress-writ.patch [new file with mode: 0644]
queue-5.19/btrfs-join-running-log-transaction-when-logging-new-.patch [new file with mode: 0644]
queue-5.19/btrfs-let-can_allocate_chunk-return-error.patch [new file with mode: 0644]
queue-5.19/btrfs-properly-flag-filesystem-with-btrfs_feature_in.patch [new file with mode: 0644]
queue-5.19/btrfs-reject-log-replay-if-there-is-unsupported-ro-c.patch [new file with mode: 0644]
queue-5.19/btrfs-replace-btrfs_max_extent_size-with-fs_info-max.patch [new file with mode: 0644]
queue-5.19/btrfs-reset-block-group-chunk-force-if-we-have-to-wa.patch [new file with mode: 0644]
queue-5.19/btrfs-store-chunk-size-in-space-info-struct.patch [new file with mode: 0644]
queue-5.19/btrfs-tree-log-make-the-return-value-for-log-syncing.patch [new file with mode: 0644]
queue-5.19/btrfs-zoned-activate-metadata-block-group-on-flush_s.patch [new file with mode: 0644]
queue-5.19/btrfs-zoned-activate-necessary-block-group.patch [new file with mode: 0644]
queue-5.19/btrfs-zoned-disable-metadata-overcommit-for-zoned.patch [new file with mode: 0644]
queue-5.19/btrfs-zoned-finish-least-available-block-group-on-da.patch [new file with mode: 0644]
queue-5.19/btrfs-zoned-introduce-space_info-active_total_bytes.patch [new file with mode: 0644]
queue-5.19/btrfs-zoned-revive-max_zone_append_bytes.patch [new file with mode: 0644]
queue-5.19/btrfs-zoned-wait-until-zone-is-finished-when-allocat.patch [new file with mode: 0644]
queue-5.19/btrfs-zoned-write-out-partially-allocated-region.patch [new file with mode: 0644]
queue-5.19/coresight-clear-the-connection-field-properly.patch-22500 [new file with mode: 0644]
queue-5.19/crypto-blake2s-remove-shash-module.patch [new file with mode: 0644]
queue-5.19/crypto-ccp-use-kzalloc-for-sev-ioctl-interfaces-to-p.patch [new file with mode: 0644]
queue-5.19/csky-abiv1-fixup-compile-error.patch-25803 [new file with mode: 0644]
queue-5.19/dm-fix-dm-raid-crash-if-md_handle_request-splits-bio.patch [new file with mode: 0644]
queue-5.19/dm-raid-fix-address-sanitizer-warning-in-raid_resume.patch [new file with mode: 0644]
queue-5.19/dm-raid-fix-address-sanitizer-warning-in-raid_status.patch [new file with mode: 0644]
queue-5.19/dm-thin-fix-use-after-free-crash-in-dm_sm_register_t.patch [new file with mode: 0644]
queue-5.19/dm-writecache-set-a-default-max_writeback_jobs.patch [new file with mode: 0644]
queue-5.19/documentation-ext4-fix-cell-spacing-of-table-heading.patch [new file with mode: 0644]
queue-5.19/drivers-base-fix-userspace-break-from-using-bin_attr.patch [new file with mode: 0644]
queue-5.19/drm-amdgpu-check-bo-s-requested-pinning-domains-agai.patch [new file with mode: 0644]
queue-5.19/drm-dp-mst-read-the-extended-dpcd-capabilities-durin.patch [new file with mode: 0644]
queue-5.19/drm-fb-helper-fix-out-of-bounds-access.patch-14074 [new file with mode: 0644]
queue-5.19/drm-hyperv-drm-include-framebuffer-and-edid-headers.patch-15144 [new file with mode: 0644]
queue-5.19/drm-ingenic-use-the-highest-possible-dma-burst-size.patch-22931 [new file with mode: 0644]
queue-5.19/drm-mediatek-keep-dsi-as-lp00-before-dcs-cmds-transf.patch [new file with mode: 0644]
queue-5.19/drm-mediatek-modify-dsi-funcs-to-atomic-operations.patch-7159 [new file with mode: 0644]
queue-5.19/drm-mediatek-separate-poweron-poweroff-from-enable-d.patch-3169 [new file with mode: 0644]
queue-5.19/drm-nouveau-acpi-don-t-print-error-when-we-get-einpr.patch [new file with mode: 0644]
queue-5.19/drm-nouveau-don-t-pm_runtime_put_sync-only-pm_runtim.patch [new file with mode: 0644]
queue-5.19/drm-nouveau-fix-another-off-by-one-in-nvbios_addr.patch-28623 [new file with mode: 0644]
queue-5.19/drm-nouveau-kms-fix-failure-path-for-creating-dp-con.patch [new file with mode: 0644]
queue-5.19/drm-tegra-fix-vmapping-of-prime-buffers.patch-28390 [new file with mode: 0644]
queue-5.19/drm-vc4-hdmi-disable-audio-if-dmas-property-is-prese.patch [new file with mode: 0644]
queue-5.19/epoll-autoremove-wakers-even-more-aggressively.patch-6975 [new file with mode: 0644]
queue-5.19/ext4-add-ext4_inode_has_xattr_space-macro-in-xattr.h.patch [new file with mode: 0644]
queue-5.19/ext4-check-if-directory-block-is-within-i_size.patch [new file with mode: 0644]
queue-5.19/ext4-correct-max_inline_xattr_value_size-computing.patch [new file with mode: 0644]
queue-5.19/ext4-correct-the-misjudgment-in-ext4_iget_extra_inod.patch [new file with mode: 0644]
queue-5.19/ext4-fix-extent-status-tree-race-in-writeback-error-.patch [new file with mode: 0644]
queue-5.19/ext4-fix-race-when-reusing-xattr-blocks.patch [new file with mode: 0644]
queue-5.19/ext4-fix-reading-leftover-inlined-symlinks.patch [new file with mode: 0644]
queue-5.19/ext4-fix-use-after-free-in-ext4_xattr_set_entry.patch [new file with mode: 0644]
queue-5.19/ext4-fix-warning-in-ext4_iomap_begin-as-race-between.patch [new file with mode: 0644]
queue-5.19/ext4-make-sure-ext4_append-always-allocates-new-bloc.patch [new file with mode: 0644]
queue-5.19/ext4-remove-ea-inode-entry-from-mbcache-on-inode-evi.patch [new file with mode: 0644]
queue-5.19/ext4-unindent-codeblock-in-ext4_xattr_block_set.patch [new file with mode: 0644]
queue-5.19/ext4-update-s_overhead_clusters-in-the-superblock-du.patch [new file with mode: 0644]
queue-5.19/fbcon-fix-accelerated-fbdev-scrolling-while-logo-is-.patch [new file with mode: 0644]
queue-5.19/fbcon-fix-boundary-checks-for-fbcon-vc-n1-n2-paramet.patch [new file with mode: 0644]
queue-5.19/firmware-arm_scpi-ensure-scpi_info-is-not-assigned-i.patch [new file with mode: 0644]
queue-5.19/fix-short-copy-handling-in-copy_mc_pipe_to_iter.patch-23282 [new file with mode: 0644]
queue-5.19/ftrace-x86-add-back-ftrace_expected-assignment.patch-6434 [new file with mode: 0644]
queue-5.19/fuse-fix-deadlock-between-atomic-o_trunc-and-page-in.patch [new file with mode: 0644]
queue-5.19/fuse-ioctl-translate-enosys.patch-17448 [new file with mode: 0644]
queue-5.19/fuse-limit-nsec.patch-2050 [new file with mode: 0644]
queue-5.19/fuse-write-inode-in-fuse_release.patch-28840 [new file with mode: 0644]
queue-5.19/hid-hid-input-add-surface-go-battery-quirk.patch-7851 [new file with mode: 0644]
queue-5.19/hid-nintendo-add-missing-array-termination.patch-24808 [new file with mode: 0644]
queue-5.19/hid-wacom-don-t-register-pad_input-for-touch-switch.patch-820 [new file with mode: 0644]
queue-5.19/hid-wacom-only-report-rotation-for-art-pen.patch-25074 [new file with mode: 0644]
queue-5.19/hugetlb_cgroup-fix-wrong-hugetlb-cgroup-numa-stat.patch [new file with mode: 0644]
queue-5.19/ia64-processor-fix-wincompatible-pointer-types-in-ia.patch [new file with mode: 0644]
queue-5.19/iio-fix-iio_format_avail_range-printing-for-none-iio.patch [new file with mode: 0644]
queue-5.19/iio-light-isl29028-fix-the-warning-in-isl29028_remov.patch [new file with mode: 0644]
queue-5.19/input-gscps2-check-return-value-of-ioremap-in-gscps2.patch [new file with mode: 0644]
queue-5.19/intel_idle-make-spr-c1-and-c1e-be-independent.patch [new file with mode: 0644]
queue-5.19/intel_th-pci-add-meteor-lake-p-support.patch [new file with mode: 0644]
queue-5.19/intel_th-pci-add-raptor-lake-s-cpu-support.patch [new file with mode: 0644]
queue-5.19/intel_th-pci-add-raptor-lake-s-pch-support.patch [new file with mode: 0644]
queue-5.19/iommu-vt-d-avoid-invalid-memory-access-via-node_onli.patch [new file with mode: 0644]
queue-5.19/kexec-clean-up-arch_kexec_kernel_verify_sig.patch [new file with mode: 0644]
queue-5.19/kexec-keys-s390-make-use-of-built-in-and-secondary-k.patch [new file with mode: 0644]
queue-5.19/kexec_file-drop-weak-attribute-from-functions.patch [new file with mode: 0644]
queue-5.19/keys-asymmetric-enforce-sm2-signature-use-pkey-algo.patch [new file with mode: 0644]
queue-5.19/ksmbd-fix-heap-based-overflow-in-set_ntacl_dacl.patch-15594 [new file with mode: 0644]
queue-5.19/ksmbd-fix-memory-leak-in-smb2_handle_negotiate.patch-5672 [new file with mode: 0644]
queue-5.19/ksmbd-fix-use-after-free-bug-in-smb2_tree_disconect.patch-30412 [new file with mode: 0644]
queue-5.19/ksmbd-prevent-out-of-bound-read-for-smb2_tree_connne.patch [new file with mode: 0644]
queue-5.19/ksmbd-prevent-out-of-bound-read-for-smb2_write.patch-20867 [new file with mode: 0644]
queue-5.19/kvm-do-not-incorporate-page-offset-into-gfn-pfn-cach.patch [new file with mode: 0644]
queue-5.19/kvm-drop-unused-gpa-param-from-gfn-pfn-cache-s-__rel.patch [new file with mode: 0644]
queue-5.19/kvm-fix-multiple-races-in-gfn-pfn-cache-refresh.patch-19149 [new file with mode: 0644]
queue-5.19/kvm-fully-serialize-gfn-pfn-cache-refresh-via-mutex.patch-7350 [new file with mode: 0644]
queue-5.19/kvm-nvmx-account-for-kvm-reserved-cr4-bits-in-consis.patch [new file with mode: 0644]
queue-5.19/kvm-nvmx-attempt-to-load-perf_global_ctrl-on-nvmx-xf.patch [new file with mode: 0644]
queue-5.19/kvm-nvmx-inject-ud-if-vmxon-is-attempted-with-incomp.patch [new file with mode: 0644]
queue-5.19/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-su.patch [new file with mode: 0644]
queue-5.19/kvm-nvmx-snapshot-pre-vm-enter-bndcfgs-for-nested_ru.patch [new file with mode: 0644]
queue-5.19/kvm-nvmx-snapshot-pre-vm-enter-debugctl-for-nested_r.patch [new file with mode: 0644]
queue-5.19/kvm-put-the-extra-pfn-reference-when-reusing-a-pfn-i.patch [new file with mode: 0644]
queue-5.19/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch-16826 [new file with mode: 0644]
queue-5.19/kvm-set_msr_mce-permit-guests-to-ignore-single-bit-e.patch [new file with mode: 0644]
queue-5.19/kvm-svm-don-t-bug-if-userspace-injects-an-interrupt-.patch [new file with mode: 0644]
queue-5.19/kvm-vmx-add-helper-to-check-if-the-guest-pmu-has-per.patch [new file with mode: 0644]
queue-5.19/kvm-vmx-mark-all-perf_global_-ovf-_ctrl-bits-reserve.patch [new file with mode: 0644]
queue-5.19/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-a.patch [new file with mode: 0644]
queue-5.19/kvm-x86-mmu-treat-nx-as-a-valid-spte-bit-for-npt.patch-3797 [new file with mode: 0644]
queue-5.19/kvm-x86-pmu-accept-0-for-absent-pmu-msrs-when-host-i.patch [new file with mode: 0644]
queue-5.19/kvm-x86-pmu-ignore-pmu-global_ctrl-check-if-vpmu-doe.patch [new file with mode: 0644]
queue-5.19/kvm-x86-pmu-introduce-the-ctrl_mask-value-for-fixed-.patch [new file with mode: 0644]
queue-5.19/kvm-x86-set-error-code-to-segment-selector-on-lldt-l.patch [new file with mode: 0644]
queue-5.19/kvm-x86-signal-gp-not-eperm-on-bad-wrmsr-mci_ctl-sta.patch [new file with mode: 0644]
queue-5.19/kvm-x86-split-kvm_is_valid_cr4-and-export-only-the-n.patch [new file with mode: 0644]
queue-5.19/locking-csd_lock-change-csdlock_debug-from-early_par.patch [new file with mode: 0644]
queue-5.19/mbcache-add-functions-to-delete-entry-if-unused.patch-21045 [new file with mode: 0644]
queue-5.19/mbcache-don-t-reclaim-used-entries.patch-21676 [new file with mode: 0644]
queue-5.19/md-raid-destroy-the-bitmap-after-destroying-the-thre.patch [new file with mode: 0644]
queue-5.19/md-raid10-fix-kasan-warning.patch-1758 [new file with mode: 0644]
queue-5.19/media-isl7998x-select-v4l2_fwnode-to-fix-build-error.patch-24025 [new file with mode: 0644]
queue-5.19/media-patch-pci-atomisp_cmd-fix-three-missing-checks.patch [new file with mode: 0644]
queue-5.19/mips-cpuinfo-fix-a-warning-for-config_cpumask_offsta.patch [new file with mode: 0644]
queue-5.19/mm-damon-reclaim-fix-potential-memory-leak-in-damon_.patch [new file with mode: 0644]
queue-5.19/mtd-rawnand-arasan-fix-clock-rate-in-nv-ddr.patch-18581 [new file with mode: 0644]
queue-5.19/mtd-rawnand-arasan-update-nand-bus-clock-instead-of-.patch [new file with mode: 0644]
queue-5.19/net-9p-initialize-the-iounit-field-during-fid-creati.patch [new file with mode: 0644]
queue-5.19/ovl-drop-warn_on-dentry-is-null-in-ovl_encode_fh.patch-29266 [new file with mode: 0644]
queue-5.19/parisc-check-the-return-value-of-ioremap-in-lba_driv.patch [new file with mode: 0644]
queue-5.19/parisc-drop-pa_swapper_pg_lock-spinlock.patch-26906 [new file with mode: 0644]
queue-5.19/parisc-fix-device-names-in-proc-iomem.patch-18836 [new file with mode: 0644]
queue-5.19/parisc-io_pgetevents_time64-needs-compat-syscall-in-.patch [new file with mode: 0644]
queue-5.19/pci-aer-iterate-over-error-counters-instead-of-error.patch [new file with mode: 0644]
queue-5.19/pci-qcom-power-on-phy-before-ipq8074-dbi-register-ac.patch [new file with mode: 0644]
queue-5.19/powerpc-64e-fix-early-tlb-miss-with-kuap.patch-29650 [new file with mode: 0644]
queue-5.19/powerpc-fsl-pci-fix-class-code-of-pcie-root-port.patch-7836 [new file with mode: 0644]
queue-5.19/powerpc-powernv-avoid-crashing-if-rng-is-null.patch-9536 [new file with mode: 0644]
queue-5.19/powerpc-powernv-kvm-use-darn-for-h_random-on-power9.patch [new file with mode: 0644]
queue-5.19/powerpc-ptdump-fix-display-of-rw-pages-on-fsl_book3e.patch-3011 [new file with mode: 0644]
queue-5.19/powerpc-restore-config_debug_info-in-defconfigs.patch-27837 [new file with mode: 0644]
queue-5.19/revert-kvm-x86-pmu-accept-0-for-absent-pmu-msrs-when.patch [new file with mode: 0644]
queue-5.19/s390-unwind-fix-fgraph-return-address-recovery.patch [new file with mode: 0644]
queue-5.19/scsi-lpfc-remove-extra-atomic_inc-on-cmd_pending-in-.patch [new file with mode: 0644]
queue-5.19/scsi-qla2xxx-edif-fix-dropped-ike-message.patch [new file with mode: 0644]
queue-5.19/scsi-qla2xxx-fix-crash-due-to-stale-srb-access-aroun.patch [new file with mode: 0644]
queue-5.19/scsi-qla2xxx-fix-discovery-issues-in-fc-al-topology.patch-25366 [new file with mode: 0644]
queue-5.19/scsi-qla2xxx-fix-erroneous-mailbox-timeout-after-pci.patch [new file with mode: 0644]
queue-5.19/scsi-qla2xxx-fix-excessive-i-o-error-messages-by-def.patch [new file with mode: 0644]
queue-5.19/scsi-qla2xxx-fix-imbalance-vha-vref_count.patch-12738 [new file with mode: 0644]
queue-5.19/scsi-qla2xxx-fix-incorrect-display-of-max-frame-size.patch-30577 [new file with mode: 0644]
queue-5.19/scsi-qla2xxx-fix-losing-fcp-2-targets-during-port-pe.patch [new file with mode: 0644]
queue-5.19/scsi-qla2xxx-fix-losing-fcp-2-targets-on-long-port-d.patch [new file with mode: 0644]
queue-5.19/scsi-qla2xxx-fix-losing-target-when-it-reappears-dur.patch [new file with mode: 0644]
queue-5.19/scsi-qla2xxx-fix-response-queue-handler-reading-stal.patch [new file with mode: 0644]
queue-5.19/scsi-qla2xxx-turn-off-multi-queue-for-8g-adapters.patch-20754 [new file with mode: 0644]
queue-5.19/scsi-qla2xxx-update-manufacturer-details.patch [new file with mode: 0644]
queue-5.19/scsi-qla2xxx-wind-down-adapter-after-pcie-error.patch-31117 [new file with mode: 0644]
queue-5.19/scsi-qla2xxx-zero-undefined-mailbox-in-registers.patch-4895 [new file with mode: 0644]
queue-5.19/scsi-revert-scsi-qla2xxx-fix-disk-failure-to-redisco.patch [new file with mode: 0644]
queue-5.19/scsi-sg-allow-waiting-for-commands-to-complete-on-re.patch [new file with mode: 0644]
queue-5.19/serial-mvebu-uart-uart2-error-bits-clearing.patch-15528 [new file with mode: 0644]
queue-5.19/series
queue-5.19/soundwire-qcom-check-device-status-before-reading-de.patch [new file with mode: 0644]
queue-5.19/spmi-trace-fix-stack-out-of-bound-access-in-spmi-tra.patch [new file with mode: 0644]
queue-5.19/thermal-sysfs-fix-cooling_device_stats_setup-error-c.patch [new file with mode: 0644]
queue-5.19/timekeeping-contribute-wall-clock-to-rng-on-time-cha.patch [new file with mode: 0644]
queue-5.19/tpm-add-check-for-failure-mode-for-tpm2-modules.patch [new file with mode: 0644]
queue-5.19/tpm-eventlog-fix-section-mismatch-for-debug_section_.patch [new file with mode: 0644]
queue-5.19/tracing-events-add-__vstring-and-__assign_vstr-helpe.patch [new file with mode: 0644]
queue-5.19/tracing-use-a-struct-alignof-to-determine-trace-even.patch [new file with mode: 0644]
queue-5.19/tty-8250-add-support-for-brainboxes-px-cards.patch-25863 [new file with mode: 0644]
queue-5.19/tty-vt-initialize-unicode-screen-buffer.patch-8483 [new file with mode: 0644]
queue-5.19/um-remove-straying-parenthesis.patch-5379 [new file with mode: 0644]
queue-5.19/um-seed-rng-using-host-os-rng.patch-8415 [new file with mode: 0644]
queue-5.19/usb-dwc3-gadget-fix-high-speed-multiplier-setting.patch-13588 [new file with mode: 0644]
queue-5.19/usb-dwc3-gadget-refactor-dwc3_repare_one_trb.patch-8861 [new file with mode: 0644]
queue-5.19/usb-gadget-fix-use-after-free-read-in-usb_udc_uevent.patch-12274 [new file with mode: 0644]
queue-5.19/usb-hcd-fix-urb-giveback-issue-in-tasklet-function.patch-24136 [new file with mode: 0644]
queue-5.19/usb-typec-ucsi-acknowledge-the-get_error_status-comm.patch [new file with mode: 0644]
queue-5.19/usbnet-fix-linkwatch-use-after-free-on-disconnect.patch-30140 [new file with mode: 0644]
queue-5.19/usbnet-smsc95xx-fix-deadlock-on-runtime-resume.patch-22908 [new file with mode: 0644]
queue-5.19/vfs-check-the-truncate-maximum-size-in-inode_newsize.patch [new file with mode: 0644]
queue-5.19/x86-kprobes-update-kcb-status-flag-after-singlestepp.patch [new file with mode: 0644]
queue-5.19/x86-olpc-fix-logical-not-is-only-applied-to-the-left.patch [new file with mode: 0644]

diff --git a/queue-5.19/__follow_mount_rcu-verify-that-mount_lock-remains-un.patch b/queue-5.19/__follow_mount_rcu-verify-that-mount_lock-remains-un.patch
new file mode 100644 (file)
index 0000000..b6af4d9
--- /dev/null
@@ -0,0 +1,51 @@
+From 14eb61274e33ef7ccdbc22c5a5afca83678ede58 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Jul 2022 17:26:29 -0400
+Subject: __follow_mount_rcu(): verify that mount_lock remains unchanged
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+[ Upstream commit 20aac6c60981f5bfacd66661d090d907bf1482f0 ]
+
+Validate mount_lock seqcount as soon as we cross into mount in RCU
+mode.  Sure, ->mnt_root is pinned and will remain so until we
+do rcu_read_unlock() anyway, and we will eventually fail to unlazy if
+the mount_lock had been touched, but we might run into a hard error
+(e.g. -ENOENT) before trying to unlazy.  And it's possible to end
+up with RCU pathwalk racing with rename() and umount() in a way
+that would fail with -ENOENT while non-RCU pathwalk would've
+succeeded with any timings.
+
+Once upon a time we hadn't needed that, but analysis had been subtle,
+brittle and went out of window as soon as RENAME_EXCHANGE had been
+added.
+
+It's narrow, hard to hit and won't get you anything other than
+stray -ENOENT that could be arranged in much easier way with the
+same privileges, but it's a bug all the same.
+
+Cc: stable@kernel.org
+X-sky-is-falling: unlikely
+Fixes: da1ce0670c14 "vfs: add cross-rename"
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/namei.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/fs/namei.c b/fs/namei.c
+index 1f28d3f463c3..4dbf55b37ec6 100644
+--- a/fs/namei.c
++++ b/fs/namei.c
+@@ -1505,6 +1505,8 @@ static bool __follow_mount_rcu(struct nameidata *nd, struct path *path,
+                                * becoming unpinned.
+                                */
+                               flags = dentry->d_flags;
++                              if (read_seqretry(&mount_lock, nd->m_seq))
++                                      return false;
+                               continue;
+                       }
+                       if (read_seqretry(&mount_lock, nd->m_seq))
+-- 
+2.35.1
+
diff --git a/queue-5.19/acpi-cppc-do-not-prevent-cppc-from-working-in-the-fu.patch b/queue-5.19/acpi-cppc-do-not-prevent-cppc-from-working-in-the-fu.patch
new file mode 100644 (file)
index 0000000..72373b7
--- /dev/null
@@ -0,0 +1,131 @@
+From 9602ea6a05e28d70d8bfaf97631441db6c367207 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Jul 2022 19:41:10 +0200
+Subject: ACPI: CPPC: Do not prevent CPPC from working in the future
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+[ Upstream commit 4f4179fcf420873002035cf1941d844c9e0e7cb3 ]
+
+There is a problem with the current revision checks in
+is_cppc_supported() that they essentially prevent the CPPC support
+from working if a new _CPC package format revision being a proper
+superset of the v3 and only causing _CPC to return a package with more
+entries (while retaining the types and meaning of the entries defined by
+the v3) is introduced in the future and used by the platform firmware.
+
+In that case, as long as the number of entries in the _CPC return
+package is at least CPPC_V3_NUM_ENT, it should be perfectly fine to
+use the v3 support code and disregard the additional package entries
+added by the new package format revision.
+
+For this reason, drop is_cppc_supported() altogether, put the revision
+checks directly into acpi_cppc_processor_probe() so they are easier to
+follow and rework them to take the case mentioned above into account.
+
+Fixes: 4773e77cdc9b ("ACPI / CPPC: Add support for CPPC v3")
+Cc: 4.18+ <stable@vger.kernel.org> # 4.18+
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/acpi/cppc_acpi.c | 54 ++++++++++++++++++----------------------
+ include/acpi/cppc_acpi.h |  2 +-
+ 2 files changed, 25 insertions(+), 31 deletions(-)
+
+diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
+index 3c6d4ef87be0..1e15a9f25ae9 100644
+--- a/drivers/acpi/cppc_acpi.c
++++ b/drivers/acpi/cppc_acpi.c
+@@ -618,33 +618,6 @@ static int pcc_data_alloc(int pcc_ss_id)
+       return 0;
+ }
+-/* Check if CPPC revision + num_ent combination is supported */
+-static bool is_cppc_supported(int revision, int num_ent)
+-{
+-      int expected_num_ent;
+-
+-      switch (revision) {
+-      case CPPC_V2_REV:
+-              expected_num_ent = CPPC_V2_NUM_ENT;
+-              break;
+-      case CPPC_V3_REV:
+-              expected_num_ent = CPPC_V3_NUM_ENT;
+-              break;
+-      default:
+-              pr_debug("Firmware exports unsupported CPPC revision: %d\n",
+-                      revision);
+-              return false;
+-      }
+-
+-      if (expected_num_ent != num_ent) {
+-              pr_debug("Firmware exports %d entries. Expected: %d for CPPC rev:%d\n",
+-                      num_ent, expected_num_ent, revision);
+-              return false;
+-      }
+-
+-      return true;
+-}
+-
+ /*
+  * An example CPC table looks like the following.
+  *
+@@ -733,7 +706,6 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
+                        cpc_obj->type, pr->id);
+               goto out_free;
+       }
+-      cpc_ptr->num_entries = num_ent;
+       /* Second entry should be revision. */
+       cpc_obj = &out_obj->package.elements[1];
+@@ -744,10 +716,32 @@ int acpi_cppc_processor_probe(struct acpi_processor *pr)
+                        cpc_obj->type, pr->id);
+               goto out_free;
+       }
+-      cpc_ptr->version = cpc_rev;
+-      if (!is_cppc_supported(cpc_rev, num_ent))
++      if (cpc_rev < CPPC_V2_REV) {
++              pr_debug("Unsupported _CPC Revision (%d) for CPU:%d\n", cpc_rev,
++                       pr->id);
++              goto out_free;
++      }
++
++      /*
++       * Disregard _CPC if the number of entries in the return package is not
++       * as expected, but support future revisions being proper supersets of
++       * the v3 and only causing more entries to be returned by _CPC.
++       */
++      if ((cpc_rev == CPPC_V2_REV && num_ent != CPPC_V2_NUM_ENT) ||
++          (cpc_rev == CPPC_V3_REV && num_ent != CPPC_V3_NUM_ENT) ||
++          (cpc_rev > CPPC_V3_REV && num_ent <= CPPC_V3_NUM_ENT)) {
++              pr_debug("Unexpected number of _CPC return package entries (%d) for CPU:%d\n",
++                       num_ent, pr->id);
+               goto out_free;
++      }
++      if (cpc_rev > CPPC_V3_REV) {
++              num_ent = CPPC_V3_NUM_ENT;
++              cpc_rev = CPPC_V3_REV;
++      }
++
++      cpc_ptr->num_entries = num_ent;
++      cpc_ptr->version = cpc_rev;
+       /* Iterate through remaining entries in _CPC */
+       for (i = 2; i < num_ent; i++) {
+diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
+index d389bab54241..f73d357ecdf5 100644
+--- a/include/acpi/cppc_acpi.h
++++ b/include/acpi/cppc_acpi.h
+@@ -17,7 +17,7 @@
+ #include <acpi/pcc.h>
+ #include <acpi/processor.h>
+-/* Support CPPCv2 and CPPCv3  */
++/* CPPCv2 and CPPCv3 support */
+ #define CPPC_V2_REV   2
+ #define CPPC_V3_REV   3
+ #define CPPC_V2_NUM_ENT       21
+-- 
+2.35.1
+
diff --git a/queue-5.19/alsa-bcd2000-fix-a-uaf-bug-on-the-error-path-of-prob.patch b/queue-5.19/alsa-bcd2000-fix-a-uaf-bug-on-the-error-path-of-prob.patch
new file mode 100644 (file)
index 0000000..9d10a34
--- /dev/null
@@ -0,0 +1,48 @@
+From 9305db9f54d884157d6086c63315de25fc2537bb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 09:05:15 +0800
+Subject: ALSA: bcd2000: Fix a UAF bug on the error path of probing
+
+From: Zheyu Ma <zheyuma97@gmail.com>
+
+[ Upstream commit ffb2759df7efbc00187bfd9d1072434a13a54139 ]
+
+When the driver fails in snd_card_register() at probe time, it will free
+the 'bcd2k->midi_out_urb' before killing it, which may cause a UAF bug.
+
+The following log can reveal it:
+
+[   50.727020] BUG: KASAN: use-after-free in bcd2000_input_complete+0x1f1/0x2e0 [snd_bcd2000]
+[   50.727623] Read of size 8 at addr ffff88810fab0e88 by task swapper/4/0
+[   50.729530] Call Trace:
+[   50.732899]  bcd2000_input_complete+0x1f1/0x2e0 [snd_bcd2000]
+
+Fix this by adding usb_kill_urb() before usb_free_urb().
+
+Fixes: b47a22290d58 ("ALSA: MIDI driver for Behringer BCD2000 USB device")
+Signed-off-by: Zheyu Ma <zheyuma97@gmail.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20220715010515.2087925-1-zheyuma97@gmail.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/usb/bcd2000/bcd2000.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/sound/usb/bcd2000/bcd2000.c b/sound/usb/bcd2000/bcd2000.c
+index cd4a0bc6d278..7aec0a95c609 100644
+--- a/sound/usb/bcd2000/bcd2000.c
++++ b/sound/usb/bcd2000/bcd2000.c
+@@ -348,7 +348,8 @@ static int bcd2000_init_midi(struct bcd2000 *bcd2k)
+ static void bcd2000_free_usb_related_resources(struct bcd2000 *bcd2k,
+                                               struct usb_interface *interface)
+ {
+-      /* usb_kill_urb not necessary, urb is aborted automatically */
++      usb_kill_urb(bcd2k->midi_out_urb);
++      usb_kill_urb(bcd2k->midi_in_urb);
+       usb_free_urb(bcd2k->midi_out_urb);
+       usb_free_urb(bcd2k->midi_in_urb);
+-- 
+2.35.1
+
diff --git a/queue-5.19/alsa-hda-realtek-add-quirk-for-clevo-nv45pz.patch-15916 b/queue-5.19/alsa-hda-realtek-add-quirk-for-clevo-nv45pz.patch-15916
new file mode 100644 (file)
index 0000000..5718dd5
--- /dev/null
@@ -0,0 +1,35 @@
+From ec205a0c7a273c9b18e5866c366403ce72bde318 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 30 Jul 2022 21:22:43 -0600
+Subject: ALSA: hda/realtek: Add quirk for Clevo NV45PZ
+
+From: Tim Crawford <tcrawford@system76.com>
+
+[ Upstream commit be561ffad708f0cee18aee4231f80ffafaf7a419 ]
+
+Fixes headset detection on Clevo NV45PZ.
+
+Signed-off-by: Tim Crawford <tcrawford@system76.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20220731032243.4300-1-tcrawford@system76.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 2f55bc43bfa9..6a65b962e96d 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -9203,6 +9203,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+       SND_PCI_QUIRK(0x1558, 0x4018, "Clevo NV40M[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x4019, "Clevo NV40MZ", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x4020, "Clevo NV40MB", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
++      SND_PCI_QUIRK(0x1558, 0x4041, "Clevo NV4[15]PZ", ALC256_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x40a1, "Clevo NL40GU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x40c1, "Clevo NL40[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1558, 0x40d1, "Clevo NL41DU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
+-- 
+2.35.1
+
diff --git a/queue-5.19/alsa-hda-realtek-add-quirk-for-hp-spectre-x360-15-eb.patch b/queue-5.19/alsa-hda-realtek-add-quirk-for-hp-spectre-x360-15-eb.patch
new file mode 100644 (file)
index 0000000..1805320
--- /dev/null
@@ -0,0 +1,38 @@
+From e0d45922b6479d94a947996ea00a627b829dfb4d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Aug 2022 18:40:01 +0200
+Subject: ALSA: hda/realtek: Add quirk for HP Spectre x360 15-eb0xxx
+
+From: Ivan Hasenkampf <ivan.hasenkampf@gmail.com>
+
+[ Upstream commit 24df5428ef9d1ca1edd54eca7eb667110f2dfae3 ]
+
+Fixes speaker output on HP Spectre x360 15-eb0xxx
+
+[ re-sorted in SSID order by tiwai ]
+
+Signed-off-by: Ivan Hasenkampf <ivan.hasenkampf@gmail.com>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20220803164001.290394-1-ivan.hasenkampf@gmail.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 6a65b962e96d..93680621c90f 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -9044,6 +9044,8 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+       SND_PCI_QUIRK(0x103c, 0x861f, "HP Elite Dragonfly G1", ALC285_FIXUP_HP_GPIO_AMP_INIT),
+       SND_PCI_QUIRK(0x103c, 0x869d, "HP", ALC236_FIXUP_HP_MUTE_LED),
+       SND_PCI_QUIRK(0x103c, 0x86c7, "HP Envy AiO 32", ALC274_FIXUP_HP_ENVY_GPIO),
++      SND_PCI_QUIRK(0x103c, 0x86e7, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1),
++      SND_PCI_QUIRK(0x103c, 0x86e8, "HP Spectre x360 15-eb0xxx", ALC285_FIXUP_HP_SPECTRE_X360_EB1),
+       SND_PCI_QUIRK(0x103c, 0x8716, "HP Elite Dragonfly G2 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
+       SND_PCI_QUIRK(0x103c, 0x8720, "HP EliteBook x360 1040 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_AMP_INIT),
+       SND_PCI_QUIRK(0x103c, 0x8724, "HP EliteBook 850 G7", ALC285_FIXUP_HP_GPIO_LED),
+-- 
+2.35.1
+
diff --git a/queue-5.19/alsa-usb-audio-add-quirk-for-behringer-umc202hd.patch-24063 b/queue-5.19/alsa-usb-audio-add-quirk-for-behringer-umc202hd.patch-24063
new file mode 100644 (file)
index 0000000..c34f41c
--- /dev/null
@@ -0,0 +1,37 @@
+From 03c77c56dcd131bc52f4fee118914a7ca7899ceb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 16:39:48 +0200
+Subject: ALSA: usb-audio: Add quirk for Behringer UMC202HD
+
+From: Takashi Iwai <tiwai@suse.de>
+
+[ Upstream commit e086c37f876fd1f551e2b4f9be97d4a1923cd219 ]
+
+Just like other Behringer models, UMC202HD (USB ID 1397:0507) requires
+the quirk for the stable streaming, too.
+
+BugLink: https://bugzilla.kernel.org/show_bug.cgi?id=215934
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20220722143948.29804-1-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/usb/quirks.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/sound/usb/quirks.c b/sound/usb/quirks.c
+index 968d90caeefa..168fd802d70b 100644
+--- a/sound/usb/quirks.c
++++ b/sound/usb/quirks.c
+@@ -1843,6 +1843,8 @@ static const struct usb_audio_quirk_flags_table quirk_flags_table[] = {
+                  QUIRK_FLAG_SHARE_MEDIA_DEVICE | QUIRK_FLAG_ALIGN_TRANSFER),
+       DEVICE_FLG(0x1395, 0x740a, /* Sennheiser DECT */
+                  QUIRK_FLAG_GET_SAMPLE_RATE),
++      DEVICE_FLG(0x1397, 0x0507, /* Behringer UMC202HD */
++                 QUIRK_FLAG_PLAYBACK_FIRST | QUIRK_FLAG_GENERIC_IMPLICIT_FB),
+       DEVICE_FLG(0x1397, 0x0508, /* Behringer UMC204HD */
+                  QUIRK_FLAG_PLAYBACK_FIRST | QUIRK_FLAG_GENERIC_IMPLICIT_FB),
+       DEVICE_FLG(0x1397, 0x0509, /* Behringer UMC404HD */
+-- 
+2.35.1
+
diff --git a/queue-5.19/arm-dts-uniphier-fix-usb-interrupts-for-pxs2-soc.patch-2243 b/queue-5.19/arm-dts-uniphier-fix-usb-interrupts-for-pxs2-soc.patch-2243
new file mode 100644 (file)
index 0000000..c0359bc
--- /dev/null
@@ -0,0 +1,51 @@
+From b4923067cbd4e5d5d7227feb103adcbdacae9b80 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Aug 2022 22:36:25 +0900
+Subject: ARM: dts: uniphier: Fix USB interrupts for PXs2 SoC
+
+From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
+
+[ Upstream commit 9b0dc7abb5cc43a2dbf90690c3c6011dcadc574d ]
+
+An interrupt for USB device are shared with USB host. Set interrupt-names
+property to common "dwc_usb3" instead of "host" and "peripheral".
+
+Cc: stable@vger.kernel.org
+Fixes: 45be1573ad19 ("ARM: dts: uniphier: Add USB3 controller nodes")
+Reported-by: Ryuta NAKANISHI <nakanishi.ryuta@socionext.com>
+Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/boot/dts/uniphier-pxs2.dtsi | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/arch/arm/boot/dts/uniphier-pxs2.dtsi b/arch/arm/boot/dts/uniphier-pxs2.dtsi
+index e81e5937a60a..03301ddb3403 100644
+--- a/arch/arm/boot/dts/uniphier-pxs2.dtsi
++++ b/arch/arm/boot/dts/uniphier-pxs2.dtsi
+@@ -597,8 +597,8 @@ usb0: usb@65a00000 {
+                       compatible = "socionext,uniphier-dwc3", "snps,dwc3";
+                       status = "disabled";
+                       reg = <0x65a00000 0xcd00>;
+-                      interrupt-names = "host", "peripheral";
+-                      interrupts = <0 134 4>, <0 135 4>;
++                      interrupt-names = "dwc_usb3";
++                      interrupts = <0 134 4>;
+                       pinctrl-names = "default";
+                       pinctrl-0 = <&pinctrl_usb0>, <&pinctrl_usb2>;
+                       clock-names = "ref", "bus_early", "suspend";
+@@ -693,8 +693,8 @@ usb1: usb@65c00000 {
+                       compatible = "socionext,uniphier-dwc3", "snps,dwc3";
+                       status = "disabled";
+                       reg = <0x65c00000 0xcd00>;
+-                      interrupt-names = "host", "peripheral";
+-                      interrupts = <0 137 4>, <0 138 4>;
++                      interrupt-names = "dwc_usb3";
++                      interrupts = <0 137 4>;
+                       pinctrl-names = "default";
+                       pinctrl-0 = <&pinctrl_usb1>, <&pinctrl_usb3>;
+                       clock-names = "ref", "bus_early", "suspend";
+-- 
+2.35.1
+
diff --git a/queue-5.19/arm-marvell-update-pcie-fixup.patch b/queue-5.19/arm-marvell-update-pcie-fixup.patch
new file mode 100644 (file)
index 0000000..01897c5
--- /dev/null
@@ -0,0 +1,148 @@
+From abff78c10948d56fdc7d97de109f40246a0e9533 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Nov 2021 18:12:58 +0100
+Subject: ARM: Marvell: Update PCIe fixup
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Pali Rohár <pali@kernel.org>
+
+[ Upstream commit fdaa3725831972284ef2779ddba00491d9dbbfca ]
+
+- The code relies on rc_pci_fixup being called, which only happens
+  when CONFIG_PCI_QUIRKS is enabled, so add that to Kconfig. Omitting
+  this causes a booting failure with a non-obvious cause.
+- Update rc_pci_fixup to set the class properly, copying the
+  more modern style from other places
+- Correct the rc_pci_fixup comment
+
+This patch just re-applies commit 1dc831bf53fd ("ARM: Kirkwood: Update
+PCI-E fixup") for all other Marvell ARM platforms which have same buggy
+PCIe controller and do not use pci-mvebu.c controller driver yet.
+
+Long-term goal for these Marvell ARM platforms should be conversion to
+pci-mvebu.c controller driver and removal of these fixups in arch code.
+
+Signed-off-by: Pali Rohár <pali@kernel.org>
+Cc: Jason Gunthorpe <jgg@nvidia.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Gregory CLEMENT <gregory.clement@bootlin.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/mach-dove/Kconfig    |  1 +
+ arch/arm/mach-dove/pcie.c     | 11 ++++++++---
+ arch/arm/mach-mv78xx0/pcie.c  | 11 ++++++++---
+ arch/arm/mach-orion5x/Kconfig |  1 +
+ arch/arm/mach-orion5x/pci.c   | 12 +++++++++---
+ 5 files changed, 27 insertions(+), 9 deletions(-)
+
+diff --git a/arch/arm/mach-dove/Kconfig b/arch/arm/mach-dove/Kconfig
+index c30c69c664ea..a568ef90633e 100644
+--- a/arch/arm/mach-dove/Kconfig
++++ b/arch/arm/mach-dove/Kconfig
+@@ -8,6 +8,7 @@ menuconfig ARCH_DOVE
+       select PINCTRL_DOVE
+       select PLAT_ORION_LEGACY
+       select PM_GENERIC_DOMAINS if PM
++      select PCI_QUIRKS if PCI
+       help
+         Support for the Marvell Dove SoC 88AP510
+diff --git a/arch/arm/mach-dove/pcie.c b/arch/arm/mach-dove/pcie.c
+index 2a493bdfffc6..f90f42fc495e 100644
+--- a/arch/arm/mach-dove/pcie.c
++++ b/arch/arm/mach-dove/pcie.c
+@@ -136,14 +136,19 @@ static struct pci_ops pcie_ops = {
+       .write = pcie_wr_conf,
+ };
++/*
++ * The root complex has a hardwired class of PCI_CLASS_MEMORY_OTHER, when it
++ * is operating as a root complex this needs to be switched to
++ * PCI_CLASS_BRIDGE_HOST or Linux will errantly try to process the BAR's on
++ * the device. Decoding setup is handled by the orion code.
++ */
+ static void rc_pci_fixup(struct pci_dev *dev)
+ {
+-      /*
+-       * Prevent enumeration of root complex.
+-       */
+       if (dev->bus->parent == NULL && dev->devfn == 0) {
+               int i;
++              dev->class &= 0xff;
++              dev->class |= PCI_CLASS_BRIDGE_HOST << 8;
+               for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+                       dev->resource[i].start = 0;
+                       dev->resource[i].end   = 0;
+diff --git a/arch/arm/mach-mv78xx0/pcie.c b/arch/arm/mach-mv78xx0/pcie.c
+index e15646af7f26..4f1847babef2 100644
+--- a/arch/arm/mach-mv78xx0/pcie.c
++++ b/arch/arm/mach-mv78xx0/pcie.c
+@@ -180,14 +180,19 @@ static struct pci_ops pcie_ops = {
+       .write = pcie_wr_conf,
+ };
++/*
++ * The root complex has a hardwired class of PCI_CLASS_MEMORY_OTHER, when it
++ * is operating as a root complex this needs to be switched to
++ * PCI_CLASS_BRIDGE_HOST or Linux will errantly try to process the BAR's on
++ * the device. Decoding setup is handled by the orion code.
++ */
+ static void rc_pci_fixup(struct pci_dev *dev)
+ {
+-      /*
+-       * Prevent enumeration of root complex.
+-       */
+       if (dev->bus->parent == NULL && dev->devfn == 0) {
+               int i;
++              dev->class &= 0xff;
++              dev->class |= PCI_CLASS_BRIDGE_HOST << 8;
+               for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+                       dev->resource[i].start = 0;
+                       dev->resource[i].end   = 0;
+diff --git a/arch/arm/mach-orion5x/Kconfig b/arch/arm/mach-orion5x/Kconfig
+index bf833b51931d..aeac281c8764 100644
+--- a/arch/arm/mach-orion5x/Kconfig
++++ b/arch/arm/mach-orion5x/Kconfig
+@@ -7,6 +7,7 @@ menuconfig ARCH_ORION5X
+       select GPIOLIB
+       select MVEBU_MBUS
+       select FORCE_PCI
++      select PCI_QUIRKS
+       select PHYLIB if NETDEVICES
+       select PLAT_ORION_LEGACY
+       help
+diff --git a/arch/arm/mach-orion5x/pci.c b/arch/arm/mach-orion5x/pci.c
+index 92e938bba20d..9574c73f3c03 100644
+--- a/arch/arm/mach-orion5x/pci.c
++++ b/arch/arm/mach-orion5x/pci.c
+@@ -515,14 +515,20 @@ static int __init pci_setup(struct pci_sys_data *sys)
+ /*****************************************************************************
+  * General PCIe + PCI
+  ****************************************************************************/
++
++/*
++ * The root complex has a hardwired class of PCI_CLASS_MEMORY_OTHER, when it
++ * is operating as a root complex this needs to be switched to
++ * PCI_CLASS_BRIDGE_HOST or Linux will errantly try to process the BAR's on
++ * the device. Decoding setup is handled by the orion code.
++ */
+ static void rc_pci_fixup(struct pci_dev *dev)
+ {
+-      /*
+-       * Prevent enumeration of root complex.
+-       */
+       if (dev->bus->parent == NULL && dev->devfn == 0) {
+               int i;
++              dev->class &= 0xff;
++              dev->class |= PCI_CLASS_BRIDGE_HOST << 8;
+               for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
+                       dev->resource[i].start = 0;
+                       dev->resource[i].end   = 0;
+-- 
+2.35.1
+
diff --git a/queue-5.19/arm64-dts-uniphier-fix-usb-interrupts-for-pxs3-soc.patch-8226 b/queue-5.19/arm64-dts-uniphier-fix-usb-interrupts-for-pxs3-soc.patch-8226
new file mode 100644 (file)
index 0000000..794d180
--- /dev/null
@@ -0,0 +1,51 @@
+From 2a0fbcf58cb586a5517846eeb48180483653574a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Aug 2022 22:36:47 +0900
+Subject: arm64: dts: uniphier: Fix USB interrupts for PXs3 SoC
+
+From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
+
+[ Upstream commit fe17b91a7777df140d0f1433991da67ba658796c ]
+
+An interrupt for USB device are shared with USB host. Set interrupt-names
+property to common "dwc_usb3" instead of "host" and "peripheral".
+
+Cc: stable@vger.kernel.org
+Fixes: d7b9beb830d7 ("arm64: dts: uniphier: Add USB3 controller nodes")
+Reported-by: Ryuta NAKANISHI <nakanishi.ryuta@socionext.com>
+Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
+Signed-off-by: Arnd Bergmann <arnd@arndb.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
+index be97da132258..ba75adedbf79 100644
+--- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
++++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi
+@@ -599,8 +599,8 @@ usb0: usb@65a00000 {
+                       compatible = "socionext,uniphier-dwc3", "snps,dwc3";
+                       status = "disabled";
+                       reg = <0x65a00000 0xcd00>;
+-                      interrupt-names = "host", "peripheral";
+-                      interrupts = <0 134 4>, <0 135 4>;
++                      interrupt-names = "dwc_usb3";
++                      interrupts = <0 134 4>;
+                       pinctrl-names = "default";
+                       pinctrl-0 = <&pinctrl_usb0>, <&pinctrl_usb2>;
+                       clock-names = "ref", "bus_early", "suspend";
+@@ -701,8 +701,8 @@ usb1: usb@65c00000 {
+                       compatible = "socionext,uniphier-dwc3", "snps,dwc3";
+                       status = "disabled";
+                       reg = <0x65c00000 0xcd00>;
+-                      interrupt-names = "host", "peripheral";
+-                      interrupts = <0 137 4>, <0 138 4>;
++                      interrupt-names = "dwc_usb3";
++                      interrupts = <0 137 4>;
+                       pinctrl-names = "default";
+                       pinctrl-0 = <&pinctrl_usb1>, <&pinctrl_usb3>;
+                       clock-names = "ref", "bus_early", "suspend";
+-- 
+2.35.1
+
diff --git a/queue-5.19/batman-adv-tracing-use-the-new-__vstring-helper.patch b/queue-5.19/batman-adv-tracing-use-the-new-__vstring-helper.patch
new file mode 100644 (file)
index 0000000..785c2ca
--- /dev/null
@@ -0,0 +1,69 @@
+From 9d86e2369bbbb35df6c18f8e8705a0ae6144f1f3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Jul 2022 19:16:50 -0400
+Subject: batman-adv: tracing: Use the new __vstring() helper
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+[ Upstream commit 9abc291812d784bd4a26c01af4ebdbf9f2dbf0bb ]
+
+Instead of open coding a __dynamic_array() with a fixed length (which
+defeats the purpose of the dynamic array in the first place). Use the new
+__vstring() helper that will use a va_list and only write enough of the
+string into the ring buffer that is needed.
+
+Link: https://lkml.kernel.org/r/20220724191650.236b1355@rorschach.local.home
+
+Cc: Marek Lindner <mareklindner@neomailbox.ch>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Simon Wunderlich <sw@simonwunderlich.de>
+Cc: Antonio Quartulli <a@unstable.cc>
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: Paolo Abeni <pabeni@redhat.com>
+Cc: b.a.t.m.a.n@lists.open-mesh.org
+Cc: netdev@vger.kernel.org
+Acked-by: Sven Eckelmann <sven@narfation.org>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/batman-adv/trace.h | 9 ++-------
+ 1 file changed, 2 insertions(+), 7 deletions(-)
+
+diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h
+index d673ebdd0426..31c8f922651d 100644
+--- a/net/batman-adv/trace.h
++++ b/net/batman-adv/trace.h
+@@ -28,8 +28,6 @@
+ #endif /* CONFIG_BATMAN_ADV_TRACING */
+-#define BATADV_MAX_MSG_LEN    256
+-
+ TRACE_EVENT(batadv_dbg,
+           TP_PROTO(struct batadv_priv *bat_priv,
+@@ -40,16 +38,13 @@ TRACE_EVENT(batadv_dbg,
+           TP_STRUCT__entry(
+                   __string(device, bat_priv->soft_iface->name)
+                   __string(driver, KBUILD_MODNAME)
+-                  __dynamic_array(char, msg, BATADV_MAX_MSG_LEN)
++                  __vstring(msg, vaf->fmt, vaf->va)
+           ),
+           TP_fast_assign(
+                   __assign_str(device, bat_priv->soft_iface->name);
+                   __assign_str(driver, KBUILD_MODNAME);
+-                  WARN_ON_ONCE(vsnprintf(__get_dynamic_array(msg),
+-                                         BATADV_MAX_MSG_LEN,
+-                                         vaf->fmt,
+-                                         *vaf->va) >= BATADV_MAX_MSG_LEN);
++                  __assign_vstr(msg, vaf->fmt, vaf->va);
+           ),
+           TP_printk(
+-- 
+2.35.1
+
diff --git a/queue-5.19/block-add-bdev_max_segments-helper.patch b/queue-5.19/block-add-bdev_max_segments-helper.patch
new file mode 100644 (file)
index 0000000..6a83af3
--- /dev/null
@@ -0,0 +1,40 @@
+From 088e7356f4d57e46caa8324cf9d804d64c5bd46d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:38 +0900
+Subject: block: add bdev_max_segments() helper
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 65ea1b66482f415d51cd46515b02477257330339 ]
+
+Add bdev_max_segments() like other queue parameters.
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: Jens Axboe <axboe@kernel.dk>
+Reviewed-by: Christoph Hellwig <hch@lst.de>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/blkdev.h | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
+index 2f7b43444c5f..62e3ff52ab03 100644
+--- a/include/linux/blkdev.h
++++ b/include/linux/blkdev.h
+@@ -1206,6 +1206,11 @@ bdev_max_zone_append_sectors(struct block_device *bdev)
+       return queue_max_zone_append_sectors(bdev_get_queue(bdev));
+ }
++static inline unsigned int bdev_max_segments(struct block_device *bdev)
++{
++      return queue_max_segments(bdev_get_queue(bdev));
++}
++
+ static inline unsigned queue_logical_block_size(const struct request_queue *q)
+ {
+       int retval = 512;
+-- 
+2.35.1
+
diff --git a/queue-5.19/block-don-t-allow-the-same-type-rq_qos-add-more-than.patch b/queue-5.19/block-don-t-allow-the-same-type-rq_qos-add-more-than.patch
new file mode 100644 (file)
index 0000000..31a9480
--- /dev/null
@@ -0,0 +1,199 @@
+From dbda36bace01b8607bd2ea2ca109b43a94b54efd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 17:36:16 +0800
+Subject: block: don't allow the same type rq_qos add more than once
+
+From: Jinke Han <hanjinke.666@bytedance.com>
+
+[ Upstream commit 14a6e2eb7df5c7897c15b109cba29ab0c4a791b6 ]
+
+In our test of iocost, we encountered some list add/del corruptions of
+inner_walk list in ioc_timer_fn.
+
+The reason can be described as follows:
+
+cpu 0                                  cpu 1
+ioc_qos_write                          ioc_qos_write
+
+ioc = q_to_ioc(queue);
+if (!ioc) {
+        ioc = kzalloc();
+                                       ioc = q_to_ioc(queue);
+                                       if (!ioc) {
+                                               ioc = kzalloc();
+                                               ...
+                                               rq_qos_add(q, rqos);
+                                       }
+        ...
+        rq_qos_add(q, rqos);
+        ...
+}
+
+When the io.cost.qos file is written by two cpus concurrently, rq_qos may
+be added to one disk twice. In that case, there will be two iocs enabled
+and running on one disk. They own different iocgs on their active list. In
+the ioc_timer_fn function, because of the iocgs from two iocs have the
+same root iocg, the root iocg's walk_list may be overwritten by each other
+and this leads to list add/del corruptions in building or destroying the
+inner_walk list.
+
+And so far, the blk-rq-qos framework works in case that one instance for
+one type rq_qos per queue by default. This patch make this explicit and
+also fix the crash above.
+
+Signed-off-by: Jinke Han <hanjinke.666@bytedance.com>
+Reviewed-by: Muchun Song <songmuchun@bytedance.com>
+Acked-by: Tejun Heo <tj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Link: https://lore.kernel.org/r/20220720093616.70584-1-hanjinke.666@bytedance.com
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ block/blk-iocost.c    | 20 +++++++++++++-------
+ block/blk-iolatency.c | 18 +++++++++++-------
+ block/blk-rq-qos.h    | 11 ++++++++++-
+ block/blk-wbt.c       | 12 +++++++++++-
+ 4 files changed, 45 insertions(+), 16 deletions(-)
+
+diff --git a/block/blk-iocost.c b/block/blk-iocost.c
+index 33a11ba971ea..c6181357e545 100644
+--- a/block/blk-iocost.c
++++ b/block/blk-iocost.c
+@@ -2886,15 +2886,21 @@ static int blk_iocost_init(struct request_queue *q)
+        * called before policy activation completion, can't assume that the
+        * target bio has an iocg associated and need to test for NULL iocg.
+        */
+-      rq_qos_add(q, rqos);
++      ret = rq_qos_add(q, rqos);
++      if (ret)
++              goto err_free_ioc;
++
+       ret = blkcg_activate_policy(q, &blkcg_policy_iocost);
+-      if (ret) {
+-              rq_qos_del(q, rqos);
+-              free_percpu(ioc->pcpu_stat);
+-              kfree(ioc);
+-              return ret;
+-      }
++      if (ret)
++              goto err_del_qos;
+       return 0;
++
++err_del_qos:
++      rq_qos_del(q, rqos);
++err_free_ioc:
++      free_percpu(ioc->pcpu_stat);
++      kfree(ioc);
++      return ret;
+ }
+ static struct blkcg_policy_data *ioc_cpd_alloc(gfp_t gfp)
+diff --git a/block/blk-iolatency.c b/block/blk-iolatency.c
+index 9568bf8dfe82..7845dca5fcfd 100644
+--- a/block/blk-iolatency.c
++++ b/block/blk-iolatency.c
+@@ -773,19 +773,23 @@ int blk_iolatency_init(struct request_queue *q)
+       rqos->ops = &blkcg_iolatency_ops;
+       rqos->q = q;
+-      rq_qos_add(q, rqos);
+-
++      ret = rq_qos_add(q, rqos);
++      if (ret)
++              goto err_free;
+       ret = blkcg_activate_policy(q, &blkcg_policy_iolatency);
+-      if (ret) {
+-              rq_qos_del(q, rqos);
+-              kfree(blkiolat);
+-              return ret;
+-      }
++      if (ret)
++              goto err_qos_del;
+       timer_setup(&blkiolat->timer, blkiolatency_timer_fn, 0);
+       INIT_WORK(&blkiolat->enable_work, blkiolatency_enable_work_fn);
+       return 0;
++
++err_qos_del:
++      rq_qos_del(q, rqos);
++err_free:
++      kfree(blkiolat);
++      return ret;
+ }
+ static void iolatency_set_min_lat_nsec(struct blkcg_gq *blkg, u64 val)
+diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
+index 0e46052b018a..08b856570ad1 100644
+--- a/block/blk-rq-qos.h
++++ b/block/blk-rq-qos.h
+@@ -86,7 +86,7 @@ static inline void rq_wait_init(struct rq_wait *rq_wait)
+       init_waitqueue_head(&rq_wait->wait);
+ }
+-static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
++static inline int rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+ {
+       /*
+        * No IO can be in-flight when adding rqos, so freeze queue, which
+@@ -98,6 +98,8 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+       blk_mq_freeze_queue(q);
+       spin_lock_irq(&q->queue_lock);
++      if (rq_qos_id(q, rqos->id))
++              goto ebusy;
+       rqos->next = q->rq_qos;
+       q->rq_qos = rqos;
+       spin_unlock_irq(&q->queue_lock);
+@@ -109,6 +111,13 @@ static inline void rq_qos_add(struct request_queue *q, struct rq_qos *rqos)
+               blk_mq_debugfs_register_rqos(rqos);
+               mutex_unlock(&q->debugfs_mutex);
+       }
++
++      return 0;
++ebusy:
++      spin_unlock_irq(&q->queue_lock);
++      blk_mq_unfreeze_queue(q);
++      return -EBUSY;
++
+ }
+ static inline void rq_qos_del(struct request_queue *q, struct rq_qos *rqos)
+diff --git a/block/blk-wbt.c b/block/blk-wbt.c
+index 0c119be0e813..ae6ea0b54579 100644
+--- a/block/blk-wbt.c
++++ b/block/blk-wbt.c
+@@ -820,6 +820,7 @@ int wbt_init(struct request_queue *q)
+ {
+       struct rq_wb *rwb;
+       int i;
++      int ret;
+       rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
+       if (!rwb)
+@@ -846,7 +847,10 @@ int wbt_init(struct request_queue *q)
+       /*
+        * Assign rwb and add the stats callback.
+        */
+-      rq_qos_add(q, &rwb->rqos);
++      ret = rq_qos_add(q, &rwb->rqos);
++      if (ret)
++              goto err_free;
++
+       blk_stat_add_callback(q, rwb->cb);
+       rwb->min_lat_nsec = wbt_default_latency_nsec(q);
+@@ -855,4 +859,10 @@ int wbt_init(struct request_queue *q)
+       wbt_set_write_cache(q, test_bit(QUEUE_FLAG_WC, &q->queue_flags));
+       return 0;
++
++err_free:
++      blk_stat_free_callback(rwb->cb);
++      kfree(rwb);
++      return ret;
++
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.19/bpf-fix-kasan-use-after-free-read-in-compute_effecti.patch b/queue-5.19/bpf-fix-kasan-use-after-free-read-in-compute_effecti.patch
new file mode 100644 (file)
index 0000000..cad01d8
--- /dev/null
@@ -0,0 +1,142 @@
+From 98b8d2386f7e0243a609a693968681f979d159f1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 17 May 2022 11:04:20 -0700
+Subject: bpf: Fix KASAN use-after-free Read in compute_effective_progs
+
+From: Tadeusz Struk <tadeusz.struk@linaro.org>
+
+[ Upstream commit 4c46091ee985ae84c60c5e95055d779fcd291d87 ]
+
+Syzbot found a Use After Free bug in compute_effective_progs().
+The reproducer creates a number of BPF links, and causes a fault
+injected alloc to fail, while calling bpf_link_detach on them.
+Link detach triggers the link to be freed by bpf_link_free(),
+which calls __cgroup_bpf_detach() and update_effective_progs().
+If the memory allocation in this function fails, the function restores
+the pointer to the bpf_cgroup_link on the cgroup list, but the memory
+gets freed just after it returns. After this, every subsequent call to
+update_effective_progs() causes this already deallocated pointer to be
+dereferenced in prog_list_length(), and triggers KASAN UAF error.
+
+To fix this issue don't preserve the pointer to the prog or link in the
+list, but remove it and replace it with a dummy prog without shrinking
+the table. The subsequent call to __cgroup_bpf_detach() or
+__cgroup_bpf_detach() will correct it.
+
+Fixes: af6eea57437a ("bpf: Implement bpf_link-based cgroup BPF program attachment")
+Reported-by: <syzbot+f264bffdfbd5614f3bb2@syzkaller.appspotmail.com>
+Signed-off-by: Tadeusz Struk <tadeusz.struk@linaro.org>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Cc: <stable@vger.kernel.org>
+Link: https://syzkaller.appspot.com/bug?id=8ebf179a95c2a2670f7cf1ba62429ec044369db4
+Link: https://lore.kernel.org/bpf/20220517180420.87954-1-tadeusz.struk@linaro.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/cgroup.c | 70 ++++++++++++++++++++++++++++++++++++++-------
+ 1 file changed, 60 insertions(+), 10 deletions(-)
+
+diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
+index afb414b26d01..7a394f7c205c 100644
+--- a/kernel/bpf/cgroup.c
++++ b/kernel/bpf/cgroup.c
+@@ -720,6 +720,60 @@ static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
+       return ERR_PTR(-ENOENT);
+ }
++/**
++ * purge_effective_progs() - After compute_effective_progs fails to alloc new
++ *                           cgrp->bpf.inactive table we can recover by
++ *                           recomputing the array in place.
++ *
++ * @cgrp: The cgroup which descendants to travers
++ * @prog: A program to detach or NULL
++ * @link: A link to detach or NULL
++ * @atype: Type of detach operation
++ */
++static void purge_effective_progs(struct cgroup *cgrp, struct bpf_prog *prog,
++                                struct bpf_cgroup_link *link,
++                                enum cgroup_bpf_attach_type atype)
++{
++      struct cgroup_subsys_state *css;
++      struct bpf_prog_array *progs;
++      struct bpf_prog_list *pl;
++      struct list_head *head;
++      struct cgroup *cg;
++      int pos;
++
++      /* recompute effective prog array in place */
++      css_for_each_descendant_pre(css, &cgrp->self) {
++              struct cgroup *desc = container_of(css, struct cgroup, self);
++
++              if (percpu_ref_is_zero(&desc->bpf.refcnt))
++                      continue;
++
++              /* find position of link or prog in effective progs array */
++              for (pos = 0, cg = desc; cg; cg = cgroup_parent(cg)) {
++                      if (pos && !(cg->bpf.flags[atype] & BPF_F_ALLOW_MULTI))
++                              continue;
++
++                      head = &cg->bpf.progs[atype];
++                      list_for_each_entry(pl, head, node) {
++                              if (!prog_list_prog(pl))
++                                      continue;
++                              if (pl->prog == prog && pl->link == link)
++                                      goto found;
++                              pos++;
++                      }
++              }
++found:
++              BUG_ON(!cg);
++              progs = rcu_dereference_protected(
++                              desc->bpf.effective[atype],
++                              lockdep_is_held(&cgroup_mutex));
++
++              /* Remove the program from the array */
++              WARN_ONCE(bpf_prog_array_delete_safe_at(progs, pos),
++                        "Failed to purge a prog from array at index %d", pos);
++      }
++}
++
+ /**
+  * __cgroup_bpf_detach() - Detach the program or link from a cgroup, and
+  *                         propagate the change to descendants
+@@ -739,7 +793,6 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
+       struct bpf_prog_list *pl;
+       struct list_head *progs;
+       u32 flags;
+-      int err;
+       atype = to_cgroup_bpf_attach_type(type);
+       if (atype < 0)
+@@ -761,9 +814,12 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
+       pl->prog = NULL;
+       pl->link = NULL;
+-      err = update_effective_progs(cgrp, atype);
+-      if (err)
+-              goto cleanup;
++      if (update_effective_progs(cgrp, atype)) {
++              /* if update effective array failed replace the prog with a dummy prog*/
++              pl->prog = old_prog;
++              pl->link = link;
++              purge_effective_progs(cgrp, old_prog, link, atype);
++      }
+       /* now can actually delete it from this cgroup list */
+       list_del(&pl->node);
+@@ -775,12 +831,6 @@ static int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
+               bpf_prog_put(old_prog);
+       static_branch_dec(&cgroup_bpf_enabled_key[atype]);
+       return 0;
+-
+-cleanup:
+-      /* restore back prog or link */
+-      pl->prog = old_prog;
+-      pl->link = link;
+-      return err;
+ }
+ static int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
+-- 
+2.35.1
+
diff --git a/queue-5.19/btrfs-convert-count_max_extents-to-use-fs_info-max_e.patch b/queue-5.19/btrfs-convert-count_max_extents-to-use-fs_info-max_e.patch
new file mode 100644 (file)
index 0000000..0de61ef
--- /dev/null
@@ -0,0 +1,151 @@
+From dc339c15be74e8d0f728e755212659c1ca1f83ba Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:41 +0900
+Subject: btrfs: convert count_max_extents() to use fs_info->max_extent_size
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 7d7672bc5d1038c745716c397d892d21e29de71c ]
+
+If count_max_extents() uses BTRFS_MAX_EXTENT_SIZE to calculate the number
+of extents needed, btrfs release the metadata reservation too much on its
+way to write out the data.
+
+Now that BTRFS_MAX_EXTENT_SIZE is replaced with fs_info->max_extent_size,
+convert count_max_extents() to use it instead, and fix the calculation of
+the metadata reservation.
+
+CC: stable@vger.kernel.org # 5.12+
+Fixes: d8e3fb106f39 ("btrfs: zoned: use ZONE_APPEND write for zoned mode")
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/ctree.h          | 21 +++++++++++++--------
+ fs/btrfs/delalloc-space.c |  6 +++---
+ fs/btrfs/inode.c          | 16 ++++++++--------
+ 3 files changed, 24 insertions(+), 19 deletions(-)
+
+diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
+index 364c71ad7cce..d306db5dbdc2 100644
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -107,14 +107,6 @@ struct btrfs_ioctl_encoded_io_args;
+ #define BTRFS_STAT_CURR               0
+ #define BTRFS_STAT_PREV               1
+-/*
+- * Count how many BTRFS_MAX_EXTENT_SIZE cover the @size
+- */
+-static inline u32 count_max_extents(u64 size)
+-{
+-      return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
+-}
+-
+ static inline unsigned long btrfs_chunk_item_size(int num_stripes)
+ {
+       BUG_ON(num_stripes == 0);
+@@ -4017,6 +4009,19 @@ static inline bool btrfs_is_zoned(const struct btrfs_fs_info *fs_info)
+       return fs_info->zone_size > 0;
+ }
++/*
++ * Count how many fs_info->max_extent_size cover the @size
++ */
++static inline u32 count_max_extents(struct btrfs_fs_info *fs_info, u64 size)
++{
++#ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS
++      if (!fs_info)
++              return div_u64(size + BTRFS_MAX_EXTENT_SIZE - 1, BTRFS_MAX_EXTENT_SIZE);
++#endif
++
++      return div_u64(size + fs_info->max_extent_size - 1, fs_info->max_extent_size);
++}
++
+ static inline bool btrfs_is_data_reloc_root(const struct btrfs_root *root)
+ {
+       return root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID;
+diff --git a/fs/btrfs/delalloc-space.c b/fs/btrfs/delalloc-space.c
+index 36ab0859a263..1e8f17ff829e 100644
+--- a/fs/btrfs/delalloc-space.c
++++ b/fs/btrfs/delalloc-space.c
+@@ -273,7 +273,7 @@ static void calc_inode_reservations(struct btrfs_fs_info *fs_info,
+                                   u64 num_bytes, u64 disk_num_bytes,
+                                   u64 *meta_reserve, u64 *qgroup_reserve)
+ {
+-      u64 nr_extents = count_max_extents(num_bytes);
++      u64 nr_extents = count_max_extents(fs_info, num_bytes);
+       u64 csum_leaves = btrfs_csum_bytes_to_leaves(fs_info, disk_num_bytes);
+       u64 inode_update = btrfs_calc_metadata_size(fs_info, 1);
+@@ -350,7 +350,7 @@ int btrfs_delalloc_reserve_metadata(struct btrfs_inode *inode, u64 num_bytes,
+        * needs to free the reservation we just made.
+        */
+       spin_lock(&inode->lock);
+-      nr_extents = count_max_extents(num_bytes);
++      nr_extents = count_max_extents(fs_info, num_bytes);
+       btrfs_mod_outstanding_extents(inode, nr_extents);
+       inode->csum_bytes += disk_num_bytes;
+       btrfs_calculate_inode_block_rsv_size(fs_info, inode);
+@@ -413,7 +413,7 @@ void btrfs_delalloc_release_extents(struct btrfs_inode *inode, u64 num_bytes)
+       unsigned num_extents;
+       spin_lock(&inode->lock);
+-      num_extents = count_max_extents(num_bytes);
++      num_extents = count_max_extents(fs_info, num_bytes);
+       btrfs_mod_outstanding_extents(inode, -num_extents);
+       btrfs_calculate_inode_block_rsv_size(fs_info, inode);
+       spin_unlock(&inode->lock);
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index d9123bfeae8d..30e454197fb9 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -2217,10 +2217,10 @@ void btrfs_split_delalloc_extent(struct inode *inode,
+                * applies here, just in reverse.
+                */
+               new_size = orig->end - split + 1;
+-              num_extents = count_max_extents(new_size);
++              num_extents = count_max_extents(fs_info, new_size);
+               new_size = split - orig->start;
+-              num_extents += count_max_extents(new_size);
+-              if (count_max_extents(size) >= num_extents)
++              num_extents += count_max_extents(fs_info, new_size);
++              if (count_max_extents(fs_info, size) >= num_extents)
+                       return;
+       }
+@@ -2277,10 +2277,10 @@ void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
+        * this case.
+        */
+       old_size = other->end - other->start + 1;
+-      num_extents = count_max_extents(old_size);
++      num_extents = count_max_extents(fs_info, old_size);
+       old_size = new->end - new->start + 1;
+-      num_extents += count_max_extents(old_size);
+-      if (count_max_extents(new_size) >= num_extents)
++      num_extents += count_max_extents(fs_info, old_size);
++      if (count_max_extents(fs_info, new_size) >= num_extents)
+               return;
+       spin_lock(&BTRFS_I(inode)->lock);
+@@ -2359,7 +2359,7 @@ void btrfs_set_delalloc_extent(struct inode *inode, struct extent_state *state,
+       if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) {
+               struct btrfs_root *root = BTRFS_I(inode)->root;
+               u64 len = state->end + 1 - state->start;
+-              u32 num_extents = count_max_extents(len);
++              u32 num_extents = count_max_extents(fs_info, len);
+               bool do_list = !btrfs_is_free_space_inode(BTRFS_I(inode));
+               spin_lock(&BTRFS_I(inode)->lock);
+@@ -2401,7 +2401,7 @@ void btrfs_clear_delalloc_extent(struct inode *vfs_inode,
+       struct btrfs_inode *inode = BTRFS_I(vfs_inode);
+       struct btrfs_fs_info *fs_info = btrfs_sb(vfs_inode->i_sb);
+       u64 len = state->end + 1 - state->start;
+-      u32 num_extents = count_max_extents(len);
++      u32 num_extents = count_max_extents(fs_info, len);
+       if ((state->state & EXTENT_DEFRAG) && (*bits & EXTENT_DEFRAG)) {
+               spin_lock(&inode->lock);
+-- 
+2.35.1
+
diff --git a/queue-5.19/btrfs-ensure-pages-are-unlocked-on-cow_file_range-fa.patch b/queue-5.19/btrfs-ensure-pages-are-unlocked-on-cow_file_range-fa.patch
new file mode 100644 (file)
index 0000000..93c5f84
--- /dev/null
@@ -0,0 +1,196 @@
+From c88d441a0dec539a8ca20965770aa0ae59132c2a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 Jun 2022 15:40:59 +0900
+Subject: btrfs: ensure pages are unlocked on cow_file_range() failure
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 9ce7466f372d83054c7494f6b3e4b9abaf3f0355 ]
+
+There is a hung_task report on zoned btrfs like below.
+
+https://github.com/naota/linux/issues/59
+
+  [726.328648] INFO: task rocksdb:high0:11085 blocked for more than 241 seconds.
+  [726.329839]       Not tainted 5.16.0-rc1+ #1
+  [726.330484] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
+  [726.331603] task:rocksdb:high0   state:D stack:    0 pid:11085 ppid: 11082 flags:0x00000000
+  [726.331608] Call Trace:
+  [726.331611]  <TASK>
+  [726.331614]  __schedule+0x2e5/0x9d0
+  [726.331622]  schedule+0x58/0xd0
+  [726.331626]  io_schedule+0x3f/0x70
+  [726.331629]  __folio_lock+0x125/0x200
+  [726.331634]  ? find_get_entries+0x1bc/0x240
+  [726.331638]  ? filemap_invalidate_unlock_two+0x40/0x40
+  [726.331642]  truncate_inode_pages_range+0x5b2/0x770
+  [726.331649]  truncate_inode_pages_final+0x44/0x50
+  [726.331653]  btrfs_evict_inode+0x67/0x480
+  [726.331658]  evict+0xd0/0x180
+  [726.331661]  iput+0x13f/0x200
+  [726.331664]  do_unlinkat+0x1c0/0x2b0
+  [726.331668]  __x64_sys_unlink+0x23/0x30
+  [726.331670]  do_syscall_64+0x3b/0xc0
+  [726.331674]  entry_SYSCALL_64_after_hwframe+0x44/0xae
+  [726.331677] RIP: 0033:0x7fb9490a171b
+  [726.331681] RSP: 002b:00007fb943ffac68 EFLAGS: 00000246 ORIG_RAX: 0000000000000057
+  [726.331684] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fb9490a171b
+  [726.331686] RDX: 00007fb943ffb040 RSI: 000055a6bbe6ec20 RDI: 00007fb94400d300
+  [726.331687] RBP: 00007fb943ffad00 R08: 0000000000000000 R09: 0000000000000000
+  [726.331688] R10: 0000000000000031 R11: 0000000000000246 R12: 00007fb943ffb000
+  [726.331690] R13: 00007fb943ffb040 R14: 0000000000000000 R15: 00007fb943ffd260
+  [726.331693]  </TASK>
+
+While we debug the issue, we found running fstests generic/551 on 5GB
+non-zoned null_blk device in the emulated zoned mode also had a
+similar hung issue.
+
+Also, we can reproduce the same symptom with an error injected
+cow_file_range() setup.
+
+The hang occurs when cow_file_range() fails in the middle of
+allocation. cow_file_range() called from do_allocation_zoned() can
+split the give region ([start, end]) for allocation depending on
+current block group usages. When btrfs can allocate bytes for one part
+of the split regions but fails for the other region (e.g. because of
+-ENOSPC), we return the error leaving the pages in the succeeded regions
+locked. Technically, this occurs only when @unlock == 0. Otherwise, we
+unlock the pages in an allocated region after creating an ordered
+extent.
+
+Considering the callers of cow_file_range(unlock=0) won't write out
+the pages, we can unlock the pages on error exit from
+cow_file_range(). So, we can ensure all the pages except @locked_page
+are unlocked on error case.
+
+In summary, cow_file_range now behaves like this:
+
+- page_started == 1 (return value)
+  - All the pages are unlocked. IO is started.
+- unlock == 1
+  - All the pages except @locked_page are unlocked in any case
+- unlock == 0
+  - On success, all the pages are locked for writing out them
+  - On failure, all the pages except @locked_page are unlocked
+
+Fixes: 42c011000963 ("btrfs: zoned: introduce dedicated data write path for zoned filesystems")
+CC: stable@vger.kernel.org # 5.12+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/inode.c | 72 ++++++++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 64 insertions(+), 8 deletions(-)
+
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index d50448bf8eed..52b2d1b48d2e 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -1133,6 +1133,28 @@ static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
+  * *page_started is set to one if we unlock locked_page and do everything
+  * required to start IO on it.  It may be clean and already done with
+  * IO when we return.
++ *
++ * When unlock == 1, we unlock the pages in successfully allocated regions.
++ * When unlock == 0, we leave them locked for writing them out.
++ *
++ * However, we unlock all the pages except @locked_page in case of failure.
++ *
++ * In summary, page locking state will be as follow:
++ *
++ * - page_started == 1 (return value)
++ *     - All the pages are unlocked. IO is started.
++ *     - Note that this can happen only on success
++ * - unlock == 1
++ *     - All the pages except @locked_page are unlocked in any case
++ * - unlock == 0
++ *     - On success, all the pages are locked for writing out them
++ *     - On failure, all the pages except @locked_page are unlocked
++ *
++ * When a failure happens in the second or later iteration of the
++ * while-loop, the ordered extents created in previous iterations are kept
++ * intact. So, the caller must clean them up by calling
++ * btrfs_cleanup_ordered_extents(). See btrfs_run_delalloc_range() for
++ * example.
+  */
+ static noinline int cow_file_range(struct btrfs_inode *inode,
+                                  struct page *locked_page,
+@@ -1142,6 +1164,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
+       struct btrfs_root *root = inode->root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+       u64 alloc_hint = 0;
++      u64 orig_start = start;
+       u64 num_bytes;
+       unsigned long ram_size;
+       u64 cur_alloc_size = 0;
+@@ -1329,18 +1352,44 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
+       btrfs_dec_block_group_reservations(fs_info, ins.objectid);
+       btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
+ out_unlock:
++      /*
++       * Now, we have three regions to clean up:
++       *
++       * |-------(1)----|---(2)---|-------------(3)----------|
++       * `- orig_start  `- start  `- start + cur_alloc_size  `- end
++       *
++       * We process each region below.
++       */
++
+       clear_bits = EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DELALLOC_NEW |
+               EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV;
+       page_ops = PAGE_UNLOCK | PAGE_START_WRITEBACK | PAGE_END_WRITEBACK;
++
+       /*
+-       * If we reserved an extent for our delalloc range (or a subrange) and
+-       * failed to create the respective ordered extent, then it means that
+-       * when we reserved the extent we decremented the extent's size from
+-       * the data space_info's bytes_may_use counter and incremented the
+-       * space_info's bytes_reserved counter by the same amount. We must make
+-       * sure extent_clear_unlock_delalloc() does not try to decrement again
+-       * the data space_info's bytes_may_use counter, therefore we do not pass
+-       * it the flag EXTENT_CLEAR_DATA_RESV.
++       * For the range (1). We have already instantiated the ordered extents
++       * for this region. They are cleaned up by
++       * btrfs_cleanup_ordered_extents() in e.g,
++       * btrfs_run_delalloc_range(). EXTENT_LOCKED | EXTENT_DELALLOC are
++       * already cleared in the above loop. And, EXTENT_DELALLOC_NEW |
++       * EXTENT_DEFRAG | EXTENT_CLEAR_META_RESV are handled by the cleanup
++       * function.
++       *
++       * However, in case of unlock == 0, we still need to unlock the pages
++       * (except @locked_page) to ensure all the pages are unlocked.
++       */
++      if (!unlock && orig_start < start)
++              extent_clear_unlock_delalloc(inode, orig_start, start - 1,
++                                           locked_page, 0, page_ops);
++
++      /*
++       * For the range (2). If we reserved an extent for our delalloc range
++       * (or a subrange) and failed to create the respective ordered extent,
++       * then it means that when we reserved the extent we decremented the
++       * extent's size from the data space_info's bytes_may_use counter and
++       * incremented the space_info's bytes_reserved counter by the same
++       * amount. We must make sure extent_clear_unlock_delalloc() does not try
++       * to decrement again the data space_info's bytes_may_use counter,
++       * therefore we do not pass it the flag EXTENT_CLEAR_DATA_RESV.
+        */
+       if (extent_reserved) {
+               extent_clear_unlock_delalloc(inode, start,
+@@ -1352,6 +1401,13 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
+               if (start >= end)
+                       goto out;
+       }
++
++      /*
++       * For the range (3). We never touched the region. In addition to the
++       * clear_bits above, we add EXTENT_CLEAR_DATA_RESV to release the data
++       * space_info's bytes_may_use counter, reserved in
++       * btrfs_check_data_free_space().
++       */
+       extent_clear_unlock_delalloc(inode, start, end, locked_page,
+                                    clear_bits | EXTENT_CLEAR_DATA_RESV,
+                                    page_ops);
+-- 
+2.35.1
+
diff --git a/queue-5.19/btrfs-fix-error-handling-of-fallback-uncompress-writ.patch b/queue-5.19/btrfs-fix-error-handling-of-fallback-uncompress-writ.patch
new file mode 100644 (file)
index 0000000..98ec7f0
--- /dev/null
@@ -0,0 +1,72 @@
+From 21e8d3fc8352a74ce5685e4526adf9353be9b5c7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 Jun 2022 15:41:01 +0900
+Subject: btrfs: fix error handling of fallback uncompress write
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 71aa147b4d9d81fa65afa6016f50d7818b64a54f ]
+
+When cow_file_range() fails in the middle of the allocation loop, it
+unlocks the pages but leaves the ordered extents intact. Thus, we need
+to call btrfs_cleanup_ordered_extents() to finish the created ordered
+extents.
+
+Also, we need to call end_extent_writepage() if locked_page is available
+because btrfs_cleanup_ordered_extents() never processes the region on
+the locked_page.
+
+Furthermore, we need to set the mapping as error if locked_page is
+unavailable before unlocking the pages, so that the errno is properly
+propagated to the user space.
+
+CC: stable@vger.kernel.org # 5.18+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/inode.c | 17 +++++++++++++++--
+ 1 file changed, 15 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 52b2d1b48d2e..25872ee8594e 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -927,8 +927,18 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
+               goto out;
+       }
+       if (ret < 0) {
+-              if (locked_page)
++              btrfs_cleanup_ordered_extents(inode, locked_page, start, end - start + 1);
++              if (locked_page) {
++                      const u64 page_start = page_offset(locked_page);
++                      const u64 page_end = page_start + PAGE_SIZE - 1;
++
++                      btrfs_page_set_error(inode->root->fs_info, locked_page,
++                                           page_start, PAGE_SIZE);
++                      set_page_writeback(locked_page);
++                      end_page_writeback(locked_page);
++                      end_extent_writepage(locked_page, ret, page_start, page_end);
+                       unlock_page(locked_page);
++              }
+               goto out;
+       }
+@@ -1377,9 +1387,12 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
+        * However, in case of unlock == 0, we still need to unlock the pages
+        * (except @locked_page) to ensure all the pages are unlocked.
+        */
+-      if (!unlock && orig_start < start)
++      if (!unlock && orig_start < start) {
++              if (!locked_page)
++                      mapping_set_error(inode->vfs_inode.i_mapping, ret);
+               extent_clear_unlock_delalloc(inode, orig_start, start - 1,
+                                            locked_page, 0, page_ops);
++      }
+       /*
+        * For the range (2). If we reserved an extent for our delalloc range
+-- 
+2.35.1
+
diff --git a/queue-5.19/btrfs-join-running-log-transaction-when-logging-new-.patch b/queue-5.19/btrfs-join-running-log-transaction-when-logging-new-.patch
new file mode 100644 (file)
index 0000000..182046b
--- /dev/null
@@ -0,0 +1,89 @@
+From e626205ff01e1b66d58a88f818edc71f3d358f94 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 17 Jul 2022 22:05:05 +0100
+Subject: btrfs: join running log transaction when logging new name
+
+From: Filipe Manana <fdmanana@suse.com>
+
+[ Upstream commit 723df2bcc9e166ac7fb82b3932a53e09415dfcde ]
+
+When logging a new name, in case of a rename, we pin the log before
+changing it. We then either delete a directory entry from the log or
+insert a key range item to mark the old name for deletion on log replay.
+
+However when doing one of those log changes we may have another task that
+started writing out the log (at btrfs_sync_log()) and it started before
+we pinned the log root. So we may end up changing a log tree while its
+writeback is being started by another task syncing the log. This can lead
+to inconsistencies in a log tree and other unexpected results during log
+replay, because we can get some committed node pointing to a node/leaf
+that ends up not getting written to disk before the next log commit.
+
+The problem, conceptually, started to happen in commit 88d2beec7e53fc
+("btrfs: avoid logging all directory changes during renames"), because
+there we started to update the log without joining its current transaction
+first.
+
+However the problem only became visible with commit 259c4b96d78dda
+("btrfs: stop doing unnecessary log updates during a rename"), and that is
+because we used to pin the log at btrfs_rename() and then before entering
+btrfs_log_new_name(), when unlinking the old dentry, we ended up at
+btrfs_del_inode_ref_in_log() and btrfs_del_dir_entries_in_log(). Both
+of them join the current log transaction, effectively waiting for any log
+transaction writeout (due to acquiring the root's log_mutex). This made it
+safe even after leaving the current log transaction, because we remained
+with the log pinned when we called btrfs_log_new_name().
+
+Then in commit 259c4b96d78dda ("btrfs: stop doing unnecessary log updates
+during a rename"), we removed the log pinning from btrfs_rename() and
+stopped calling btrfs_del_inode_ref_in_log() and
+btrfs_del_dir_entries_in_log() during the rename, and started to do all
+the needed work at btrfs_log_new_name(), but without joining the current
+log transaction, only pinning the log, which is racy because another task
+may have started writeout of the log tree right before we pinned the log.
+
+Both commits landed in kernel 5.18, so it doesn't make any practical
+difference which should be blamed, but I'm blaming the second commit only
+because with the first one, by chance, the problem did not happen due to
+the fact we joined the log transaction after pinning the log and unpinned
+it only after calling btrfs_log_new_name().
+
+So make btrfs_log_new_name() join the current log transaction instead of
+pinning it, so that we never do log updates if it's writeout is starting.
+
+Fixes: 259c4b96d78dda ("btrfs: stop doing unnecessary log updates during a rename")
+CC: stable@vger.kernel.org # 5.18+
+Reported-by: Zygo Blaxell <ce3g8jdj@umail.furryterror.org>
+Tested-by: Zygo Blaxell <ce3g8jdj@umail.furryterror.org>
+Reviewed-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/tree-log.c | 9 ++++++++-
+ 1 file changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index c94713c811bb..3c962bfd204f 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -7029,8 +7029,15 @@ void btrfs_log_new_name(struct btrfs_trans_handle *trans,
+                * anyone from syncing the log until we have updated both inodes
+                * in the log.
+                */
++              ret = join_running_log_trans(root);
++              /*
++               * At least one of the inodes was logged before, so this should
++               * not fail, but if it does, it's not serious, just bail out and
++               * mark the log for a full commit.
++               */
++              if (WARN_ON_ONCE(ret < 0))
++                      goto out;
+               log_pinned = true;
+-              btrfs_pin_log_trans(root);
+               path = btrfs_alloc_path();
+               if (!path) {
+-- 
+2.35.1
+
diff --git a/queue-5.19/btrfs-let-can_allocate_chunk-return-error.patch b/queue-5.19/btrfs-let-can_allocate_chunk-return-error.patch
new file mode 100644 (file)
index 0000000..affaed2
--- /dev/null
@@ -0,0 +1,66 @@
+From 3abed6315e58e40802fcc82206f4a5ce7afff442 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:43 +0900
+Subject: btrfs: let can_allocate_chunk return error
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit bb9950d3df7169a673c594d38fb74e241ed4fb2a ]
+
+For the later patch, convert the return type from bool to int and return
+errors. No functional changes.
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent-tree.c | 15 ++++++++-------
+ 1 file changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index a3afc15430ce..506852795db1 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -3981,12 +3981,12 @@ static void found_extent(struct find_free_extent_ctl *ffe_ctl,
+       }
+ }
+-static bool can_allocate_chunk(struct btrfs_fs_info *fs_info,
+-                             struct find_free_extent_ctl *ffe_ctl)
++static int can_allocate_chunk(struct btrfs_fs_info *fs_info,
++                            struct find_free_extent_ctl *ffe_ctl)
+ {
+       switch (ffe_ctl->policy) {
+       case BTRFS_EXTENT_ALLOC_CLUSTERED:
+-              return true;
++              return 0;
+       case BTRFS_EXTENT_ALLOC_ZONED:
+               /*
+                * If we have enough free space left in an already
+@@ -3996,8 +3996,8 @@ static bool can_allocate_chunk(struct btrfs_fs_info *fs_info,
+                */
+               if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size &&
+                   !btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
+-                      return false;
+-              return true;
++                      return -ENOSPC;
++              return 0;
+       default:
+               BUG();
+       }
+@@ -4079,8 +4079,9 @@ static int find_free_extent_update_loop(struct btrfs_fs_info *fs_info,
+                       int exist = 0;
+                       /*Check if allocation policy allows to create a new chunk */
+-                      if (!can_allocate_chunk(fs_info, ffe_ctl))
+-                              return -ENOSPC;
++                      ret = can_allocate_chunk(fs_info, ffe_ctl);
++                      if (ret)
++                              return ret;
+                       trans = current->journal_info;
+                       if (trans)
+-- 
+2.35.1
+
diff --git a/queue-5.19/btrfs-properly-flag-filesystem-with-btrfs_feature_in.patch b/queue-5.19/btrfs-properly-flag-filesystem-with-btrfs_feature_in.patch
new file mode 100644 (file)
index 0000000..50d9860
--- /dev/null
@@ -0,0 +1,72 @@
+From eb0b53c37b13f7ee32170cfabb3b00d42553f230 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Jun 2022 10:55:47 +0300
+Subject: btrfs: properly flag filesystem with
+ BTRFS_FEATURE_INCOMPAT_BIG_METADATA
+
+From: Nikolay Borisov <nborisov@suse.com>
+
+[ Upstream commit e26b04c4c91925dba57324db177a24e18e2d0013 ]
+
+Commit 6f93e834fa7c seemingly inadvertently moved the code responsible
+for flagging the filesystem as having BIG_METADATA to a place where
+setting the flag was essentially lost. This means that
+filesystems created with kernels containing this bug (starting with 5.15)
+can potentially be mounted by older (pre-3.4) kernels. In reality
+chances for this happening are low because there are other incompat
+flags introduced in the mean time. Still the correct behavior is to set
+INCOMPAT_BIG_METADATA flag and persist this in the superblock.
+
+Fixes: 6f93e834fa7c ("btrfs: fix upper limit for max_inline for page size 64K")
+CC: stable@vger.kernel.org # 5.4+
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: Nikolay Borisov <nborisov@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/disk-io.c | 21 +++++++++++----------
+ 1 file changed, 11 insertions(+), 10 deletions(-)
+
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 8daa5bb93a4c..e33f2fa50b0f 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -3593,16 +3593,6 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
+        */
+       fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
+-      /*
+-       * Flag our filesystem as having big metadata blocks if they are bigger
+-       * than the page size.
+-       */
+-      if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
+-              if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
+-                      btrfs_info(fs_info,
+-                              "flagging fs with big metadata feature");
+-              features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
+-      }
+       /* Set up fs_info before parsing mount options */
+       nodesize = btrfs_super_nodesize(disk_super);
+@@ -3643,6 +3633,17 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
+       if (features & BTRFS_FEATURE_INCOMPAT_SKINNY_METADATA)
+               btrfs_info(fs_info, "has skinny extents");
++      /*
++       * Flag our filesystem as having big metadata blocks if they are bigger
++       * than the page size.
++       */
++      if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
++              if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
++                      btrfs_info(fs_info,
++                              "flagging fs with big metadata feature");
++              features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
++      }
++
+       /*
+        * mixed block groups end up with duplicate but slightly offset
+        * extent buffers for the same range.  It leads to corruptions
+-- 
+2.35.1
+
diff --git a/queue-5.19/btrfs-reject-log-replay-if-there-is-unsupported-ro-c.patch b/queue-5.19/btrfs-reject-log-replay-if-there-is-unsupported-ro-c.patch
new file mode 100644 (file)
index 0000000..9e3ca8d
--- /dev/null
@@ -0,0 +1,89 @@
+From ce8a11f5c1af5069d3c52bc8a1d74d83d8775bb8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Jun 2022 19:48:24 +0800
+Subject: btrfs: reject log replay if there is unsupported RO compat flag
+
+From: Qu Wenruo <wqu@suse.com>
+
+[ Upstream commit dc4d31684974d140250f3ee612c3f0cab13b3146 ]
+
+[BUG]
+If we have a btrfs image with dirty log, along with an unsupported RO
+compatible flag:
+
+log_root               30474240
+...
+compat_flags           0x0
+compat_ro_flags                0x40000003
+                       ( FREE_SPACE_TREE |
+                         FREE_SPACE_TREE_VALID |
+                         unknown flag: 0x40000000 )
+
+Then even if we can only mount it RO, we will still cause metadata
+update for log replay:
+
+  BTRFS info (device dm-1): flagging fs with big metadata feature
+  BTRFS info (device dm-1): using free space tree
+  BTRFS info (device dm-1): has skinny extents
+  BTRFS info (device dm-1): start tree-log replay
+
+This is definitely against RO compact flag requirement.
+
+[CAUSE]
+RO compact flag only forces us to do RO mount, but we will still do log
+replay for plain RO mount.
+
+Thus this will result us to do log replay and update metadata.
+
+This can be very problematic for new RO compat flag, for example older
+kernel can not understand v2 cache, and if we allow metadata update on
+RO mount and invalidate/corrupt v2 cache.
+
+[FIX]
+Just reject the mount unless rescue=nologreplay is provided:
+
+  BTRFS error (device dm-1): cannot replay dirty log with unsupport optional features (0x40000000), try rescue=nologreplay instead
+
+We don't want to set rescue=nologreply directly, as this would make the
+end user to read the old data, and cause confusion.
+
+Since the such case is really rare, we're mostly fine to just reject the
+mount with an error message, which also includes the proper workaround.
+
+CC: stable@vger.kernel.org #4.9+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/disk-io.c | 14 ++++++++++++++
+ 1 file changed, 14 insertions(+)
+
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index de440ebf5648..8daa5bb93a4c 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -3670,6 +3670,20 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
+               err = -EINVAL;
+               goto fail_alloc;
+       }
++      /*
++       * We have unsupported RO compat features, although RO mounted, we
++       * should not cause any metadata write, including log replay.
++       * Or we could screw up whatever the new feature requires.
++       */
++      if (unlikely(features && btrfs_super_log_root(disk_super) &&
++                   !btrfs_test_opt(fs_info, NOLOGREPLAY))) {
++              btrfs_err(fs_info,
++"cannot replay dirty log with unsupported compat_ro features (0x%llx), try rescue=nologreplay",
++                        features);
++              err = -EINVAL;
++              goto fail_alloc;
++      }
++
+       if (sectorsize < PAGE_SIZE) {
+               struct btrfs_subpage_info *subpage_info;
+-- 
+2.35.1
+
diff --git a/queue-5.19/btrfs-replace-btrfs_max_extent_size-with-fs_info-max.patch b/queue-5.19/btrfs-replace-btrfs_max_extent_size-with-fs_info-max.patch
new file mode 100644 (file)
index 0000000..5a1fac4
--- /dev/null
@@ -0,0 +1,222 @@
+From 80d0e33acb9c7136389c88f3674117bc120e1369 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:40 +0900
+Subject: btrfs: replace BTRFS_MAX_EXTENT_SIZE with fs_info->max_extent_size
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit f7b12a62f008a3041f42f2426983e59a6a0a3c59 ]
+
+On zoned filesystem, data write out is limited by max_zone_append_size,
+and a large ordered extent is split according the size of a bio. OTOH,
+the number of extents to be written is calculated using
+BTRFS_MAX_EXTENT_SIZE, and that estimated number is used to reserve the
+metadata bytes to update and/or create the metadata items.
+
+The metadata reservation is done at e.g, btrfs_buffered_write() and then
+released according to the estimation changes. Thus, if the number of extents
+increases massively, the reserved metadata can run out.
+
+The increase of the number of extents easily occurs on zoned filesystem
+if BTRFS_MAX_EXTENT_SIZE > max_zone_append_size. And, it causes the
+following warning on a small RAM environment with disabling metadata
+over-commit (in the following patch).
+
+[75721.498492] ------------[ cut here ]------------
+[75721.505624] BTRFS: block rsv 1 returned -28
+[75721.512230] WARNING: CPU: 24 PID: 2327559 at fs/btrfs/block-rsv.c:537 btrfs_use_block_rsv+0x560/0x760 [btrfs]
+[75721.581854] CPU: 24 PID: 2327559 Comm: kworker/u64:10 Kdump: loaded Tainted: G        W         5.18.0-rc2-BTRFS-ZNS+ #109
+[75721.597200] Hardware name: Supermicro Super Server/H12SSL-NT, BIOS 2.0 02/22/2021
+[75721.607310] Workqueue: btrfs-endio-write btrfs_work_helper [btrfs]
+[75721.616209] RIP: 0010:btrfs_use_block_rsv+0x560/0x760 [btrfs]
+[75721.646649] RSP: 0018:ffffc9000fbdf3e0 EFLAGS: 00010286
+[75721.654126] RAX: 0000000000000000 RBX: 0000000000004000 RCX: 0000000000000000
+[75721.663524] RDX: 0000000000000004 RSI: 0000000000000008 RDI: fffff52001f7be6e
+[75721.672921] RBP: ffffc9000fbdf420 R08: 0000000000000001 R09: ffff889f8d1fc6c7
+[75721.682493] R10: ffffed13f1a3f8d8 R11: 0000000000000001 R12: ffff88980a3c0e28
+[75721.692284] R13: ffff889b66590000 R14: ffff88980a3c0e40 R15: ffff88980a3c0e8a
+[75721.701878] FS:  0000000000000000(0000) GS:ffff889f8d000000(0000) knlGS:0000000000000000
+[75721.712601] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[75721.720726] CR2: 000055d12e05c018 CR3: 0000800193594000 CR4: 0000000000350ee0
+[75721.730499] Call Trace:
+[75721.735166]  <TASK>
+[75721.739886]  btrfs_alloc_tree_block+0x1e1/0x1100 [btrfs]
+[75721.747545]  ? btrfs_alloc_logged_file_extent+0x550/0x550 [btrfs]
+[75721.756145]  ? btrfs_get_32+0xea/0x2d0 [btrfs]
+[75721.762852]  ? btrfs_get_32+0xea/0x2d0 [btrfs]
+[75721.769520]  ? push_leaf_left+0x420/0x620 [btrfs]
+[75721.776431]  ? memcpy+0x4e/0x60
+[75721.781931]  split_leaf+0x433/0x12d0 [btrfs]
+[75721.788392]  ? btrfs_get_token_32+0x580/0x580 [btrfs]
+[75721.795636]  ? push_for_double_split.isra.0+0x420/0x420 [btrfs]
+[75721.803759]  ? leaf_space_used+0x15d/0x1a0 [btrfs]
+[75721.811156]  btrfs_search_slot+0x1bc3/0x2790 [btrfs]
+[75721.818300]  ? lock_downgrade+0x7c0/0x7c0
+[75721.824411]  ? free_extent_buffer.part.0+0x107/0x200 [btrfs]
+[75721.832456]  ? split_leaf+0x12d0/0x12d0 [btrfs]
+[75721.839149]  ? free_extent_buffer.part.0+0x14f/0x200 [btrfs]
+[75721.846945]  ? free_extent_buffer+0x13/0x20 [btrfs]
+[75721.853960]  ? btrfs_release_path+0x4b/0x190 [btrfs]
+[75721.861429]  btrfs_csum_file_blocks+0x85c/0x1500 [btrfs]
+[75721.869313]  ? rcu_read_lock_sched_held+0x16/0x80
+[75721.876085]  ? lock_release+0x552/0xf80
+[75721.881957]  ? btrfs_del_csums+0x8c0/0x8c0 [btrfs]
+[75721.888886]  ? __kasan_check_write+0x14/0x20
+[75721.895152]  ? do_raw_read_unlock+0x44/0x80
+[75721.901323]  ? _raw_write_lock_irq+0x60/0x80
+[75721.907983]  ? btrfs_global_root+0xb9/0xe0 [btrfs]
+[75721.915166]  ? btrfs_csum_root+0x12b/0x180 [btrfs]
+[75721.921918]  ? btrfs_get_global_root+0x820/0x820 [btrfs]
+[75721.929166]  ? _raw_write_unlock+0x23/0x40
+[75721.935116]  ? unpin_extent_cache+0x1e3/0x390 [btrfs]
+[75721.942041]  btrfs_finish_ordered_io.isra.0+0xa0c/0x1dc0 [btrfs]
+[75721.949906]  ? try_to_wake_up+0x30/0x14a0
+[75721.955700]  ? btrfs_unlink_subvol+0xda0/0xda0 [btrfs]
+[75721.962661]  ? rcu_read_lock_sched_held+0x16/0x80
+[75721.969111]  ? lock_acquire+0x41b/0x4c0
+[75721.974982]  finish_ordered_fn+0x15/0x20 [btrfs]
+[75721.981639]  btrfs_work_helper+0x1af/0xa80 [btrfs]
+[75721.988184]  ? _raw_spin_unlock_irq+0x28/0x50
+[75721.994643]  process_one_work+0x815/0x1460
+[75722.000444]  ? pwq_dec_nr_in_flight+0x250/0x250
+[75722.006643]  ? do_raw_spin_trylock+0xbb/0x190
+[75722.013086]  worker_thread+0x59a/0xeb0
+[75722.018511]  kthread+0x2ac/0x360
+[75722.023428]  ? process_one_work+0x1460/0x1460
+[75722.029431]  ? kthread_complete_and_exit+0x30/0x30
+[75722.036044]  ret_from_fork+0x22/0x30
+[75722.041255]  </TASK>
+[75722.045047] irq event stamp: 0
+[75722.049703] hardirqs last  enabled at (0): [<0000000000000000>] 0x0
+[75722.057610] hardirqs last disabled at (0): [<ffffffff8118a94a>] copy_process+0x1c1a/0x66b0
+[75722.067533] softirqs last  enabled at (0): [<ffffffff8118a989>] copy_process+0x1c59/0x66b0
+[75722.077423] softirqs last disabled at (0): [<0000000000000000>] 0x0
+[75722.085335] ---[ end trace 0000000000000000 ]---
+
+To fix the estimation, we need to introduce fs_info->max_extent_size to
+replace BTRFS_MAX_EXTENT_SIZE, which allows setting a different size for
+regular vs zoned filesystem.
+
+Set fs_info->max_extent_size to BTRFS_MAX_EXTENT_SIZE by default. On zoned
+filesystem, it is set to fs_info->max_zone_append_size.
+
+CC: stable@vger.kernel.org # 5.12+
+Fixes: d8e3fb106f39 ("btrfs: zoned: use ZONE_APPEND write for zoned mode")
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/ctree.h     | 6 ++++++
+ fs/btrfs/disk-io.c   | 2 ++
+ fs/btrfs/extent_io.c | 4 +++-
+ fs/btrfs/inode.c     | 6 ++++--
+ fs/btrfs/zoned.c     | 5 ++++-
+ 5 files changed, 19 insertions(+), 4 deletions(-)
+
+diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
+index 7abfbfd7c94c..364c71ad7cce 100644
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -1032,6 +1032,12 @@ struct btrfs_fs_info {
+       u32 csums_per_leaf;
+       u32 stripesize;
++      /*
++       * Maximum size of an extent. BTRFS_MAX_EXTENT_SIZE on regular
++       * filesystem, on zoned it depends on the device constraints.
++       */
++      u64 max_extent_size;
++
+       /* Block groups and devices containing active swapfiles. */
+       spinlock_t swapfile_pins_lock;
+       struct rb_root swapfile_pins;
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index e33f2fa50b0f..804dcc69787d 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -3262,6 +3262,8 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
+       fs_info->sectorsize_bits = ilog2(4096);
+       fs_info->stripesize = 4096;
++      fs_info->max_extent_size = BTRFS_MAX_EXTENT_SIZE;
++
+       spin_lock_init(&fs_info->swapfile_pins_lock);
+       fs_info->swapfile_pins = RB_ROOT;
+diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
+index f03ab5dbda7a..cda25018ebd7 100644
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -2007,10 +2007,12 @@ noinline_for_stack bool find_lock_delalloc_range(struct inode *inode,
+                                   struct page *locked_page, u64 *start,
+                                   u64 *end)
+ {
++      struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
+       const u64 orig_start = *start;
+       const u64 orig_end = *end;
+-      u64 max_bytes = BTRFS_MAX_EXTENT_SIZE;
++      /* The sanity tests may not set a valid fs_info. */
++      u64 max_bytes = fs_info ? fs_info->max_extent_size : BTRFS_MAX_EXTENT_SIZE;
+       u64 delalloc_start;
+       u64 delalloc_end;
+       bool found;
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 25872ee8594e..d9123bfeae8d 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -2200,6 +2200,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
+ void btrfs_split_delalloc_extent(struct inode *inode,
+                                struct extent_state *orig, u64 split)
+ {
++      struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       u64 size;
+       /* not delalloc, ignore it */
+@@ -2207,7 +2208,7 @@ void btrfs_split_delalloc_extent(struct inode *inode,
+               return;
+       size = orig->end - orig->start + 1;
+-      if (size > BTRFS_MAX_EXTENT_SIZE) {
++      if (size > fs_info->max_extent_size) {
+               u32 num_extents;
+               u64 new_size;
+@@ -2236,6 +2237,7 @@ void btrfs_split_delalloc_extent(struct inode *inode,
+ void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
+                                struct extent_state *other)
+ {
++      struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+       u64 new_size, old_size;
+       u32 num_extents;
+@@ -2249,7 +2251,7 @@ void btrfs_merge_delalloc_extent(struct inode *inode, struct extent_state *new,
+               new_size = other->end - new->start + 1;
+       /* we're not bigger than the max, unreserve the space and go */
+-      if (new_size <= BTRFS_MAX_EXTENT_SIZE) {
++      if (new_size <= fs_info->max_extent_size) {
+               spin_lock(&BTRFS_I(inode)->lock);
+               btrfs_mod_outstanding_extents(BTRFS_I(inode), -1);
+               spin_unlock(&BTRFS_I(inode)->lock);
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index 52607569cf49..7ac2d7cfca31 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -739,8 +739,11 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
+       }
+       fs_info->zone_size = zone_size;
+-      fs_info->max_zone_append_size = max_zone_append_size;
++      fs_info->max_zone_append_size = ALIGN_DOWN(max_zone_append_size,
++                                                 fs_info->sectorsize);
+       fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED;
++      if (fs_info->max_zone_append_size < fs_info->max_extent_size)
++              fs_info->max_extent_size = fs_info->max_zone_append_size;
+       /*
+        * Check mount options here, because we might change fs_info->zoned
+-- 
+2.35.1
+
diff --git a/queue-5.19/btrfs-reset-block-group-chunk-force-if-we-have-to-wa.patch b/queue-5.19/btrfs-reset-block-group-chunk-force-if-we-have-to-wa.patch
new file mode 100644 (file)
index 0000000..086879d
--- /dev/null
@@ -0,0 +1,42 @@
+From 1ccd0a5d14b315195f893c34b57a80f21cb39309 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Jun 2022 18:31:17 -0400
+Subject: btrfs: reset block group chunk force if we have to wait
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+[ Upstream commit 1314ca78b2c35d3e7d0f097268a2ee6dc0d369ef ]
+
+If you try to force a chunk allocation, but you race with another chunk
+allocation, you will end up waiting on the chunk allocation that just
+occurred and then allocate another chunk.  If you have many threads all
+doing this at once you can way over-allocate chunks.
+
+Fix this by resetting force to NO_FORCE, that way if we think we need to
+allocate we can, otherwise we don't force another chunk allocation if
+one is already happening.
+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+CC: stable@vger.kernel.org # 5.4+
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/block-group.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
+index ede389f2602d..13358fbc1629 100644
+--- a/fs/btrfs/block-group.c
++++ b/fs/btrfs/block-group.c
+@@ -3761,6 +3761,7 @@ int btrfs_chunk_alloc(struct btrfs_trans_handle *trans, u64 flags,
+                        * attempt.
+                        */
+                       wait_for_alloc = true;
++                      force = CHUNK_ALLOC_NO_FORCE;
+                       spin_unlock(&space_info->lock);
+                       mutex_lock(&fs_info->chunk_mutex);
+                       mutex_unlock(&fs_info->chunk_mutex);
+-- 
+2.35.1
+
diff --git a/queue-5.19/btrfs-store-chunk-size-in-space-info-struct.patch b/queue-5.19/btrfs-store-chunk-size-in-space-info-struct.patch
new file mode 100644 (file)
index 0000000..e1d07af
--- /dev/null
@@ -0,0 +1,141 @@
+From 2fabc0d2cf1e6ca7eee07d618054b1311be2c185 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 8 Feb 2022 11:31:20 -0800
+Subject: btrfs: store chunk size in space-info struct
+
+From: Stefan Roesch <shr@fb.com>
+
+[ Upstream commit f6fca3917b4d99d8c13901738afec35f570a3c2f ]
+
+The chunk size is stored in the btrfs_space_info structure.  It is
+initialized at the start and is then used.
+
+A new API is added to update the current chunk size.  This API is used
+to be able to expose the chunk_size as a sysfs setting.
+
+Signed-off-by: Stefan Roesch <shr@fb.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+[ rename and merge helpers, switch atomic type to u64, style fixes ]
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/space-info.c | 32 ++++++++++++++++++++++++++++++++
+ fs/btrfs/space-info.h |  4 ++++
+ fs/btrfs/volumes.c    | 28 +++++++++-------------------
+ 3 files changed, 45 insertions(+), 19 deletions(-)
+
+diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
+index f301149c7597..51fbfd716623 100644
+--- a/fs/btrfs/space-info.c
++++ b/fs/btrfs/space-info.c
+@@ -187,6 +187,37 @@ void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
+  */
+ #define BTRFS_DEFAULT_ZONED_RECLAIM_THRESH                    (75)
++/*
++ * Calculate chunk size depending on volume type (regular or zoned).
++ */
++static u64 calc_chunk_size(const struct btrfs_fs_info *fs_info, u64 flags)
++{
++      if (btrfs_is_zoned(fs_info))
++              return fs_info->zone_size;
++
++      ASSERT(flags & BTRFS_BLOCK_GROUP_TYPE_MASK);
++
++      if (flags & BTRFS_BLOCK_GROUP_DATA)
++              return SZ_1G;
++      else if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
++              return SZ_32M;
++
++      /* Handle BTRFS_BLOCK_GROUP_METADATA */
++      if (fs_info->fs_devices->total_rw_bytes > 50ULL * SZ_1G)
++              return SZ_1G;
++
++      return SZ_256M;
++}
++
++/*
++ * Update default chunk size.
++ */
++void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
++                                      u64 chunk_size)
++{
++      WRITE_ONCE(space_info->chunk_size, chunk_size);
++}
++
+ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
+ {
+@@ -208,6 +239,7 @@ static int create_space_info(struct btrfs_fs_info *info, u64 flags)
+       INIT_LIST_HEAD(&space_info->tickets);
+       INIT_LIST_HEAD(&space_info->priority_tickets);
+       space_info->clamp = 1;
++      btrfs_update_space_info_chunk_size(space_info, calc_chunk_size(info, flags));
+       if (btrfs_is_zoned(info))
+               space_info->bg_reclaim_threshold = BTRFS_DEFAULT_ZONED_RECLAIM_THRESH;
+diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
+index c096695598c1..e7de24a529cf 100644
+--- a/fs/btrfs/space-info.h
++++ b/fs/btrfs/space-info.h
+@@ -25,6 +25,8 @@ struct btrfs_space_info {
+       u64 max_extent_size;    /* This will hold the maximum extent size of
+                                  the space info if we had an ENOSPC in the
+                                  allocator. */
++      /* Chunk size in bytes */
++      u64 chunk_size;
+       /*
+        * Once a block group drops below this threshold (percents) we'll
+@@ -123,6 +125,8 @@ void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
+                            u64 total_bytes, u64 bytes_used,
+                            u64 bytes_readonly, u64 bytes_zone_unusable,
+                            struct btrfs_space_info **space_info);
++void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
++                                      u64 chunk_size);
+ struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
+                                              u64 flags);
+ u64 __pure btrfs_space_info_used(struct btrfs_space_info *s_info,
+diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
+index 9c20049d1fec..9cd9d06f5469 100644
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -5071,26 +5071,16 @@ static void init_alloc_chunk_ctl_policy_regular(
+                               struct btrfs_fs_devices *fs_devices,
+                               struct alloc_chunk_ctl *ctl)
+ {
+-      u64 type = ctl->type;
++      struct btrfs_space_info *space_info;
+-      if (type & BTRFS_BLOCK_GROUP_DATA) {
+-              ctl->max_stripe_size = SZ_1G;
+-              ctl->max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
+-      } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
+-              /* For larger filesystems, use larger metadata chunks */
+-              if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
+-                      ctl->max_stripe_size = SZ_1G;
+-              else
+-                      ctl->max_stripe_size = SZ_256M;
+-              ctl->max_chunk_size = ctl->max_stripe_size;
+-      } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
+-              ctl->max_stripe_size = SZ_32M;
+-              ctl->max_chunk_size = 2 * ctl->max_stripe_size;
+-              ctl->devs_max = min_t(int, ctl->devs_max,
+-                                    BTRFS_MAX_DEVS_SYS_CHUNK);
+-      } else {
+-              BUG();
+-      }
++      space_info = btrfs_find_space_info(fs_devices->fs_info, ctl->type);
++      ASSERT(space_info);
++
++      ctl->max_chunk_size = READ_ONCE(space_info->chunk_size);
++      ctl->max_stripe_size = ctl->max_chunk_size;
++
++      if (ctl->type & BTRFS_BLOCK_GROUP_SYSTEM)
++              ctl->devs_max = min_t(int, ctl->devs_max, BTRFS_MAX_DEVS_SYS_CHUNK);
+       /* We don't want a chunk larger than 10% of writable space */
+       ctl->max_chunk_size = min(div_factor(fs_devices->total_rw_bytes, 1),
+-- 
+2.35.1
+
diff --git a/queue-5.19/btrfs-tree-log-make-the-return-value-for-log-syncing.patch b/queue-5.19/btrfs-tree-log-make-the-return-value-for-log-syncing.patch
new file mode 100644 (file)
index 0000000..79c7ad6
--- /dev/null
@@ -0,0 +1,142 @@
+From 4ab9336943a44e535a6bb785622dcb3484638d78 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Jun 2022 15:09:48 -0400
+Subject: btrfs: tree-log: make the return value for log syncing consistent
+
+From: Josef Bacik <josef@toxicpanda.com>
+
+[ Upstream commit f31f09f6be1c6c1a673e0566e258281a7bbaaa51 ]
+
+Currently we will return 1 or -EAGAIN if we decide we need to commit
+the transaction rather than sync the log.  In practice this doesn't
+really matter, we interpret any !0 and !BTRFS_NO_LOG_SYNC as needing to
+commit the transaction.  However this makes it hard to figure out what
+the correct thing to do is.
+
+Fix this up by defining BTRFS_LOG_FORCE_COMMIT and using this in all the
+places where we want to force the transaction to be committed.
+
+CC: stable@vger.kernel.org # 5.15+
+Reviewed-by: Filipe Manana <fdmanana@suse.com>
+Signed-off-by: Josef Bacik <josef@toxicpanda.com>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/file.c     |  2 +-
+ fs/btrfs/tree-log.c | 18 +++++++++---------
+ fs/btrfs/tree-log.h |  3 +++
+ 3 files changed, 13 insertions(+), 10 deletions(-)
+
+diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
+index 9dfde1af8a64..89c6d7ff1987 100644
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -2308,7 +2308,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
+       btrfs_release_log_ctx_extents(&ctx);
+       if (ret < 0) {
+               /* Fallthrough and commit/free transaction. */
+-              ret = 1;
++              ret = BTRFS_LOG_FORCE_COMMIT;
+       }
+       /* we've logged all the items and now have a consistent
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 370388fadf96..c94713c811bb 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -171,7 +171,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
+               int index = (root->log_transid + 1) % 2;
+               if (btrfs_need_log_full_commit(trans)) {
+-                      ret = -EAGAIN;
++                      ret = BTRFS_LOG_FORCE_COMMIT;
+                       goto out;
+               }
+@@ -194,7 +194,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans,
+                * writing.
+                */
+               if (zoned && !created) {
+-                      ret = -EAGAIN;
++                      ret = BTRFS_LOG_FORCE_COMMIT;
+                       goto out;
+               }
+@@ -3121,7 +3121,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
+       /* bail out if we need to do a full commit */
+       if (btrfs_need_log_full_commit(trans)) {
+-              ret = -EAGAIN;
++              ret = BTRFS_LOG_FORCE_COMMIT;
+               mutex_unlock(&root->log_mutex);
+               goto out;
+       }
+@@ -3222,7 +3222,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
+               }
+               btrfs_wait_tree_log_extents(log, mark);
+               mutex_unlock(&log_root_tree->log_mutex);
+-              ret = -EAGAIN;
++              ret = BTRFS_LOG_FORCE_COMMIT;
+               goto out;
+       }
+@@ -3261,7 +3261,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
+               blk_finish_plug(&plug);
+               btrfs_wait_tree_log_extents(log, mark);
+               mutex_unlock(&log_root_tree->log_mutex);
+-              ret = -EAGAIN;
++              ret = BTRFS_LOG_FORCE_COMMIT;
+               goto out_wake_log_root;
+       }
+@@ -5848,7 +5848,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
+           inode_only == LOG_INODE_ALL &&
+           inode->last_unlink_trans >= trans->transid) {
+               btrfs_set_log_full_commit(trans);
+-              ret = 1;
++              ret = BTRFS_LOG_FORCE_COMMIT;
+               goto out_unlock;
+       }
+@@ -6562,12 +6562,12 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
+       bool log_dentries = false;
+       if (btrfs_test_opt(fs_info, NOTREELOG)) {
+-              ret = 1;
++              ret = BTRFS_LOG_FORCE_COMMIT;
+               goto end_no_trans;
+       }
+       if (btrfs_root_refs(&root->root_item) == 0) {
+-              ret = 1;
++              ret = BTRFS_LOG_FORCE_COMMIT;
+               goto end_no_trans;
+       }
+@@ -6665,7 +6665,7 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
+ end_trans:
+       if (ret < 0) {
+               btrfs_set_log_full_commit(trans);
+-              ret = 1;
++              ret = BTRFS_LOG_FORCE_COMMIT;
+       }
+       if (ret)
+diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
+index 1620f8170629..57ab5f3b8dc7 100644
+--- a/fs/btrfs/tree-log.h
++++ b/fs/btrfs/tree-log.h
+@@ -12,6 +12,9 @@
+ /* return value for btrfs_log_dentry_safe that means we don't need to log it at all */
+ #define BTRFS_NO_LOG_SYNC 256
++/* We can't use the tree log for whatever reason, force a transaction commit */
++#define BTRFS_LOG_FORCE_COMMIT                                (1)
++
+ struct btrfs_log_ctx {
+       int log_ret;
+       int log_transid;
+-- 
+2.35.1
+
diff --git a/queue-5.19/btrfs-zoned-activate-metadata-block-group-on-flush_s.patch b/queue-5.19/btrfs-zoned-activate-metadata-block-group-on-flush_s.patch
new file mode 100644 (file)
index 0000000..c8fc1cb
--- /dev/null
@@ -0,0 +1,180 @@
+From a86a8bab5c455d813a71c98fb351f047c1e46965 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:47 +0900
+Subject: btrfs: zoned: activate metadata block group on flush_space
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit b0931513913633044ed6e3800334c28433c007b0 ]
+
+For metadata space on zoned filesystem, reaching ALLOC_CHUNK{,_FORCE}
+means we don't have enough space left in the active_total_bytes. Before
+allocating a new chunk, we can try to activate an existing block group
+in this case.
+
+Also, allocating a chunk is not enough to grant a ticket for metadata
+space on zoned filesystem; we need to activate the block group to
+increase the active_total_bytes.
+
+btrfs_zoned_activate_one_bg() implements the activation feature. It will
+activate a block group by (maybe) finishing a block group. It will give up
+activating a block group if it cannot finish any block group.
+
+CC: stable@vger.kernel.org # 5.16+
+Fixes: afba2bc036b0 ("btrfs: zoned: implement active zone tracking")
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/space-info.c | 30 ++++++++++++++++++++++++
+ fs/btrfs/zoned.c      | 53 +++++++++++++++++++++++++++++++++++++++++++
+ fs/btrfs/zoned.h      | 10 ++++++++
+ 3 files changed, 93 insertions(+)
+
+diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
+index ad13b9d207b1..b0c5b4738b1f 100644
+--- a/fs/btrfs/space-info.c
++++ b/fs/btrfs/space-info.c
+@@ -9,6 +9,7 @@
+ #include "ordered-data.h"
+ #include "transaction.h"
+ #include "block-group.h"
++#include "zoned.h"
+ /*
+  * HOW DOES SPACE RESERVATION WORK
+@@ -724,6 +725,18 @@ static void flush_space(struct btrfs_fs_info *fs_info,
+               break;
+       case ALLOC_CHUNK:
+       case ALLOC_CHUNK_FORCE:
++              /*
++               * For metadata space on zoned filesystem, reaching here means we
++               * don't have enough space left in active_total_bytes. Try to
++               * activate a block group first, because we may have inactive
++               * block group already allocated.
++               */
++              ret = btrfs_zoned_activate_one_bg(fs_info, space_info, false);
++              if (ret < 0)
++                      break;
++              else if (ret == 1)
++                      break;
++
+               trans = btrfs_join_transaction(root);
+               if (IS_ERR(trans)) {
+                       ret = PTR_ERR(trans);
+@@ -734,6 +747,23 @@ static void flush_space(struct btrfs_fs_info *fs_info,
+                               (state == ALLOC_CHUNK) ? CHUNK_ALLOC_NO_FORCE :
+                                       CHUNK_ALLOC_FORCE);
+               btrfs_end_transaction(trans);
++
++              /*
++               * For metadata space on zoned filesystem, allocating a new chunk
++               * is not enough. We still need to activate the block * group.
++               * Active the newly allocated block group by (maybe) finishing
++               * a block group.
++               */
++              if (ret == 1) {
++                      ret = btrfs_zoned_activate_one_bg(fs_info, space_info, true);
++                      /*
++                       * Revert to the original ret regardless we could finish
++                       * one block group or not.
++                       */
++                      if (ret >= 0)
++                              ret = 1;
++              }
++
+               if (ret > 0 || ret == -ENOSPC)
+                       ret = 0;
+               break;
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index 16ed426a58c9..4df5b36dc574 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -2226,3 +2226,56 @@ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
+       return ret < 0 ? ret : 1;
+ }
++
++int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
++                              struct btrfs_space_info *space_info,
++                              bool do_finish)
++{
++      struct btrfs_block_group *bg;
++      int index;
++
++      if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA))
++              return 0;
++
++      /* No more block groups to activate */
++      if (space_info->active_total_bytes == space_info->total_bytes)
++              return 0;
++
++      for (;;) {
++              int ret;
++              bool need_finish = false;
++
++              down_read(&space_info->groups_sem);
++              for (index = 0; index < BTRFS_NR_RAID_TYPES; index++) {
++                      list_for_each_entry(bg, &space_info->block_groups[index],
++                                          list) {
++                              if (!spin_trylock(&bg->lock))
++                                      continue;
++                              if (btrfs_zoned_bg_is_full(bg) || bg->zone_is_active) {
++                                      spin_unlock(&bg->lock);
++                                      continue;
++                              }
++                              spin_unlock(&bg->lock);
++
++                              if (btrfs_zone_activate(bg)) {
++                                      up_read(&space_info->groups_sem);
++                                      return 1;
++                              }
++
++                              need_finish = true;
++                      }
++              }
++              up_read(&space_info->groups_sem);
++
++              if (!do_finish || !need_finish)
++                      break;
++
++              ret = btrfs_zone_finish_one_bg(fs_info);
++              if (ret == 0)
++                      break;
++              if (ret < 0)
++                      return ret;
++      }
++
++      return 0;
++}
+diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
+index 329d28e2fd8d..e17462db3a84 100644
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -81,6 +81,8 @@ bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info);
+ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
+                                      u64 length);
+ int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
++int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
++                              struct btrfs_space_info *space_info, bool do_finish);
+ #else /* CONFIG_BLK_DEV_ZONED */
+ static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
+                                    struct blk_zone *zone)
+@@ -256,6 +258,14 @@ static inline int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
+       return 1;
+ }
++static inline int btrfs_zoned_activate_one_bg(struct btrfs_fs_info *fs_info,
++                                            struct btrfs_space_info *space_info,
++                                            bool do_finish)
++{
++      /* Consider all the block groups are active */
++      return 0;
++}
++
+ #endif
+ static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
+-- 
+2.35.1
+
diff --git a/queue-5.19/btrfs-zoned-activate-necessary-block-group.patch b/queue-5.19/btrfs-zoned-activate-necessary-block-group.patch
new file mode 100644 (file)
index 0000000..504d136
--- /dev/null
@@ -0,0 +1,60 @@
+From 2a7ead250951a93007552d9497104f2e83846bf9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:48 +0900
+Subject: btrfs: zoned: activate necessary block group
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit b6a98021e4019c562a23ad151a7e40adfa9f91e5 ]
+
+There are two places where allocating a chunk is not enough. These two
+places are trying to ensure the space by allocating a chunk. To meet the
+condition for active_total_bytes, we also need to activate a block group
+there.
+
+CC: stable@vger.kernel.org # 5.16+
+Fixes: afba2bc036b0 ("btrfs: zoned: implement active zone tracking")
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/block-group.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
+index 20c78ae7d150..5627b43d4cc2 100644
+--- a/fs/btrfs/block-group.c
++++ b/fs/btrfs/block-group.c
+@@ -2665,6 +2665,14 @@ int btrfs_inc_block_group_ro(struct btrfs_block_group *cache,
+       ret = btrfs_chunk_alloc(trans, alloc_flags, CHUNK_ALLOC_FORCE);
+       if (ret < 0)
+               goto out;
++      /*
++       * We have allocated a new chunk. We also need to activate that chunk to
++       * grant metadata tickets for zoned filesystem.
++       */
++      ret = btrfs_zoned_activate_one_bg(fs_info, cache->space_info, true);
++      if (ret < 0)
++              goto out;
++
+       ret = inc_block_group_ro(cache, 0);
+       if (ret == -ETXTBSY)
+               goto unlock_out;
+@@ -3890,6 +3898,14 @@ static void reserve_chunk_space(struct btrfs_trans_handle *trans,
+               if (IS_ERR(bg)) {
+                       ret = PTR_ERR(bg);
+               } else {
++                      /*
++                       * We have a new chunk. We also need to activate it for
++                       * zoned filesystem.
++                       */
++                      ret = btrfs_zoned_activate_one_bg(fs_info, info, true);
++                      if (ret < 0)
++                              return;
++
+                       /*
+                        * If we fail to add the chunk item here, we end up
+                        * trying again at phase 2 of chunk allocation, at
+-- 
+2.35.1
+
diff --git a/queue-5.19/btrfs-zoned-disable-metadata-overcommit-for-zoned.patch b/queue-5.19/btrfs-zoned-disable-metadata-overcommit-for-zoned.patch
new file mode 100644 (file)
index 0000000..25605a4
--- /dev/null
@@ -0,0 +1,46 @@
+From e5de08533b632d5a0e377d481b50165ab8e5644b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:46 +0900
+Subject: btrfs: zoned: disable metadata overcommit for zoned
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 79417d040f4f77b19c701bccc23013b9cdac358d ]
+
+The metadata overcommit makes the space reservation flexible but it is also
+harmful to active zone tracking. Since we cannot finish a block group from
+the metadata allocation context, we might not activate a new block group
+and might not be able to actually write out the overcommit reservations.
+
+So, disable metadata overcommit for zoned filesystems. We will ensure
+the reservations are under active_total_bytes in the following patches.
+
+CC: stable@vger.kernel.org # 5.16+
+Fixes: afba2bc036b0 ("btrfs: zoned: implement active zone tracking")
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/space-info.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
+index 2dd8754cb990..f301149c7597 100644
+--- a/fs/btrfs/space-info.c
++++ b/fs/btrfs/space-info.c
+@@ -349,7 +349,10 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
+               return 0;
+       used = btrfs_space_info_used(space_info, true);
+-      avail = calc_available_free_space(fs_info, space_info, flush);
++      if (btrfs_is_zoned(fs_info) && (space_info->flags & BTRFS_BLOCK_GROUP_METADATA))
++              avail = 0;
++      else
++              avail = calc_available_free_space(fs_info, space_info, flush);
+       if (used + bytes < space_info->total_bytes + avail)
+               return 1;
+-- 
+2.35.1
+
diff --git a/queue-5.19/btrfs-zoned-finish-least-available-block-group-on-da.patch b/queue-5.19/btrfs-zoned-finish-least-available-block-group-on-da.patch
new file mode 100644 (file)
index 0000000..01654a5
--- /dev/null
@@ -0,0 +1,188 @@
+From 0718aff138fe17a649ca20057b6b957a4ecdcea1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:44 +0900
+Subject: btrfs: zoned: finish least available block group on data bg
+ allocation
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 393f646e34c18b85d0f41272bfcbd475ae3a0d34 ]
+
+When we run out of active zones and no sufficient space is left in any
+block groups, we need to finish one block group to make room to activate a
+new block group.
+
+However, we cannot do this for metadata block groups because we can cause a
+deadlock by waiting for a running transaction commit. So, do that only for
+a data block group.
+
+Furthermore, the block group to be finished has two requirements. First,
+the block group must not have reserved bytes left. Having reserved bytes
+means we have an allocated region but did not yet send bios for it. If that
+region is allocated by the thread calling btrfs_zone_finish(), it results
+in a deadlock.
+
+Second, the block group to be finished must not be a SYSTEM block
+group. Finishing a SYSTEM block group easily breaks further chunk
+allocation by nullifying the SYSTEM free space.
+
+In a certain case, we cannot find any zone finish candidate or
+btrfs_zone_finish() may fail. In that case, we fall back to split the
+allocation bytes and fill the last spaces left in the block groups.
+
+CC: stable@vger.kernel.org # 5.16+
+Fixes: afba2bc036b0 ("btrfs: zoned: implement active zone tracking")
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent-tree.c | 50 +++++++++++++++++++++++++++++++++---------
+ fs/btrfs/zoned.c       | 40 +++++++++++++++++++++++++++++++++
+ fs/btrfs/zoned.h       |  7 ++++++
+ 3 files changed, 87 insertions(+), 10 deletions(-)
+
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index 506852795db1..ad45083c6461 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -3981,6 +3981,45 @@ static void found_extent(struct find_free_extent_ctl *ffe_ctl,
+       }
+ }
++static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
++                                  struct find_free_extent_ctl *ffe_ctl)
++{
++      /* If we can activate new zone, just allocate a chunk and use it */
++      if (btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
++              return 0;
++
++      /*
++       * We already reached the max active zones. Try to finish one block
++       * group to make a room for a new block group. This is only possible
++       * for a data block group because btrfs_zone_finish() may need to wait
++       * for a running transaction which can cause a deadlock for metadata
++       * allocation.
++       */
++      if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA) {
++              int ret = btrfs_zone_finish_one_bg(fs_info);
++
++              if (ret == 1)
++                      return 0;
++              else if (ret < 0)
++                      return ret;
++      }
++
++      /*
++       * If we have enough free space left in an already active block group
++       * and we can't activate any other zone now, do not allow allocating a
++       * new chunk and let find_free_extent() retry with a smaller size.
++       */
++      if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size)
++              return -ENOSPC;
++
++      /*
++       * We cannot activate a new block group and no enough space left in any
++       * block groups. So, allocating a new block group may not help. But,
++       * there is nothing to do anyway, so let's go with it.
++       */
++      return 0;
++}
++
+ static int can_allocate_chunk(struct btrfs_fs_info *fs_info,
+                             struct find_free_extent_ctl *ffe_ctl)
+ {
+@@ -3988,16 +4027,7 @@ static int can_allocate_chunk(struct btrfs_fs_info *fs_info,
+       case BTRFS_EXTENT_ALLOC_CLUSTERED:
+               return 0;
+       case BTRFS_EXTENT_ALLOC_ZONED:
+-              /*
+-               * If we have enough free space left in an already
+-               * active block group and we can't activate any other
+-               * zone now, do not allow allocating a new chunk and
+-               * let find_free_extent() retry with a smaller size.
+-               */
+-              if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size &&
+-                  !btrfs_can_activate_zone(fs_info->fs_devices, ffe_ctl->flags))
+-                      return -ENOSPC;
+-              return 0;
++              return can_allocate_chunk_zoned(fs_info, ffe_ctl);
+       default:
+               BUG();
+       }
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index 7ac2d7cfca31..eb9eb9e72187 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -2180,3 +2180,43 @@ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logica
+       spin_unlock(&block_group->lock);
+       btrfs_put_block_group(block_group);
+ }
++
++int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
++{
++      struct btrfs_block_group *block_group;
++      struct btrfs_block_group *min_bg = NULL;
++      u64 min_avail = U64_MAX;
++      int ret;
++
++      spin_lock(&fs_info->zone_active_bgs_lock);
++      list_for_each_entry(block_group, &fs_info->zone_active_bgs,
++                          active_bg_list) {
++              u64 avail;
++
++              spin_lock(&block_group->lock);
++              if (block_group->reserved ||
++                  (block_group->flags & BTRFS_BLOCK_GROUP_SYSTEM)) {
++                      spin_unlock(&block_group->lock);
++                      continue;
++              }
++
++              avail = block_group->zone_capacity - block_group->alloc_offset;
++              if (min_avail > avail) {
++                      if (min_bg)
++                              btrfs_put_block_group(min_bg);
++                      min_bg = block_group;
++                      min_avail = avail;
++                      btrfs_get_block_group(min_bg);
++              }
++              spin_unlock(&block_group->lock);
++      }
++      spin_unlock(&fs_info->zone_active_bgs_lock);
++
++      if (!min_bg)
++              return 0;
++
++      ret = btrfs_zone_finish(min_bg);
++      btrfs_put_block_group(min_bg);
++
++      return ret < 0 ? ret : 1;
++}
+diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
+index 9caeab07fd38..329d28e2fd8d 100644
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -80,6 +80,7 @@ void btrfs_free_zone_cache(struct btrfs_fs_info *fs_info);
+ bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info);
+ void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info, u64 logical,
+                                      u64 length);
++int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info);
+ #else /* CONFIG_BLK_DEV_ZONED */
+ static inline int btrfs_get_dev_zone(struct btrfs_device *device, u64 pos,
+                                    struct blk_zone *zone)
+@@ -249,6 +250,12 @@ static inline bool btrfs_zoned_should_reclaim(struct btrfs_fs_info *fs_info)
+ static inline void btrfs_zoned_release_data_reloc_bg(struct btrfs_fs_info *fs_info,
+                                                    u64 logical, u64 length) { }
++
++static inline int btrfs_zone_finish_one_bg(struct btrfs_fs_info *fs_info)
++{
++      return 1;
++}
++
+ #endif
+ static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
+-- 
+2.35.1
+
diff --git a/queue-5.19/btrfs-zoned-introduce-space_info-active_total_bytes.patch b/queue-5.19/btrfs-zoned-introduce-space_info-active_total_bytes.patch
new file mode 100644 (file)
index 0000000..7203ea4
--- /dev/null
@@ -0,0 +1,257 @@
+From d392aa8cff67373349cb32d9e9f0d34feed0dc1d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:45 +0900
+Subject: btrfs: zoned: introduce space_info->active_total_bytes
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 6a921de589926a350634e6e279f43fa5b9dbf5ba ]
+
+The active_total_bytes, like the total_bytes, accounts for the total bytes
+of active block groups in the space_info.
+
+With an introduction of active_total_bytes, we can check if the reserved
+bytes can be written to the block groups without activating a new block
+group. The check is necessary for metadata allocation on zoned
+filesystem. We cannot finish a block group, which may require waiting
+for the current transaction, from the metadata allocation context.
+Instead, we need to ensure the ongoing allocation (reserved bytes) fits
+in active block groups.
+
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/block-group.c | 12 +++++++++---
+ fs/btrfs/space-info.c  | 41 ++++++++++++++++++++++++++++++++---------
+ fs/btrfs/space-info.h  |  4 +++-
+ fs/btrfs/zoned.c       |  6 ++++++
+ 4 files changed, 50 insertions(+), 13 deletions(-)
+
+diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
+index 13358fbc1629..20c78ae7d150 100644
+--- a/fs/btrfs/block-group.c
++++ b/fs/btrfs/block-group.c
+@@ -1051,8 +1051,13 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
+                       < block_group->zone_unusable);
+               WARN_ON(block_group->space_info->disk_total
+                       < block_group->length * factor);
++              WARN_ON(block_group->zone_is_active &&
++                      block_group->space_info->active_total_bytes
++                      < block_group->length);
+       }
+       block_group->space_info->total_bytes -= block_group->length;
++      if (block_group->zone_is_active)
++              block_group->space_info->active_total_bytes -= block_group->length;
+       block_group->space_info->bytes_readonly -=
+               (block_group->length - block_group->zone_unusable);
+       block_group->space_info->bytes_zone_unusable -=
+@@ -2108,7 +2113,8 @@ static int read_one_block_group(struct btrfs_fs_info *info,
+       trace_btrfs_add_block_group(info, cache, 0);
+       btrfs_update_space_info(info, cache->flags, cache->length,
+                               cache->used, cache->bytes_super,
+-                              cache->zone_unusable, &space_info);
++                              cache->zone_unusable, cache->zone_is_active,
++                              &space_info);
+       cache->space_info = space_info;
+@@ -2178,7 +2184,7 @@ static int fill_dummy_bgs(struct btrfs_fs_info *fs_info)
+               }
+               btrfs_update_space_info(fs_info, bg->flags, em->len, em->len,
+-                                      0, 0, &space_info);
++                                      0, 0, false, &space_info);
+               bg->space_info = space_info;
+               link_block_group(bg);
+@@ -2559,7 +2565,7 @@ struct btrfs_block_group *btrfs_make_block_group(struct btrfs_trans_handle *tran
+       trace_btrfs_add_block_group(fs_info, cache, 1);
+       btrfs_update_space_info(fs_info, cache->flags, size, bytes_used,
+                               cache->bytes_super, cache->zone_unusable,
+-                              &cache->space_info);
++                              cache->zone_is_active, &cache->space_info);
+       btrfs_update_global_block_rsv(fs_info);
+       link_block_group(cache);
+diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
+index 51fbfd716623..ad13b9d207b1 100644
+--- a/fs/btrfs/space-info.c
++++ b/fs/btrfs/space-info.c
+@@ -295,7 +295,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info)
+ void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
+                            u64 total_bytes, u64 bytes_used,
+                            u64 bytes_readonly, u64 bytes_zone_unusable,
+-                           struct btrfs_space_info **space_info)
++                           bool active, struct btrfs_space_info **space_info)
+ {
+       struct btrfs_space_info *found;
+       int factor;
+@@ -306,6 +306,8 @@ void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
+       ASSERT(found);
+       spin_lock(&found->lock);
+       found->total_bytes += total_bytes;
++      if (active)
++              found->active_total_bytes += total_bytes;
+       found->disk_total += total_bytes * factor;
+       found->bytes_used += bytes_used;
+       found->disk_used += bytes_used * factor;
+@@ -369,6 +371,22 @@ static u64 calc_available_free_space(struct btrfs_fs_info *fs_info,
+       return avail;
+ }
++static inline u64 writable_total_bytes(struct btrfs_fs_info *fs_info,
++                                     struct btrfs_space_info *space_info)
++{
++      /*
++       * On regular filesystem, all total_bytes are always writable. On zoned
++       * filesystem, there may be a limitation imposed by max_active_zones.
++       * For metadata allocation, we cannot finish an existing active block
++       * group to avoid a deadlock. Thus, we need to consider only the active
++       * groups to be writable for metadata space.
++       */
++      if (!btrfs_is_zoned(fs_info) || (space_info->flags & BTRFS_BLOCK_GROUP_DATA))
++              return space_info->total_bytes;
++
++      return space_info->active_total_bytes;
++}
++
+ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
+                        struct btrfs_space_info *space_info, u64 bytes,
+                        enum btrfs_reserve_flush_enum flush)
+@@ -386,7 +404,7 @@ int btrfs_can_overcommit(struct btrfs_fs_info *fs_info,
+       else
+               avail = calc_available_free_space(fs_info, space_info, flush);
+-      if (used + bytes < space_info->total_bytes + avail)
++      if (used + bytes < writable_total_bytes(fs_info, space_info) + avail)
+               return 1;
+       return 0;
+ }
+@@ -422,7 +440,7 @@ void btrfs_try_granting_tickets(struct btrfs_fs_info *fs_info,
+               ticket = list_first_entry(head, struct reserve_ticket, list);
+               /* Check and see if our ticket can be satisfied now. */
+-              if ((used + ticket->bytes <= space_info->total_bytes) ||
++              if ((used + ticket->bytes <= writable_total_bytes(fs_info, space_info)) ||
+                   btrfs_can_overcommit(fs_info, space_info, ticket->bytes,
+                                        flush)) {
+                       btrfs_space_info_update_bytes_may_use(fs_info,
+@@ -753,6 +771,7 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
+ {
+       u64 used;
+       u64 avail;
++      u64 total;
+       u64 to_reclaim = space_info->reclaim_size;
+       lockdep_assert_held(&space_info->lock);
+@@ -767,8 +786,9 @@ btrfs_calc_reclaim_metadata_size(struct btrfs_fs_info *fs_info,
+        * space.  If that's the case add in our overage so we make sure to put
+        * appropriate pressure on the flushing state machine.
+        */
+-      if (space_info->total_bytes + avail < used)
+-              to_reclaim += used - (space_info->total_bytes + avail);
++      total = writable_total_bytes(fs_info, space_info);
++      if (total + avail < used)
++              to_reclaim += used - (total + avail);
+       return to_reclaim;
+ }
+@@ -778,9 +798,12 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
+ {
+       u64 global_rsv_size = fs_info->global_block_rsv.reserved;
+       u64 ordered, delalloc;
+-      u64 thresh = div_factor_fine(space_info->total_bytes, 90);
++      u64 total = writable_total_bytes(fs_info, space_info);
++      u64 thresh;
+       u64 used;
++      thresh = div_factor_fine(total, 90);
++
+       lockdep_assert_held(&space_info->lock);
+       /* If we're just plain full then async reclaim just slows us down. */
+@@ -842,8 +865,8 @@ static bool need_preemptive_reclaim(struct btrfs_fs_info *fs_info,
+                                          BTRFS_RESERVE_FLUSH_ALL);
+       used = space_info->bytes_used + space_info->bytes_reserved +
+              space_info->bytes_readonly + global_rsv_size;
+-      if (used < space_info->total_bytes)
+-              thresh += space_info->total_bytes - used;
++      if (used < total)
++              thresh += total - used;
+       thresh >>= space_info->clamp;
+       used = space_info->bytes_pinned;
+@@ -1560,7 +1583,7 @@ static int __reserve_bytes(struct btrfs_fs_info *fs_info,
+        * can_overcommit() to ensure we can overcommit to continue.
+        */
+       if (!pending_tickets &&
+-          ((used + orig_bytes <= space_info->total_bytes) ||
++          ((used + orig_bytes <= writable_total_bytes(fs_info, space_info)) ||
+            btrfs_can_overcommit(fs_info, space_info, orig_bytes, flush))) {
+               btrfs_space_info_update_bytes_may_use(fs_info, space_info,
+                                                     orig_bytes);
+diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
+index e7de24a529cf..12fd6147f92d 100644
+--- a/fs/btrfs/space-info.h
++++ b/fs/btrfs/space-info.h
+@@ -19,6 +19,8 @@ struct btrfs_space_info {
+       u64 bytes_may_use;      /* number of bytes that may be used for
+                                  delalloc/allocations */
+       u64 bytes_readonly;     /* total bytes that are read only */
++      /* Total bytes in the space, but only accounts active block groups. */
++      u64 active_total_bytes;
+       u64 bytes_zone_unusable;        /* total bytes that are unusable until
+                                          resetting the device zone */
+@@ -124,7 +126,7 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
+ void btrfs_update_space_info(struct btrfs_fs_info *info, u64 flags,
+                            u64 total_bytes, u64 bytes_used,
+                            u64 bytes_readonly, u64 bytes_zone_unusable,
+-                           struct btrfs_space_info **space_info);
++                           bool active, struct btrfs_space_info **space_info);
+ void btrfs_update_space_info_chunk_size(struct btrfs_space_info *space_info,
+                                       u64 chunk_size);
+ struct btrfs_space_info *btrfs_find_space_info(struct btrfs_fs_info *info,
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index eb9eb9e72187..16ed426a58c9 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -1849,6 +1849,7 @@ struct btrfs_device *btrfs_zoned_get_device(struct btrfs_fs_info *fs_info,
+ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
+ {
+       struct btrfs_fs_info *fs_info = block_group->fs_info;
++      struct btrfs_space_info *space_info = block_group->space_info;
+       struct map_lookup *map;
+       struct btrfs_device *device;
+       u64 physical;
+@@ -1860,6 +1861,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
+       map = block_group->physical_map;
++      spin_lock(&space_info->lock);
+       spin_lock(&block_group->lock);
+       if (block_group->zone_is_active) {
+               ret = true;
+@@ -1888,7 +1890,10 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
+       /* Successfully activated all the zones */
+       block_group->zone_is_active = 1;
++      space_info->active_total_bytes += block_group->length;
+       spin_unlock(&block_group->lock);
++      btrfs_try_granting_tickets(fs_info, space_info);
++      spin_unlock(&space_info->lock);
+       /* For the active block group list */
+       btrfs_get_block_group(block_group);
+@@ -1901,6 +1906,7 @@ bool btrfs_zone_activate(struct btrfs_block_group *block_group)
+ out_unlock:
+       spin_unlock(&block_group->lock);
++      spin_unlock(&space_info->lock);
+       return ret;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.19/btrfs-zoned-revive-max_zone_append_bytes.patch b/queue-5.19/btrfs-zoned-revive-max_zone_append_bytes.patch
new file mode 100644 (file)
index 0000000..5e2f2ad
--- /dev/null
@@ -0,0 +1,108 @@
+From cfa300405e0e086b659834007837684fb50f9d09 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:39 +0900
+Subject: btrfs: zoned: revive max_zone_append_bytes
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit c2ae7b772ef4e86c5ddf3fd47bf59045ae96a414 ]
+
+This patch is basically a revert of commit 5a80d1c6a270 ("btrfs: zoned:
+remove max_zone_append_size logic"), but without unnecessary ASSERT and
+check. The max_zone_append_size will be used as a hint to estimate the
+number of extents to cover delalloc/writeback region in the later commits.
+
+The size of a ZONE APPEND bio is also limited by queue_max_segments(), so
+this commit considers it to calculate max_zone_append_size. Technically, a
+bio can be larger than queue_max_segments() * PAGE_SIZE if the pages are
+contiguous. But, it is safe to consider "queue_max_segments() * PAGE_SIZE"
+as an upper limit of an extent size to calculate the number of extents
+needed to write data.
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/ctree.h |  2 ++
+ fs/btrfs/zoned.c | 17 +++++++++++++++++
+ fs/btrfs/zoned.h |  1 +
+ 3 files changed, 20 insertions(+)
+
+diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
+index 9c21e214d29e..7abfbfd7c94c 100644
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -1047,6 +1047,8 @@ struct btrfs_fs_info {
+        */
+       u64 zone_size;
++      /* Max size to emit ZONE_APPEND write command */
++      u64 max_zone_append_size;
+       struct mutex zoned_meta_io_lock;
+       spinlock_t treelog_bg_lock;
+       u64 treelog_bg;
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index d99026df6f67..52607569cf49 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -415,6 +415,16 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
+       nr_sectors = bdev_nr_sectors(bdev);
+       zone_info->zone_size_shift = ilog2(zone_info->zone_size);
+       zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors);
++      /*
++       * We limit max_zone_append_size also by max_segments *
++       * PAGE_SIZE. Technically, we can have multiple pages per segment. But,
++       * since btrfs adds the pages one by one to a bio, and btrfs cannot
++       * increase the metadata reservation even if it increases the number of
++       * extents, it is safe to stick with the limit.
++       */
++      zone_info->max_zone_append_size =
++              min_t(u64, (u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
++                    (u64)bdev_max_segments(bdev) << PAGE_SHIFT);
+       if (!IS_ALIGNED(nr_sectors, zone_sectors))
+               zone_info->nr_zones++;
+@@ -640,6 +650,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
+       u64 zoned_devices = 0;
+       u64 nr_devices = 0;
+       u64 zone_size = 0;
++      u64 max_zone_append_size = 0;
+       const bool incompat_zoned = btrfs_fs_incompat(fs_info, ZONED);
+       int ret = 0;
+@@ -674,6 +685,11 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
+                               ret = -EINVAL;
+                               goto out;
+                       }
++                      if (!max_zone_append_size ||
++                          (zone_info->max_zone_append_size &&
++                           zone_info->max_zone_append_size < max_zone_append_size))
++                              max_zone_append_size =
++                                      zone_info->max_zone_append_size;
+               }
+               nr_devices++;
+       }
+@@ -723,6 +739,7 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
+       }
+       fs_info->zone_size = zone_size;
++      fs_info->max_zone_append_size = max_zone_append_size;
+       fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED;
+       /*
+diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
+index 6b2eec99162b..9caeab07fd38 100644
+--- a/fs/btrfs/zoned.h
++++ b/fs/btrfs/zoned.h
+@@ -19,6 +19,7 @@ struct btrfs_zoned_device_info {
+        */
+       u64 zone_size;
+       u8  zone_size_shift;
++      u64 max_zone_append_size;
+       u32 nr_zones;
+       unsigned int max_active_zones;
+       atomic_t active_zones_left;
+-- 
+2.35.1
+
diff --git a/queue-5.19/btrfs-zoned-wait-until-zone-is-finished-when-allocat.patch b/queue-5.19/btrfs-zoned-wait-until-zone-is-finished-when-allocat.patch
new file mode 100644 (file)
index 0000000..7212357
--- /dev/null
@@ -0,0 +1,114 @@
+From 4da18530914a295587102fff8279cbdd7768236f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:50 +0900
+Subject: btrfs: zoned: wait until zone is finished when allocation didn't
+ progress
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 2ce543f478433a0eec0f72090d7e814f1d53d456 ]
+
+When the allocated position doesn't progress, we cannot submit IOs to
+finish a block group, but there should be ongoing IOs that will finish a
+block group. So, in that case, we wait for a zone to be finished and retry
+the allocation after that.
+
+Introduce a new flag BTRFS_FS_NEED_ZONE_FINISH for fs_info->flags to
+indicate we need a zone finish to have proceeded. The flag is set when the
+allocator detected it cannot activate a new block group. And, it is cleared
+once a zone is finished.
+
+CC: stable@vger.kernel.org # 5.16+
+Fixes: afba2bc036b0 ("btrfs: zoned: implement active zone tracking")
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/ctree.h   | 5 +++++
+ fs/btrfs/disk-io.c | 1 +
+ fs/btrfs/inode.c   | 9 +++++++--
+ fs/btrfs/zoned.c   | 6 ++++++
+ 4 files changed, 19 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
+index d306db5dbdc2..3a51d0c13a95 100644
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -627,6 +627,9 @@ enum {
+       /* Indicate we have half completed snapshot deletions pending. */
+       BTRFS_FS_UNFINISHED_DROPS,
++      /* Indicate we have to finish a zone to do next allocation. */
++      BTRFS_FS_NEED_ZONE_FINISH,
++
+ #if BITS_PER_LONG == 32
+       /* Indicate if we have error/warn message printed on 32bit systems */
+       BTRFS_FS_32BIT_ERROR,
+@@ -1063,6 +1066,8 @@ struct btrfs_fs_info {
+       spinlock_t zone_active_bgs_lock;
+       struct list_head zone_active_bgs;
++      /* Waiters when BTRFS_FS_NEED_ZONE_FINISH is set */
++      wait_queue_head_t zone_finish_wait;
+ #ifdef CONFIG_BTRFS_FS_REF_VERIFY
+       spinlock_t ref_verify_lock;
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 804dcc69787d..bc3030661583 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -3255,6 +3255,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
+       init_waitqueue_head(&fs_info->transaction_blocked_wait);
+       init_waitqueue_head(&fs_info->async_submit_wait);
+       init_waitqueue_head(&fs_info->delayed_iputs_wait);
++      init_waitqueue_head(&fs_info->zone_finish_wait);
+       /* Usable values until the real ones are cached from the superblock */
+       fs_info->nodesize = 4096;
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 4f5249f5cb34..61496ecb1e20 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -1642,8 +1642,13 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
+               if (ret == 0)
+                       done_offset = end;
+-              if (done_offset == start)
+-                      return -ENOSPC;
++              if (done_offset == start) {
++                      struct btrfs_fs_info *info = inode->root->fs_info;
++
++                      wait_var_event(&info->zone_finish_wait,
++                                     !test_bit(BTRFS_FS_NEED_ZONE_FINISH, &info->flags));
++                      continue;
++              }
+               if (!locked_page_done) {
+                       __set_page_dirty_nobuffers(locked_page);
+diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
+index 4df5b36dc574..31cb11daa8e8 100644
+--- a/fs/btrfs/zoned.c
++++ b/fs/btrfs/zoned.c
+@@ -2007,6 +2007,9 @@ static int do_zone_finish(struct btrfs_block_group *block_group, bool fully_writ
+       /* For active_bg_list */
+       btrfs_put_block_group(block_group);
++      clear_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
++      wake_up_all(&fs_info->zone_finish_wait);
++
+       return 0;
+ }
+@@ -2043,6 +2046,9 @@ bool btrfs_can_activate_zone(struct btrfs_fs_devices *fs_devices, u64 flags)
+       }
+       mutex_unlock(&fs_info->chunk_mutex);
++      if (!ret)
++              set_bit(BTRFS_FS_NEED_ZONE_FINISH, &fs_info->flags);
++
+       return ret;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.19/btrfs-zoned-write-out-partially-allocated-region.patch b/queue-5.19/btrfs-zoned-write-out-partially-allocated-region.patch
new file mode 100644 (file)
index 0000000..4226b20
--- /dev/null
@@ -0,0 +1,186 @@
+From 16c9042fc56b17224776ce43f2c452d1d82f8ef1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 9 Jul 2022 08:18:49 +0900
+Subject: btrfs: zoned: write out partially allocated region
+
+From: Naohiro Aota <naohiro.aota@wdc.com>
+
+[ Upstream commit 898793d992c23dac6126a6a94ad893eae1a2c9df ]
+
+cow_file_range() works in an all-or-nothing way: if it fails to allocate an
+extent for a part of the given region, it gives up all the region including
+the successfully allocated parts. On cow_file_range(), run_delalloc_zoned()
+writes data for the region only when it successfully allocate all the
+region.
+
+This all-or-nothing allocation and write-out are problematic when available
+space in all the block groups are get tight with the active zone
+restriction. btrfs_reserve_extent() try hard to utilize the left space in
+the active block groups and gives up finally and fails with
+-ENOSPC. However, if we send IOs for the successfully allocated region, we
+can finish a zone and can continue on the rest of the allocation on a newly
+allocated block group.
+
+This patch implements the partial write-out for run_delalloc_zoned(). With
+this patch applied, cow_file_range() returns -EAGAIN to tell the caller to
+do something to progress the further allocation, and tells the successfully
+allocated region with done_offset. Furthermore, the zoned extent allocator
+returns -EAGAIN to tell cow_file_range() going back to the caller side.
+
+Actually, we still need to wait for an IO to complete to continue the
+allocation. The next patch implements that part.
+
+CC: stable@vger.kernel.org # 5.16+
+Fixes: afba2bc036b0 ("btrfs: zoned: implement active zone tracking")
+Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent-tree.c | 10 +++++++
+ fs/btrfs/inode.c       | 63 ++++++++++++++++++++++++++++++++----------
+ 2 files changed, 59 insertions(+), 14 deletions(-)
+
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index ad45083c6461..f2c79838ebe5 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -4012,6 +4012,16 @@ static int can_allocate_chunk_zoned(struct btrfs_fs_info *fs_info,
+       if (ffe_ctl->max_extent_size >= ffe_ctl->min_alloc_size)
+               return -ENOSPC;
++      /*
++       * Even min_alloc_size is not left in any block groups. Since we cannot
++       * activate a new block group, allocating it may not help. Let's tell a
++       * caller to try again and hope it progress something by writing some
++       * parts of the region. That is only possible for data block groups,
++       * where a part of the region can be written.
++       */
++      if (ffe_ctl->flags & BTRFS_BLOCK_GROUP_DATA)
++              return -EAGAIN;
++
+       /*
+        * We cannot activate a new block group and no enough space left in any
+        * block groups. So, allocating a new block group may not help. But,
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index 30e454197fb9..4f5249f5cb34 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -118,7 +118,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
+ static noinline int cow_file_range(struct btrfs_inode *inode,
+                                  struct page *locked_page,
+                                  u64 start, u64 end, int *page_started,
+-                                 unsigned long *nr_written, int unlock);
++                                 unsigned long *nr_written, int unlock,
++                                 u64 *done_offset);
+ static struct extent_map *create_io_em(struct btrfs_inode *inode, u64 start,
+                                      u64 len, u64 orig_start, u64 block_start,
+                                      u64 block_len, u64 orig_block_len,
+@@ -920,7 +921,7 @@ static int submit_uncompressed_range(struct btrfs_inode *inode,
+        * can directly submit them without interruption.
+        */
+       ret = cow_file_range(inode, locked_page, start, end, &page_started,
+-                           &nr_written, 0);
++                           &nr_written, 0, NULL);
+       /* Inline extent inserted, page gets unlocked and everything is done */
+       if (page_started) {
+               ret = 0;
+@@ -1169,7 +1170,8 @@ static u64 get_extent_allocation_hint(struct btrfs_inode *inode, u64 start,
+ static noinline int cow_file_range(struct btrfs_inode *inode,
+                                  struct page *locked_page,
+                                  u64 start, u64 end, int *page_started,
+-                                 unsigned long *nr_written, int unlock)
++                                 unsigned long *nr_written, int unlock,
++                                 u64 *done_offset)
+ {
+       struct btrfs_root *root = inode->root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
+@@ -1362,6 +1364,21 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
+       btrfs_dec_block_group_reservations(fs_info, ins.objectid);
+       btrfs_free_reserved_extent(fs_info, ins.objectid, ins.offset, 1);
+ out_unlock:
++      /*
++       * If done_offset is non-NULL and ret == -EAGAIN, we expect the
++       * caller to write out the successfully allocated region and retry.
++       */
++      if (done_offset && ret == -EAGAIN) {
++              if (orig_start < start)
++                      *done_offset = start - 1;
++              else
++                      *done_offset = start;
++              return ret;
++      } else if (ret == -EAGAIN) {
++              /* Convert to -ENOSPC since the caller cannot retry. */
++              ret = -ENOSPC;
++      }
++
+       /*
+        * Now, we have three regions to clean up:
+        *
+@@ -1607,19 +1624,37 @@ static noinline int run_delalloc_zoned(struct btrfs_inode *inode,
+                                      u64 end, int *page_started,
+                                      unsigned long *nr_written)
+ {
++      u64 done_offset = end;
+       int ret;
++      bool locked_page_done = false;
+-      ret = cow_file_range(inode, locked_page, start, end, page_started,
+-                           nr_written, 0);
+-      if (ret)
+-              return ret;
++      while (start <= end) {
++              ret = cow_file_range(inode, locked_page, start, end, page_started,
++                                   nr_written, 0, &done_offset);
++              if (ret && ret != -EAGAIN)
++                      return ret;
+-      if (*page_started)
+-              return 0;
++              if (*page_started) {
++                      ASSERT(ret == 0);
++                      return 0;
++              }
++
++              if (ret == 0)
++                      done_offset = end;
++
++              if (done_offset == start)
++                      return -ENOSPC;
++
++              if (!locked_page_done) {
++                      __set_page_dirty_nobuffers(locked_page);
++                      account_page_redirty(locked_page);
++              }
++              locked_page_done = true;
++              extent_write_locked_range(&inode->vfs_inode, start, done_offset);
++
++              start = done_offset + 1;
++      }
+-      __set_page_dirty_nobuffers(locked_page);
+-      account_page_redirty(locked_page);
+-      extent_write_locked_range(&inode->vfs_inode, start, end);
+       *page_started = 1;
+       return 0;
+@@ -1711,7 +1746,7 @@ static int fallback_to_cow(struct btrfs_inode *inode, struct page *locked_page,
+       }
+       return cow_file_range(inode, locked_page, start, end, page_started,
+-                            nr_written, 1);
++                            nr_written, 1, NULL);
+ }
+ struct can_nocow_file_extent_args {
+@@ -2184,7 +2219,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct page *locked_page
+                                                page_started, nr_written);
+               else
+                       ret = cow_file_range(inode, locked_page, start, end,
+-                                           page_started, nr_written, 1);
++                                           page_started, nr_written, 1, NULL);
+       } else {
+               set_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, &inode->runtime_flags);
+               ret = cow_file_range_async(inode, wbc, locked_page, start, end,
+-- 
+2.35.1
+
diff --git a/queue-5.19/coresight-clear-the-connection-field-properly.patch-22500 b/queue-5.19/coresight-clear-the-connection-field-properly.patch-22500
new file mode 100644 (file)
index 0000000..4a2039b
--- /dev/null
@@ -0,0 +1,123 @@
+From 8eeb51ad2a4674628201ee893e70546808e56fb4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Jun 2022 22:40:24 +0100
+Subject: coresight: Clear the connection field properly
+
+From: Suzuki K Poulose <suzuki.poulose@arm.com>
+
+[ Upstream commit 2af89ebacf299b7fba5f3087d35e8a286ec33706 ]
+
+coresight devices track their connections (output connections) and
+hold a reference to the fwnode. When a device goes away, we walk through
+the devices on the coresight bus and make sure that the references
+are dropped. This happens both ways:
+ a) For all output connections from the device, drop the reference to
+    the target device via coresight_release_platform_data()
+
+b) Iterate over all the devices on the coresight bus and drop the
+   reference to fwnode if *this* device is the target of the output
+   connection, via coresight_remove_conns()->coresight_remove_match().
+
+However, the coresight_remove_match() doesn't clear the fwnode field,
+after dropping the reference, this causes use-after-free and
+additional refcount drops on the fwnode.
+
+e.g., if we have two devices, A and B, with a connection, A -> B.
+If we remove B first, B would clear the reference on B, from A
+via coresight_remove_match(). But when A is removed, it still has
+a connection with fwnode still pointing to B. Thus it tries to  drops
+the reference in coresight_release_platform_data(), raising the bells
+like :
+
+[   91.990153] ------------[ cut here ]------------
+[   91.990163] refcount_t: addition on 0; use-after-free.
+[   91.990212] WARNING: CPU: 0 PID: 461 at lib/refcount.c:25 refcount_warn_saturate+0xa0/0x144
+[   91.990260] Modules linked in: coresight_funnel coresight_replicator coresight_etm4x(-)
+ crct10dif_ce coresight ip_tables x_tables ipv6 [last unloaded: coresight_cpu_debug]
+[   91.990398] CPU: 0 PID: 461 Comm: rmmod Tainted: G        W       T 5.19.0-rc2+ #53
+[   91.990418] Hardware name: ARM LTD ARM Juno Development Platform/ARM Juno Development Platform, BIOS EDK II Feb  1 2019
+[   91.990434] pstate: 600000c5 (nZCv daIF -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+[   91.990454] pc : refcount_warn_saturate+0xa0/0x144
+[   91.990476] lr : refcount_warn_saturate+0xa0/0x144
+[   91.990496] sp : ffff80000c843640
+[   91.990509] x29: ffff80000c843640 x28: ffff800009957c28 x27: ffff80000c8439a8
+[   91.990560] x26: ffff00097eff1990 x25: ffff8000092b6ad8 x24: ffff00097eff19a8
+[   91.990610] x23: ffff80000c8439a8 x22: 0000000000000000 x21: ffff80000c8439c2
+[   91.990659] x20: 0000000000000000 x19: ffff00097eff1a10 x18: ffff80000ab99c40
+[   91.990708] x17: 0000000000000000 x16: 0000000000000000 x15: ffff80000abf6fa0
+[   91.990756] x14: 000000000000001d x13: 0a2e656572662d72 x12: 657466612d657375
+[   91.990805] x11: 203b30206e6f206e x10: 6f69746964646120 x9 : ffff8000081aba28
+[   91.990854] x8 : 206e6f206e6f6974 x7 : 69646461203a745f x6 : 746e756f63666572
+[   91.990903] x5 : ffff00097648ec58 x4 : 0000000000000000 x3 : 0000000000000027
+[   91.990952] x2 : 0000000000000000 x1 : 0000000000000000 x0 : ffff00080260ba00
+[   91.991000] Call trace:
+[   91.991012]  refcount_warn_saturate+0xa0/0x144
+[   91.991034]  kobject_get+0xac/0xb0
+[   91.991055]  of_node_get+0x2c/0x40
+[   91.991076]  of_fwnode_get+0x40/0x60
+[   91.991094]  fwnode_handle_get+0x3c/0x60
+[   91.991116]  fwnode_get_nth_parent+0xf4/0x110
+[   91.991137]  fwnode_full_name_string+0x48/0xc0
+[   91.991158]  device_node_string+0x41c/0x530
+[   91.991178]  pointer+0x320/0x3ec
+[   91.991198]  vsnprintf+0x23c/0x750
+[   91.991217]  vprintk_store+0x104/0x4b0
+[   91.991238]  vprintk_emit+0x8c/0x360
+[   91.991257]  vprintk_default+0x44/0x50
+[   91.991276]  vprintk+0xcc/0xf0
+[   91.991295]  _printk+0x68/0x90
+[   91.991315]  of_node_release+0x13c/0x14c
+[   91.991334]  kobject_put+0x98/0x114
+[   91.991354]  of_node_put+0x24/0x34
+[   91.991372]  of_fwnode_put+0x40/0x5c
+[   91.991390]  fwnode_handle_put+0x38/0x50
+[   91.991411]  coresight_release_platform_data+0x74/0xb0 [coresight]
+[   91.991472]  coresight_unregister+0x64/0xcc [coresight]
+[   91.991525]  etm4_remove_dev+0x64/0x78 [coresight_etm4x]
+[   91.991563]  etm4_remove_amba+0x1c/0x2c [coresight_etm4x]
+[   91.991598]  amba_remove+0x3c/0x19c
+
+Reproducible by: (Build all coresight components as modules):
+
+  #!/bin/sh
+  while true
+  do
+     for m in tmc stm cpu_debug etm4x replicator funnel
+     do
+       modprobe coresight_${m}
+     done
+
+     for m in tmc stm cpu_debug etm4x replicator funnel
+     do
+       rmmode coresight_${m}
+     done
+  done
+
+Cc: stable@vger.kernel.org
+Cc: Mathieu Poirier <mathieu.poirier@linaro.org>
+Cc: Mike Leach <mike.leach@linaro.org>
+Cc: Leo Yan <leo.yan@linaro.org>
+Signed-off-by: Suzuki K Poulose <suzuki.poulose@arm.com>
+Fixes: 37ea1ffddffa ("coresight: Use fwnode handle instead of device names")
+Link: https://lore.kernel.org/r/20220614214024.3005275-1-suzuki.poulose@arm.com
+Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hwtracing/coresight/coresight-core.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
+index ee6ce92ab4c3..1edfec1e9d18 100644
+--- a/drivers/hwtracing/coresight/coresight-core.c
++++ b/drivers/hwtracing/coresight/coresight-core.c
+@@ -1424,6 +1424,7 @@ static int coresight_remove_match(struct device *dev, void *data)
+                        * platform data.
+                        */
+                       fwnode_handle_put(conn->child_fwnode);
++                      conn->child_fwnode = NULL;
+                       /* No need to continue */
+                       break;
+               }
+-- 
+2.35.1
+
diff --git a/queue-5.19/crypto-blake2s-remove-shash-module.patch b/queue-5.19/crypto-blake2s-remove-shash-module.patch
new file mode 100644 (file)
index 0000000..342cb79
--- /dev/null
@@ -0,0 +1,957 @@
+From 7c013d575dcc55edc21d3e18296a351e137ed079 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 28 May 2022 21:44:07 +0200
+Subject: crypto: blake2s - remove shash module
+
+From: Jason A. Donenfeld <Jason@zx2c4.com>
+
+[ Upstream commit 2d16803c562ecc644803d42ba98a8e0aef9c014e ]
+
+BLAKE2s has no currently known use as an shash. Just remove all of this
+unnecessary plumbing. Removing this shash was something we talked about
+back when we were making BLAKE2s a built-in, but I simply never got
+around to doing it. So this completes that project.
+
+Importantly, this fixs a bug in which the lib code depends on
+crypto_simd_disabled_for_test, causing linker errors.
+
+Also add more alignment tests to the selftests and compare SIMD and
+non-SIMD compression functions, to make up for what we lose from
+testmgr.c.
+
+Reported-by: gaochao <gaochao49@huawei.com>
+Cc: Eric Biggers <ebiggers@kernel.org>
+Cc: Ard Biesheuvel <ardb@kernel.org>
+Cc: stable@vger.kernel.org
+Fixes: 6048fdcc5f26 ("lib/crypto: blake2s: include as built-in")
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm/crypto/Kconfig           |   2 +-
+ arch/arm/crypto/Makefile          |   4 +-
+ arch/arm/crypto/blake2s-shash.c   |  75 -----------
+ arch/x86/crypto/Makefile          |   4 +-
+ arch/x86/crypto/blake2s-glue.c    |   3 +-
+ arch/x86/crypto/blake2s-shash.c   |  77 -----------
+ crypto/Kconfig                    |  20 +--
+ crypto/Makefile                   |   1 -
+ crypto/blake2s_generic.c          |  75 -----------
+ crypto/tcrypt.c                   |  12 --
+ crypto/testmgr.c                  |  24 ----
+ crypto/testmgr.h                  | 217 ------------------------------
+ include/crypto/internal/blake2s.h | 108 ---------------
+ lib/crypto/blake2s-selftest.c     |  41 ++++++
+ lib/crypto/blake2s.c              |  37 ++++-
+ 15 files changed, 76 insertions(+), 624 deletions(-)
+ delete mode 100644 arch/arm/crypto/blake2s-shash.c
+ delete mode 100644 arch/x86/crypto/blake2s-shash.c
+ delete mode 100644 crypto/blake2s_generic.c
+
+diff --git a/arch/arm/crypto/Kconfig b/arch/arm/crypto/Kconfig
+index e4dba5461cb3..149a5bd6b88c 100644
+--- a/arch/arm/crypto/Kconfig
++++ b/arch/arm/crypto/Kconfig
+@@ -63,7 +63,7 @@ config CRYPTO_SHA512_ARM
+         using optimized ARM assembler and NEON, when available.
+ config CRYPTO_BLAKE2S_ARM
+-      tristate "BLAKE2s digest algorithm (ARM)"
++      bool "BLAKE2s digest algorithm (ARM)"
+       select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
+       help
+         BLAKE2s digest algorithm optimized with ARM scalar instructions.  This
+diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
+index 0274f81cc8ea..971e74546fb1 100644
+--- a/arch/arm/crypto/Makefile
++++ b/arch/arm/crypto/Makefile
+@@ -9,8 +9,7 @@ obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
+ obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
+ obj-$(CONFIG_CRYPTO_SHA256_ARM) += sha256-arm.o
+ obj-$(CONFIG_CRYPTO_SHA512_ARM) += sha512-arm.o
+-obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += blake2s-arm.o
+-obj-$(if $(CONFIG_CRYPTO_BLAKE2S_ARM),y) += libblake2s-arm.o
++obj-$(CONFIG_CRYPTO_BLAKE2S_ARM) += libblake2s-arm.o
+ obj-$(CONFIG_CRYPTO_BLAKE2B_NEON) += blake2b-neon.o
+ obj-$(CONFIG_CRYPTO_CHACHA20_NEON) += chacha-neon.o
+ obj-$(CONFIG_CRYPTO_POLY1305_ARM) += poly1305-arm.o
+@@ -32,7 +31,6 @@ sha256-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha256_neon_glue.o
+ sha256-arm-y  := sha256-core.o sha256_glue.o $(sha256-arm-neon-y)
+ sha512-arm-neon-$(CONFIG_KERNEL_MODE_NEON) := sha512-neon-glue.o
+ sha512-arm-y  := sha512-core.o sha512-glue.o $(sha512-arm-neon-y)
+-blake2s-arm-y   := blake2s-shash.o
+ libblake2s-arm-y:= blake2s-core.o blake2s-glue.o
+ blake2b-neon-y  := blake2b-neon-core.o blake2b-neon-glue.o
+ sha1-arm-ce-y := sha1-ce-core.o sha1-ce-glue.o
+diff --git a/arch/arm/crypto/blake2s-shash.c b/arch/arm/crypto/blake2s-shash.c
+deleted file mode 100644
+index 763c73beea2d..000000000000
+--- a/arch/arm/crypto/blake2s-shash.c
++++ /dev/null
+@@ -1,75 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0-or-later
+-/*
+- * BLAKE2s digest algorithm, ARM scalar implementation
+- *
+- * Copyright 2020 Google LLC
+- */
+-
+-#include <crypto/internal/blake2s.h>
+-#include <crypto/internal/hash.h>
+-
+-#include <linux/module.h>
+-
+-static int crypto_blake2s_update_arm(struct shash_desc *desc,
+-                                   const u8 *in, unsigned int inlen)
+-{
+-      return crypto_blake2s_update(desc, in, inlen, false);
+-}
+-
+-static int crypto_blake2s_final_arm(struct shash_desc *desc, u8 *out)
+-{
+-      return crypto_blake2s_final(desc, out, false);
+-}
+-
+-#define BLAKE2S_ALG(name, driver_name, digest_size)                   \
+-      {                                                               \
+-              .base.cra_name          = name,                         \
+-              .base.cra_driver_name   = driver_name,                  \
+-              .base.cra_priority      = 200,                          \
+-              .base.cra_flags         = CRYPTO_ALG_OPTIONAL_KEY,      \
+-              .base.cra_blocksize     = BLAKE2S_BLOCK_SIZE,           \
+-              .base.cra_ctxsize       = sizeof(struct blake2s_tfm_ctx), \
+-              .base.cra_module        = THIS_MODULE,                  \
+-              .digestsize             = digest_size,                  \
+-              .setkey                 = crypto_blake2s_setkey,        \
+-              .init                   = crypto_blake2s_init,          \
+-              .update                 = crypto_blake2s_update_arm,    \
+-              .final                  = crypto_blake2s_final_arm,     \
+-              .descsize               = sizeof(struct blake2s_state), \
+-      }
+-
+-static struct shash_alg blake2s_arm_algs[] = {
+-      BLAKE2S_ALG("blake2s-128", "blake2s-128-arm", BLAKE2S_128_HASH_SIZE),
+-      BLAKE2S_ALG("blake2s-160", "blake2s-160-arm", BLAKE2S_160_HASH_SIZE),
+-      BLAKE2S_ALG("blake2s-224", "blake2s-224-arm", BLAKE2S_224_HASH_SIZE),
+-      BLAKE2S_ALG("blake2s-256", "blake2s-256-arm", BLAKE2S_256_HASH_SIZE),
+-};
+-
+-static int __init blake2s_arm_mod_init(void)
+-{
+-      return IS_REACHABLE(CONFIG_CRYPTO_HASH) ?
+-              crypto_register_shashes(blake2s_arm_algs,
+-                                      ARRAY_SIZE(blake2s_arm_algs)) : 0;
+-}
+-
+-static void __exit blake2s_arm_mod_exit(void)
+-{
+-      if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
+-              crypto_unregister_shashes(blake2s_arm_algs,
+-                                        ARRAY_SIZE(blake2s_arm_algs));
+-}
+-
+-module_init(blake2s_arm_mod_init);
+-module_exit(blake2s_arm_mod_exit);
+-
+-MODULE_DESCRIPTION("BLAKE2s digest algorithm, ARM scalar implementation");
+-MODULE_LICENSE("GPL");
+-MODULE_AUTHOR("Eric Biggers <ebiggers@google.com>");
+-MODULE_ALIAS_CRYPTO("blake2s-128");
+-MODULE_ALIAS_CRYPTO("blake2s-128-arm");
+-MODULE_ALIAS_CRYPTO("blake2s-160");
+-MODULE_ALIAS_CRYPTO("blake2s-160-arm");
+-MODULE_ALIAS_CRYPTO("blake2s-224");
+-MODULE_ALIAS_CRYPTO("blake2s-224-arm");
+-MODULE_ALIAS_CRYPTO("blake2s-256");
+-MODULE_ALIAS_CRYPTO("blake2s-256-arm");
+diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile
+index 2831685adf6f..8ed4597fdf6a 100644
+--- a/arch/x86/crypto/Makefile
++++ b/arch/x86/crypto/Makefile
+@@ -61,9 +61,7 @@ sha256-ssse3-$(CONFIG_AS_SHA256_NI) += sha256_ni_asm.o
+ obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
+ sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
+-obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += blake2s-x86_64.o
+-blake2s-x86_64-y := blake2s-shash.o
+-obj-$(if $(CONFIG_CRYPTO_BLAKE2S_X86),y) += libblake2s-x86_64.o
++obj-$(CONFIG_CRYPTO_BLAKE2S_X86) += libblake2s-x86_64.o
+ libblake2s-x86_64-y := blake2s-core.o blake2s-glue.o
+ obj-$(CONFIG_CRYPTO_GHASH_CLMUL_NI_INTEL) += ghash-clmulni-intel.o
+diff --git a/arch/x86/crypto/blake2s-glue.c b/arch/x86/crypto/blake2s-glue.c
+index 69853c13e8fb..aaba21230528 100644
+--- a/arch/x86/crypto/blake2s-glue.c
++++ b/arch/x86/crypto/blake2s-glue.c
+@@ -4,7 +4,6 @@
+  */
+ #include <crypto/internal/blake2s.h>
+-#include <crypto/internal/simd.h>
+ #include <linux/types.h>
+ #include <linux/jump_label.h>
+@@ -33,7 +32,7 @@ void blake2s_compress(struct blake2s_state *state, const u8 *block,
+       /* SIMD disables preemption, so relax after processing each page. */
+       BUILD_BUG_ON(SZ_4K / BLAKE2S_BLOCK_SIZE < 8);
+-      if (!static_branch_likely(&blake2s_use_ssse3) || !crypto_simd_usable()) {
++      if (!static_branch_likely(&blake2s_use_ssse3) || !may_use_simd()) {
+               blake2s_compress_generic(state, block, nblocks, inc);
+               return;
+       }
+diff --git a/arch/x86/crypto/blake2s-shash.c b/arch/x86/crypto/blake2s-shash.c
+deleted file mode 100644
+index 59ae28abe35c..000000000000
+--- a/arch/x86/crypto/blake2s-shash.c
++++ /dev/null
+@@ -1,77 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0 OR MIT
+-/*
+- * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+- */
+-
+-#include <crypto/internal/blake2s.h>
+-#include <crypto/internal/simd.h>
+-#include <crypto/internal/hash.h>
+-
+-#include <linux/types.h>
+-#include <linux/kernel.h>
+-#include <linux/module.h>
+-#include <linux/sizes.h>
+-
+-#include <asm/cpufeature.h>
+-#include <asm/processor.h>
+-
+-static int crypto_blake2s_update_x86(struct shash_desc *desc,
+-                                   const u8 *in, unsigned int inlen)
+-{
+-      return crypto_blake2s_update(desc, in, inlen, false);
+-}
+-
+-static int crypto_blake2s_final_x86(struct shash_desc *desc, u8 *out)
+-{
+-      return crypto_blake2s_final(desc, out, false);
+-}
+-
+-#define BLAKE2S_ALG(name, driver_name, digest_size)                   \
+-      {                                                               \
+-              .base.cra_name          = name,                         \
+-              .base.cra_driver_name   = driver_name,                  \
+-              .base.cra_priority      = 200,                          \
+-              .base.cra_flags         = CRYPTO_ALG_OPTIONAL_KEY,      \
+-              .base.cra_blocksize     = BLAKE2S_BLOCK_SIZE,           \
+-              .base.cra_ctxsize       = sizeof(struct blake2s_tfm_ctx), \
+-              .base.cra_module        = THIS_MODULE,                  \
+-              .digestsize             = digest_size,                  \
+-              .setkey                 = crypto_blake2s_setkey,        \
+-              .init                   = crypto_blake2s_init,          \
+-              .update                 = crypto_blake2s_update_x86,    \
+-              .final                  = crypto_blake2s_final_x86,     \
+-              .descsize               = sizeof(struct blake2s_state), \
+-      }
+-
+-static struct shash_alg blake2s_algs[] = {
+-      BLAKE2S_ALG("blake2s-128", "blake2s-128-x86", BLAKE2S_128_HASH_SIZE),
+-      BLAKE2S_ALG("blake2s-160", "blake2s-160-x86", BLAKE2S_160_HASH_SIZE),
+-      BLAKE2S_ALG("blake2s-224", "blake2s-224-x86", BLAKE2S_224_HASH_SIZE),
+-      BLAKE2S_ALG("blake2s-256", "blake2s-256-x86", BLAKE2S_256_HASH_SIZE),
+-};
+-
+-static int __init blake2s_mod_init(void)
+-{
+-      if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
+-              return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+-      return 0;
+-}
+-
+-static void __exit blake2s_mod_exit(void)
+-{
+-      if (IS_REACHABLE(CONFIG_CRYPTO_HASH) && boot_cpu_has(X86_FEATURE_SSSE3))
+-              crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+-}
+-
+-module_init(blake2s_mod_init);
+-module_exit(blake2s_mod_exit);
+-
+-MODULE_ALIAS_CRYPTO("blake2s-128");
+-MODULE_ALIAS_CRYPTO("blake2s-128-x86");
+-MODULE_ALIAS_CRYPTO("blake2s-160");
+-MODULE_ALIAS_CRYPTO("blake2s-160-x86");
+-MODULE_ALIAS_CRYPTO("blake2s-224");
+-MODULE_ALIAS_CRYPTO("blake2s-224-x86");
+-MODULE_ALIAS_CRYPTO("blake2s-256");
+-MODULE_ALIAS_CRYPTO("blake2s-256-x86");
+-MODULE_LICENSE("GPL v2");
+diff --git a/crypto/Kconfig b/crypto/Kconfig
+index 7b81685b5655..c730eca940de 100644
+--- a/crypto/Kconfig
++++ b/crypto/Kconfig
+@@ -704,26 +704,8 @@ config CRYPTO_BLAKE2B
+         See https://blake2.net for further information.
+-config CRYPTO_BLAKE2S
+-      tristate "BLAKE2s digest algorithm"
+-      select CRYPTO_LIB_BLAKE2S_GENERIC
+-      select CRYPTO_HASH
+-      help
+-        Implementation of cryptographic hash function BLAKE2s
+-        optimized for 8-32bit platforms and can produce digests of any size
+-        between 1 to 32.  The keyed hash is also implemented.
+-
+-        This module provides the following algorithms:
+-
+-        - blake2s-128
+-        - blake2s-160
+-        - blake2s-224
+-        - blake2s-256
+-
+-        See https://blake2.net for further information.
+-
+ config CRYPTO_BLAKE2S_X86
+-      tristate "BLAKE2s digest algorithm (x86 accelerated version)"
++      bool "BLAKE2s digest algorithm (x86 accelerated version)"
+       depends on X86 && 64BIT
+       select CRYPTO_LIB_BLAKE2S_GENERIC
+       select CRYPTO_ARCH_HAVE_LIB_BLAKE2S
+diff --git a/crypto/Makefile b/crypto/Makefile
+index ceaaa9f34145..5243f8908e8d 100644
+--- a/crypto/Makefile
++++ b/crypto/Makefile
+@@ -84,7 +84,6 @@ obj-$(CONFIG_CRYPTO_STREEBOG) += streebog_generic.o
+ obj-$(CONFIG_CRYPTO_WP512) += wp512.o
+ CFLAGS_wp512.o := $(call cc-option,-fno-schedule-insns)  # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=79149
+ obj-$(CONFIG_CRYPTO_BLAKE2B) += blake2b_generic.o
+-obj-$(CONFIG_CRYPTO_BLAKE2S) += blake2s_generic.o
+ obj-$(CONFIG_CRYPTO_GF128MUL) += gf128mul.o
+ obj-$(CONFIG_CRYPTO_ECB) += ecb.o
+ obj-$(CONFIG_CRYPTO_CBC) += cbc.o
+diff --git a/crypto/blake2s_generic.c b/crypto/blake2s_generic.c
+deleted file mode 100644
+index 5f96a21f8788..000000000000
+--- a/crypto/blake2s_generic.c
++++ /dev/null
+@@ -1,75 +0,0 @@
+-// SPDX-License-Identifier: GPL-2.0 OR MIT
+-/*
+- * shash interface to the generic implementation of BLAKE2s
+- *
+- * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved.
+- */
+-
+-#include <crypto/internal/blake2s.h>
+-#include <crypto/internal/hash.h>
+-
+-#include <linux/types.h>
+-#include <linux/kernel.h>
+-#include <linux/module.h>
+-
+-static int crypto_blake2s_update_generic(struct shash_desc *desc,
+-                                       const u8 *in, unsigned int inlen)
+-{
+-      return crypto_blake2s_update(desc, in, inlen, true);
+-}
+-
+-static int crypto_blake2s_final_generic(struct shash_desc *desc, u8 *out)
+-{
+-      return crypto_blake2s_final(desc, out, true);
+-}
+-
+-#define BLAKE2S_ALG(name, driver_name, digest_size)                   \
+-      {                                                               \
+-              .base.cra_name          = name,                         \
+-              .base.cra_driver_name   = driver_name,                  \
+-              .base.cra_priority      = 100,                          \
+-              .base.cra_flags         = CRYPTO_ALG_OPTIONAL_KEY,      \
+-              .base.cra_blocksize     = BLAKE2S_BLOCK_SIZE,           \
+-              .base.cra_ctxsize       = sizeof(struct blake2s_tfm_ctx), \
+-              .base.cra_module        = THIS_MODULE,                  \
+-              .digestsize             = digest_size,                  \
+-              .setkey                 = crypto_blake2s_setkey,        \
+-              .init                   = crypto_blake2s_init,          \
+-              .update                 = crypto_blake2s_update_generic, \
+-              .final                  = crypto_blake2s_final_generic, \
+-              .descsize               = sizeof(struct blake2s_state), \
+-      }
+-
+-static struct shash_alg blake2s_algs[] = {
+-      BLAKE2S_ALG("blake2s-128", "blake2s-128-generic",
+-                  BLAKE2S_128_HASH_SIZE),
+-      BLAKE2S_ALG("blake2s-160", "blake2s-160-generic",
+-                  BLAKE2S_160_HASH_SIZE),
+-      BLAKE2S_ALG("blake2s-224", "blake2s-224-generic",
+-                  BLAKE2S_224_HASH_SIZE),
+-      BLAKE2S_ALG("blake2s-256", "blake2s-256-generic",
+-                  BLAKE2S_256_HASH_SIZE),
+-};
+-
+-static int __init blake2s_mod_init(void)
+-{
+-      return crypto_register_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+-}
+-
+-static void __exit blake2s_mod_exit(void)
+-{
+-      crypto_unregister_shashes(blake2s_algs, ARRAY_SIZE(blake2s_algs));
+-}
+-
+-subsys_initcall(blake2s_mod_init);
+-module_exit(blake2s_mod_exit);
+-
+-MODULE_ALIAS_CRYPTO("blake2s-128");
+-MODULE_ALIAS_CRYPTO("blake2s-128-generic");
+-MODULE_ALIAS_CRYPTO("blake2s-160");
+-MODULE_ALIAS_CRYPTO("blake2s-160-generic");
+-MODULE_ALIAS_CRYPTO("blake2s-224");
+-MODULE_ALIAS_CRYPTO("blake2s-224-generic");
+-MODULE_ALIAS_CRYPTO("blake2s-256");
+-MODULE_ALIAS_CRYPTO("blake2s-256-generic");
+-MODULE_LICENSE("GPL v2");
+diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
+index 2bacf8384f59..66b7ca1ccb23 100644
+--- a/crypto/tcrypt.c
++++ b/crypto/tcrypt.c
+@@ -1669,10 +1669,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
+               ret += tcrypt_test("rmd160");
+               break;
+-      case 41:
+-              ret += tcrypt_test("blake2s-256");
+-              break;
+-
+       case 42:
+               ret += tcrypt_test("blake2b-512");
+               break;
+@@ -2240,10 +2236,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
+               test_hash_speed("rmd160", sec, generic_hash_speed_template);
+               if (mode > 300 && mode < 400) break;
+               fallthrough;
+-      case 316:
+-              test_hash_speed("blake2s-256", sec, generic_hash_speed_template);
+-              if (mode > 300 && mode < 400) break;
+-              fallthrough;
+       case 317:
+               test_hash_speed("blake2b-512", sec, generic_hash_speed_template);
+               if (mode > 300 && mode < 400) break;
+@@ -2352,10 +2344,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
+               test_ahash_speed("rmd160", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+               fallthrough;
+-      case 416:
+-              test_ahash_speed("blake2s-256", sec, generic_hash_speed_template);
+-              if (mode > 400 && mode < 500) break;
+-              fallthrough;
+       case 417:
+               test_ahash_speed("blake2b-512", sec, generic_hash_speed_template);
+               if (mode > 400 && mode < 500) break;
+diff --git a/crypto/testmgr.c b/crypto/testmgr.c
+index 5801a8f9f713..38acebbb3ed1 100644
+--- a/crypto/testmgr.c
++++ b/crypto/testmgr.c
+@@ -4375,30 +4375,6 @@ static const struct alg_test_desc alg_test_descs[] = {
+               .suite = {
+                       .hash = __VECS(blake2b_512_tv_template)
+               }
+-      }, {
+-              .alg = "blake2s-128",
+-              .test = alg_test_hash,
+-              .suite = {
+-                      .hash = __VECS(blakes2s_128_tv_template)
+-              }
+-      }, {
+-              .alg = "blake2s-160",
+-              .test = alg_test_hash,
+-              .suite = {
+-                      .hash = __VECS(blakes2s_160_tv_template)
+-              }
+-      }, {
+-              .alg = "blake2s-224",
+-              .test = alg_test_hash,
+-              .suite = {
+-                      .hash = __VECS(blakes2s_224_tv_template)
+-              }
+-      }, {
+-              .alg = "blake2s-256",
+-              .test = alg_test_hash,
+-              .suite = {
+-                      .hash = __VECS(blakes2s_256_tv_template)
+-              }
+       }, {
+               .alg = "cbc(aes)",
+               .test = alg_test_skcipher,
+diff --git a/crypto/testmgr.h b/crypto/testmgr.h
+index 4d7449fc6a65..c29658337d96 100644
+--- a/crypto/testmgr.h
++++ b/crypto/testmgr.h
+@@ -34034,221 +34034,4 @@ static const struct hash_testvec blake2b_512_tv_template[] = {{
+                         0xae, 0x15, 0x81, 0x15, 0xd0, 0x88, 0xa0, 0x3c, },
+ }};
+-static const struct hash_testvec blakes2s_128_tv_template[] = {{
+-      .digest = (u8[]){ 0x64, 0x55, 0x0d, 0x6f, 0xfe, 0x2c, 0x0a, 0x01,
+-                        0xa1, 0x4a, 0xba, 0x1e, 0xad, 0xe0, 0x20, 0x0c, },
+-}, {
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 64,
+-      .digest = (u8[]){ 0xdc, 0x66, 0xca, 0x8f, 0x03, 0x86, 0x58, 0x01,
+-                        0xb0, 0xff, 0xe0, 0x6e, 0xd8, 0xa1, 0xa9, 0x0e, },
+-}, {
+-      .ksize = 16,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 1,
+-      .digest = (u8[]){ 0x88, 0x1e, 0x42, 0xe7, 0xbb, 0x35, 0x80, 0x82,
+-                        0x63, 0x7c, 0x0a, 0x0f, 0xd7, 0xec, 0x6c, 0x2f, },
+-}, {
+-      .ksize = 32,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 7,
+-      .digest = (u8[]){ 0xcf, 0x9e, 0x07, 0x2a, 0xd5, 0x22, 0xf2, 0xcd,
+-                        0xa2, 0xd8, 0x25, 0x21, 0x80, 0x86, 0x73, 0x1c, },
+-}, {
+-      .ksize = 1,
+-      .key = "B",
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 15,
+-      .digest = (u8[]){ 0xf6, 0x33, 0x5a, 0x2c, 0x22, 0xa0, 0x64, 0xb2,
+-                        0xb6, 0x3f, 0xeb, 0xbc, 0xd1, 0xc3, 0xe5, 0xb2, },
+-}, {
+-      .ksize = 16,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 247,
+-      .digest = (u8[]){ 0x72, 0x66, 0x49, 0x60, 0xf9, 0x4a, 0xea, 0xbe,
+-                        0x1f, 0xf4, 0x60, 0xce, 0xb7, 0x81, 0xcb, 0x09, },
+-}, {
+-      .ksize = 32,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 256,
+-      .digest = (u8[]){ 0xd5, 0xa4, 0x0e, 0xc3, 0x16, 0xc7, 0x51, 0xa6,
+-                        0x3c, 0xd0, 0xd9, 0x11, 0x57, 0xfa, 0x1e, 0xbb, },
+-}};
+-
+-static const struct hash_testvec blakes2s_160_tv_template[] = {{
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 7,
+-      .digest = (u8[]){ 0xb4, 0xf2, 0x03, 0x49, 0x37, 0xed, 0xb1, 0x3e,
+-                        0x5b, 0x2a, 0xca, 0x64, 0x82, 0x74, 0xf6, 0x62,
+-                        0xe3, 0xf2, 0x84, 0xff, },
+-}, {
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 256,
+-      .digest = (u8[]){ 0xaa, 0x56, 0x9b, 0xdc, 0x98, 0x17, 0x75, 0xf2,
+-                        0xb3, 0x68, 0x83, 0xb7, 0x9b, 0x8d, 0x48, 0xb1,
+-                        0x9b, 0x2d, 0x35, 0x05, },
+-}, {
+-      .ksize = 1,
+-      .key = "B",
+-      .digest = (u8[]){ 0x50, 0x16, 0xe7, 0x0c, 0x01, 0xd0, 0xd3, 0xc3,
+-                        0xf4, 0x3e, 0xb1, 0x6e, 0x97, 0xa9, 0x4e, 0xd1,
+-                        0x79, 0x65, 0x32, 0x93, },
+-}, {
+-      .ksize = 32,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 1,
+-      .digest = (u8[]){ 0x1c, 0x2b, 0xcd, 0x9a, 0x68, 0xca, 0x8c, 0x71,
+-                        0x90, 0x29, 0x6c, 0x54, 0xfa, 0x56, 0x4a, 0xef,
+-                        0xa2, 0x3a, 0x56, 0x9c, },
+-}, {
+-      .ksize = 16,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 15,
+-      .digest = (u8[]){ 0x36, 0xc3, 0x5f, 0x9a, 0xdc, 0x7e, 0xbf, 0x19,
+-                        0x68, 0xaa, 0xca, 0xd8, 0x81, 0xbf, 0x09, 0x34,
+-                        0x83, 0x39, 0x0f, 0x30, },
+-}, {
+-      .ksize = 1,
+-      .key = "B",
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 64,
+-      .digest = (u8[]){ 0x86, 0x80, 0x78, 0xa4, 0x14, 0xec, 0x03, 0xe5,
+-                        0xb6, 0x9a, 0x52, 0x0e, 0x42, 0xee, 0x39, 0x9d,
+-                        0xac, 0xa6, 0x81, 0x63, },
+-}, {
+-      .ksize = 32,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 247,
+-      .digest = (u8[]){ 0x2d, 0xd8, 0xd2, 0x53, 0x66, 0xfa, 0xa9, 0x01,
+-                        0x1c, 0x9c, 0xaf, 0xa3, 0xe2, 0x9d, 0x9b, 0x10,
+-                        0x0a, 0xf6, 0x73, 0xe8, },
+-}};
+-
+-static const struct hash_testvec blakes2s_224_tv_template[] = {{
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 1,
+-      .digest = (u8[]){ 0x61, 0xb9, 0x4e, 0xc9, 0x46, 0x22, 0xa3, 0x91,
+-                        0xd2, 0xae, 0x42, 0xe6, 0x45, 0x6c, 0x90, 0x12,
+-                        0xd5, 0x80, 0x07, 0x97, 0xb8, 0x86, 0x5a, 0xfc,
+-                        0x48, 0x21, 0x97, 0xbb, },
+-}, {
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 247,
+-      .digest = (u8[]){ 0x9e, 0xda, 0xc7, 0x20, 0x2c, 0xd8, 0x48, 0x2e,
+-                        0x31, 0x94, 0xab, 0x46, 0x6d, 0x94, 0xd8, 0xb4,
+-                        0x69, 0xcd, 0xae, 0x19, 0x6d, 0x9e, 0x41, 0xcc,
+-                        0x2b, 0xa4, 0xd5, 0xf6, },
+-}, {
+-      .ksize = 16,
+-      .key = blake2_ordered_sequence,
+-      .digest = (u8[]){ 0x32, 0xc0, 0xac, 0xf4, 0x3b, 0xd3, 0x07, 0x9f,
+-                        0xbe, 0xfb, 0xfa, 0x4d, 0x6b, 0x4e, 0x56, 0xb3,
+-                        0xaa, 0xd3, 0x27, 0xf6, 0x14, 0xbf, 0xb9, 0x32,
+-                        0xa7, 0x19, 0xfc, 0xb8, },
+-}, {
+-      .ksize = 1,
+-      .key = "B",
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 7,
+-      .digest = (u8[]){ 0x73, 0xad, 0x5e, 0x6d, 0xb9, 0x02, 0x8e, 0x76,
+-                        0xf2, 0x66, 0x42, 0x4b, 0x4c, 0xfa, 0x1f, 0xe6,
+-                        0x2e, 0x56, 0x40, 0xe5, 0xa2, 0xb0, 0x3c, 0xe8,
+-                        0x7b, 0x45, 0xfe, 0x05, },
+-}, {
+-      .ksize = 32,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 15,
+-      .digest = (u8[]){ 0x16, 0x60, 0xfb, 0x92, 0x54, 0xb3, 0x6e, 0x36,
+-                        0x81, 0xf4, 0x16, 0x41, 0xc3, 0x3d, 0xd3, 0x43,
+-                        0x84, 0xed, 0x10, 0x6f, 0x65, 0x80, 0x7a, 0x3e,
+-                        0x25, 0xab, 0xc5, 0x02, },
+-}, {
+-      .ksize = 16,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 64,
+-      .digest = (u8[]){ 0xca, 0xaa, 0x39, 0x67, 0x9c, 0xf7, 0x6b, 0xc7,
+-                        0xb6, 0x82, 0xca, 0x0e, 0x65, 0x36, 0x5b, 0x7c,
+-                        0x24, 0x00, 0xfa, 0x5f, 0xda, 0x06, 0x91, 0x93,
+-                        0x6a, 0x31, 0x83, 0xb5, },
+-}, {
+-      .ksize = 1,
+-      .key = "B",
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 256,
+-      .digest = (u8[]){ 0x90, 0x02, 0x26, 0xb5, 0x06, 0x9c, 0x36, 0x86,
+-                        0x94, 0x91, 0x90, 0x1e, 0x7d, 0x2a, 0x71, 0xb2,
+-                        0x48, 0xb5, 0xe8, 0x16, 0xfd, 0x64, 0x33, 0x45,
+-                        0xb3, 0xd7, 0xec, 0xcc, },
+-}};
+-
+-static const struct hash_testvec blakes2s_256_tv_template[] = {{
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 15,
+-      .digest = (u8[]){ 0xd9, 0x7c, 0x82, 0x8d, 0x81, 0x82, 0xa7, 0x21,
+-                        0x80, 0xa0, 0x6a, 0x78, 0x26, 0x83, 0x30, 0x67,
+-                        0x3f, 0x7c, 0x4e, 0x06, 0x35, 0x94, 0x7c, 0x04,
+-                        0xc0, 0x23, 0x23, 0xfd, 0x45, 0xc0, 0xa5, 0x2d, },
+-}, {
+-      .ksize = 32,
+-      .key = blake2_ordered_sequence,
+-      .digest = (u8[]){ 0x48, 0xa8, 0x99, 0x7d, 0xa4, 0x07, 0x87, 0x6b,
+-                        0x3d, 0x79, 0xc0, 0xd9, 0x23, 0x25, 0xad, 0x3b,
+-                        0x89, 0xcb, 0xb7, 0x54, 0xd8, 0x6a, 0xb7, 0x1a,
+-                        0xee, 0x04, 0x7a, 0xd3, 0x45, 0xfd, 0x2c, 0x49, },
+-}, {
+-      .ksize = 1,
+-      .key = "B",
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 1,
+-      .digest = (u8[]){ 0x22, 0x27, 0xae, 0xaa, 0x6e, 0x81, 0x56, 0x03,
+-                        0xa7, 0xe3, 0xa1, 0x18, 0xa5, 0x9a, 0x2c, 0x18,
+-                        0xf4, 0x63, 0xbc, 0x16, 0x70, 0xf1, 0xe7, 0x4b,
+-                        0x00, 0x6d, 0x66, 0x16, 0xae, 0x9e, 0x74, 0x4e, },
+-}, {
+-      .ksize = 16,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 7,
+-      .digest = (u8[]){ 0x58, 0x5d, 0xa8, 0x60, 0x1c, 0xa4, 0xd8, 0x03,
+-                        0x86, 0x86, 0x84, 0x64, 0xd7, 0xa0, 0x8e, 0x15,
+-                        0x2f, 0x05, 0xa2, 0x1b, 0xbc, 0xef, 0x7a, 0x34,
+-                        0xb3, 0xc5, 0xbc, 0x4b, 0xf0, 0x32, 0xeb, 0x12, },
+-}, {
+-      .ksize = 32,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 64,
+-      .digest = (u8[]){ 0x89, 0x75, 0xb0, 0x57, 0x7f, 0xd3, 0x55, 0x66,
+-                        0xd7, 0x50, 0xb3, 0x62, 0xb0, 0x89, 0x7a, 0x26,
+-                        0xc3, 0x99, 0x13, 0x6d, 0xf0, 0x7b, 0xab, 0xab,
+-                        0xbd, 0xe6, 0x20, 0x3f, 0xf2, 0x95, 0x4e, 0xd4, },
+-}, {
+-      .ksize = 1,
+-      .key = "B",
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 247,
+-      .digest = (u8[]){ 0x2e, 0x74, 0x1c, 0x1d, 0x03, 0xf4, 0x9d, 0x84,
+-                        0x6f, 0xfc, 0x86, 0x32, 0x92, 0x49, 0x7e, 0x66,
+-                        0xd7, 0xc3, 0x10, 0x88, 0xfe, 0x28, 0xb3, 0xe0,
+-                        0xbf, 0x50, 0x75, 0xad, 0x8e, 0xa4, 0xe6, 0xb2, },
+-}, {
+-      .ksize = 16,
+-      .key = blake2_ordered_sequence,
+-      .plaintext = blake2_ordered_sequence,
+-      .psize = 256,
+-      .digest = (u8[]){ 0xb9, 0xd2, 0x81, 0x0e, 0x3a, 0xb1, 0x62, 0x9b,
+-                        0xad, 0x44, 0x05, 0xf4, 0x92, 0x2e, 0x99, 0xc1,
+-                        0x4a, 0x47, 0xbb, 0x5b, 0x6f, 0xb2, 0x96, 0xed,
+-                        0xd5, 0x06, 0xb5, 0x3a, 0x7c, 0x7a, 0x65, 0x1d, },
+-}};
+-
+ #endif        /* _CRYPTO_TESTMGR_H */
+diff --git a/include/crypto/internal/blake2s.h b/include/crypto/internal/blake2s.h
+index 52363eee2b20..506d56530ca9 100644
+--- a/include/crypto/internal/blake2s.h
++++ b/include/crypto/internal/blake2s.h
+@@ -8,7 +8,6 @@
+ #define _CRYPTO_INTERNAL_BLAKE2S_H
+ #include <crypto/blake2s.h>
+-#include <crypto/internal/hash.h>
+ #include <linux/string.h>
+ void blake2s_compress_generic(struct blake2s_state *state, const u8 *block,
+@@ -19,111 +18,4 @@ void blake2s_compress(struct blake2s_state *state, const u8 *block,
+ bool blake2s_selftest(void);
+-static inline void blake2s_set_lastblock(struct blake2s_state *state)
+-{
+-      state->f[0] = -1;
+-}
+-
+-/* Helper functions for BLAKE2s shared by the library and shash APIs */
+-
+-static __always_inline void
+-__blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen,
+-               bool force_generic)
+-{
+-      const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
+-
+-      if (unlikely(!inlen))
+-              return;
+-      if (inlen > fill) {
+-              memcpy(state->buf + state->buflen, in, fill);
+-              if (force_generic)
+-                      blake2s_compress_generic(state, state->buf, 1,
+-                                               BLAKE2S_BLOCK_SIZE);
+-              else
+-                      blake2s_compress(state, state->buf, 1,
+-                                       BLAKE2S_BLOCK_SIZE);
+-              state->buflen = 0;
+-              in += fill;
+-              inlen -= fill;
+-      }
+-      if (inlen > BLAKE2S_BLOCK_SIZE) {
+-              const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
+-              /* Hash one less (full) block than strictly possible */
+-              if (force_generic)
+-                      blake2s_compress_generic(state, in, nblocks - 1,
+-                                               BLAKE2S_BLOCK_SIZE);
+-              else
+-                      blake2s_compress(state, in, nblocks - 1,
+-                                       BLAKE2S_BLOCK_SIZE);
+-              in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+-              inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
+-      }
+-      memcpy(state->buf + state->buflen, in, inlen);
+-      state->buflen += inlen;
+-}
+-
+-static __always_inline void
+-__blake2s_final(struct blake2s_state *state, u8 *out, bool force_generic)
+-{
+-      blake2s_set_lastblock(state);
+-      memset(state->buf + state->buflen, 0,
+-             BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
+-      if (force_generic)
+-              blake2s_compress_generic(state, state->buf, 1, state->buflen);
+-      else
+-              blake2s_compress(state, state->buf, 1, state->buflen);
+-      cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
+-      memcpy(out, state->h, state->outlen);
+-}
+-
+-/* Helper functions for shash implementations of BLAKE2s */
+-
+-struct blake2s_tfm_ctx {
+-      u8 key[BLAKE2S_KEY_SIZE];
+-      unsigned int keylen;
+-};
+-
+-static inline int crypto_blake2s_setkey(struct crypto_shash *tfm,
+-                                      const u8 *key, unsigned int keylen)
+-{
+-      struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(tfm);
+-
+-      if (keylen == 0 || keylen > BLAKE2S_KEY_SIZE)
+-              return -EINVAL;
+-
+-      memcpy(tctx->key, key, keylen);
+-      tctx->keylen = keylen;
+-
+-      return 0;
+-}
+-
+-static inline int crypto_blake2s_init(struct shash_desc *desc)
+-{
+-      const struct blake2s_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
+-      struct blake2s_state *state = shash_desc_ctx(desc);
+-      unsigned int outlen = crypto_shash_digestsize(desc->tfm);
+-
+-      __blake2s_init(state, outlen, tctx->key, tctx->keylen);
+-      return 0;
+-}
+-
+-static inline int crypto_blake2s_update(struct shash_desc *desc,
+-                                      const u8 *in, unsigned int inlen,
+-                                      bool force_generic)
+-{
+-      struct blake2s_state *state = shash_desc_ctx(desc);
+-
+-      __blake2s_update(state, in, inlen, force_generic);
+-      return 0;
+-}
+-
+-static inline int crypto_blake2s_final(struct shash_desc *desc, u8 *out,
+-                                     bool force_generic)
+-{
+-      struct blake2s_state *state = shash_desc_ctx(desc);
+-
+-      __blake2s_final(state, out, force_generic);
+-      return 0;
+-}
+-
+ #endif /* _CRYPTO_INTERNAL_BLAKE2S_H */
+diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c
+index 409e4b728770..66f505220f43 100644
+--- a/lib/crypto/blake2s-selftest.c
++++ b/lib/crypto/blake2s-selftest.c
+@@ -4,6 +4,8 @@
+  */
+ #include <crypto/internal/blake2s.h>
++#include <linux/kernel.h>
++#include <linux/random.h>
+ #include <linux/string.h>
+ /*
+@@ -587,5 +589,44 @@ bool __init blake2s_selftest(void)
+               }
+       }
++      for (i = 0; i < 32; ++i) {
++              enum { TEST_ALIGNMENT = 16 };
++              u8 unaligned_block[BLAKE2S_BLOCK_SIZE + TEST_ALIGNMENT - 1]
++                                      __aligned(TEST_ALIGNMENT);
++              u8 blocks[BLAKE2S_BLOCK_SIZE * 3];
++              struct blake2s_state state1, state2;
++
++              get_random_bytes(blocks, sizeof(blocks));
++              get_random_bytes(&state, sizeof(state));
++
++#if defined(CONFIG_CRYPTO_LIB_BLAKE2S_GENERIC) && \
++    defined(CONFIG_CRYPTO_ARCH_HAVE_LIB_BLAKE2S)
++              memcpy(&state1, &state, sizeof(state1));
++              memcpy(&state2, &state, sizeof(state2));
++              blake2s_compress(&state1, blocks, 3, BLAKE2S_BLOCK_SIZE);
++              blake2s_compress_generic(&state2, blocks, 3, BLAKE2S_BLOCK_SIZE);
++              if (memcmp(&state1, &state2, sizeof(state1))) {
++                      pr_err("blake2s random compress self-test %d: FAIL\n",
++                             i + 1);
++                      success = false;
++              }
++#endif
++
++              memcpy(&state1, &state, sizeof(state1));
++              blake2s_compress(&state1, blocks, 1, BLAKE2S_BLOCK_SIZE);
++              for (l = 1; l < TEST_ALIGNMENT; ++l) {
++                      memcpy(unaligned_block + l, blocks,
++                             BLAKE2S_BLOCK_SIZE);
++                      memcpy(&state2, &state, sizeof(state2));
++                      blake2s_compress(&state2, unaligned_block + l, 1,
++                                       BLAKE2S_BLOCK_SIZE);
++                      if (memcmp(&state1, &state2, sizeof(state1))) {
++                              pr_err("blake2s random compress align %d self-test %d: FAIL\n",
++                                     l, i + 1);
++                              success = false;
++                      }
++              }
++      }
++
+       return success;
+ }
+diff --git a/lib/crypto/blake2s.c b/lib/crypto/blake2s.c
+index c71c09621c09..98e688c6d891 100644
+--- a/lib/crypto/blake2s.c
++++ b/lib/crypto/blake2s.c
+@@ -16,16 +16,44 @@
+ #include <linux/init.h>
+ #include <linux/bug.h>
++static inline void blake2s_set_lastblock(struct blake2s_state *state)
++{
++      state->f[0] = -1;
++}
++
+ void blake2s_update(struct blake2s_state *state, const u8 *in, size_t inlen)
+ {
+-      __blake2s_update(state, in, inlen, false);
++      const size_t fill = BLAKE2S_BLOCK_SIZE - state->buflen;
++
++      if (unlikely(!inlen))
++              return;
++      if (inlen > fill) {
++              memcpy(state->buf + state->buflen, in, fill);
++              blake2s_compress(state, state->buf, 1, BLAKE2S_BLOCK_SIZE);
++              state->buflen = 0;
++              in += fill;
++              inlen -= fill;
++      }
++      if (inlen > BLAKE2S_BLOCK_SIZE) {
++              const size_t nblocks = DIV_ROUND_UP(inlen, BLAKE2S_BLOCK_SIZE);
++              blake2s_compress(state, in, nblocks - 1, BLAKE2S_BLOCK_SIZE);
++              in += BLAKE2S_BLOCK_SIZE * (nblocks - 1);
++              inlen -= BLAKE2S_BLOCK_SIZE * (nblocks - 1);
++      }
++      memcpy(state->buf + state->buflen, in, inlen);
++      state->buflen += inlen;
+ }
+ EXPORT_SYMBOL(blake2s_update);
+ void blake2s_final(struct blake2s_state *state, u8 *out)
+ {
+       WARN_ON(IS_ENABLED(DEBUG) && !out);
+-      __blake2s_final(state, out, false);
++      blake2s_set_lastblock(state);
++      memset(state->buf + state->buflen, 0,
++             BLAKE2S_BLOCK_SIZE - state->buflen); /* Padding */
++      blake2s_compress(state, state->buf, 1, state->buflen);
++      cpu_to_le32_array(state->h, ARRAY_SIZE(state->h));
++      memcpy(out, state->h, state->outlen);
+       memzero_explicit(state, sizeof(*state));
+ }
+ EXPORT_SYMBOL(blake2s_final);
+@@ -38,12 +66,7 @@ static int __init blake2s_mod_init(void)
+       return 0;
+ }
+-static void __exit blake2s_mod_exit(void)
+-{
+-}
+-
+ module_init(blake2s_mod_init);
+-module_exit(blake2s_mod_exit);
+ MODULE_LICENSE("GPL v2");
+ MODULE_DESCRIPTION("BLAKE2s hash function");
+ MODULE_AUTHOR("Jason A. Donenfeld <Jason@zx2c4.com>");
+-- 
+2.35.1
+
diff --git a/queue-5.19/crypto-ccp-use-kzalloc-for-sev-ioctl-interfaces-to-p.patch b/queue-5.19/crypto-ccp-use-kzalloc-for-sev-ioctl-interfaces-to-p.patch
new file mode 100644 (file)
index 0000000..0fbb265
--- /dev/null
@@ -0,0 +1,90 @@
+From 3d3deb65f4881a79e177b5234f8547c5c212e78e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 18 May 2022 15:31:26 +0000
+Subject: crypto: ccp - Use kzalloc for sev ioctl interfaces to prevent kernel
+ memory leak
+
+From: John Allen <john.allen@amd.com>
+
+[ Upstream commit 13dc15a3f5fd7f884e4bfa8c011a0ae868df12ae ]
+
+For some sev ioctl interfaces, input may be passed that is less than or
+equal to SEV_FW_BLOB_MAX_SIZE, but larger than the data that PSP
+firmware returns. In this case, kmalloc will allocate memory that is the
+size of the input rather than the size of the data. Since PSP firmware
+doesn't fully overwrite the buffer, the sev ioctl interfaces with the
+issue may return uninitialized slab memory.
+
+Currently, all of the ioctl interfaces in the ccp driver are safe, but
+to prevent future problems, change all ioctl interfaces that allocate
+memory with kmalloc to use kzalloc and memset the data buffer to zero
+in sev_ioctl_do_platform_status.
+
+Fixes: 38103671aad3 ("crypto: ccp: Use the stack and common buffer for status commands")
+Fixes: e799035609e15 ("crypto: ccp: Implement SEV_PEK_CSR ioctl command")
+Fixes: 76a2b524a4b1d ("crypto: ccp: Implement SEV_PDH_CERT_EXPORT ioctl command")
+Fixes: d6112ea0cb344 ("crypto: ccp - introduce SEV_GET_ID2 command")
+Cc: stable@vger.kernel.org
+Reported-by: Andy Nguyen <theflow@google.com>
+Suggested-by: David Rientjes <rientjes@google.com>
+Suggested-by: Peter Gonda <pgonda@google.com>
+Signed-off-by: John Allen <john.allen@amd.com>
+Reviewed-by: Peter Gonda <pgonda@google.com>
+Acked-by: David Rientjes <rientjes@google.com>
+Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/crypto/ccp/sev-dev.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
+index 799b476fc3e8..0c92d940ac4e 100644
+--- a/drivers/crypto/ccp/sev-dev.c
++++ b/drivers/crypto/ccp/sev-dev.c
+@@ -577,6 +577,8 @@ static int sev_ioctl_do_platform_status(struct sev_issue_cmd *argp)
+       struct sev_user_data_status data;
+       int ret;
++      memset(&data, 0, sizeof(data));
++
+       ret = __sev_do_cmd_locked(SEV_CMD_PLATFORM_STATUS, &data, &argp->error);
+       if (ret)
+               return ret;
+@@ -630,7 +632,7 @@ static int sev_ioctl_do_pek_csr(struct sev_issue_cmd *argp, bool writable)
+       if (input.length > SEV_FW_BLOB_MAX_SIZE)
+               return -EFAULT;
+-      blob = kmalloc(input.length, GFP_KERNEL);
++      blob = kzalloc(input.length, GFP_KERNEL);
+       if (!blob)
+               return -ENOMEM;
+@@ -854,7 +856,7 @@ static int sev_ioctl_do_get_id2(struct sev_issue_cmd *argp)
+       input_address = (void __user *)input.address;
+       if (input.address && input.length) {
+-              id_blob = kmalloc(input.length, GFP_KERNEL);
++              id_blob = kzalloc(input.length, GFP_KERNEL);
+               if (!id_blob)
+                       return -ENOMEM;
+@@ -973,14 +975,14 @@ static int sev_ioctl_do_pdh_export(struct sev_issue_cmd *argp, bool writable)
+       if (input.cert_chain_len > SEV_FW_BLOB_MAX_SIZE)
+               return -EFAULT;
+-      pdh_blob = kmalloc(input.pdh_cert_len, GFP_KERNEL);
++      pdh_blob = kzalloc(input.pdh_cert_len, GFP_KERNEL);
+       if (!pdh_blob)
+               return -ENOMEM;
+       data.pdh_cert_address = __psp_pa(pdh_blob);
+       data.pdh_cert_len = input.pdh_cert_len;
+-      cert_blob = kmalloc(input.cert_chain_len, GFP_KERNEL);
++      cert_blob = kzalloc(input.cert_chain_len, GFP_KERNEL);
+       if (!cert_blob) {
+               ret = -ENOMEM;
+               goto e_free_pdh;
+-- 
+2.35.1
+
diff --git a/queue-5.19/csky-abiv1-fixup-compile-error.patch-25803 b/queue-5.19/csky-abiv1-fixup-compile-error.patch-25803
new file mode 100644 (file)
index 0000000..b30581b
--- /dev/null
@@ -0,0 +1,46 @@
+From eea6736378d992206ba2376b2dae701fa4a3fabd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 31 Jul 2022 22:34:24 -0400
+Subject: csky: abiv1: Fixup compile error
+
+From: Guo Ren <guoren@linux.alibaba.com>
+
+[ Upstream commit 45fef4c4b9c94e86d9c13f0b2e7e71bb32254509 ]
+
+  LD      vmlinux.o
+arch/csky/lib/string.o: In function `memmove':
+string.c:(.text+0x108): multiple definition of `memmove'
+lib/string.o:string.c:(.text+0x7e8): first defined here
+arch/csky/lib/string.o: In function `memset':
+string.c:(.text+0x148): multiple definition of `memset'
+lib/string.o:string.c:(.text+0x2ac): first defined here
+scripts/Makefile.vmlinux_o:68: recipe for target 'vmlinux.o' failed
+make[4]: *** [vmlinux.o] Error 1
+
+Fixes: e4df2d5e852a ("csky: Add C based string functions")
+Signed-off-by: Guo Ren <guoren@linux.alibaba.com>
+Signed-off-by: Guo Ren <guoren@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/csky/abiv1/inc/abi/string.h | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/arch/csky/abiv1/inc/abi/string.h b/arch/csky/abiv1/inc/abi/string.h
+index 9d95594b0feb..de50117b904d 100644
+--- a/arch/csky/abiv1/inc/abi/string.h
++++ b/arch/csky/abiv1/inc/abi/string.h
+@@ -6,4 +6,10 @@
+ #define __HAVE_ARCH_MEMCPY
+ extern void *memcpy(void *, const void *, __kernel_size_t);
++#define __HAVE_ARCH_MEMMOVE
++extern void *memmove(void *, const void *, __kernel_size_t);
++
++#define __HAVE_ARCH_MEMSET
++extern void *memset(void *, int,  __kernel_size_t);
++
+ #endif /* __ABI_CSKY_STRING_H */
+-- 
+2.35.1
+
diff --git a/queue-5.19/dm-fix-dm-raid-crash-if-md_handle_request-splits-bio.patch b/queue-5.19/dm-fix-dm-raid-crash-if-md_handle_request-splits-bio.patch
new file mode 100644 (file)
index 0000000..bc2517e
--- /dev/null
@@ -0,0 +1,141 @@
+From 033448c1277b287ae1beffc7fe311fcda224825b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Jul 2022 13:58:04 -0400
+Subject: dm: fix dm-raid crash if md_handle_request() splits bio
+
+From: Mike Snitzer <snitzer@kernel.org>
+
+[ Upstream commit 9dd1cd3220eca534f2d47afad7ce85f4c40118d8 ]
+
+Commit ca522482e3eaf ("dm: pass NULL bdev to bio_alloc_clone")
+introduced the optimization to _not_ perform bio_associate_blkg()'s
+relatively costly work when DM core clones its bio. But in doing so it
+exposed the possibility for DM's cloned bio to alter DM target
+behavior (e.g. crash) if a target were to issue IO without first
+calling bio_set_dev().
+
+The DM raid target can trigger an MD crash due to its need to split
+the DM bio that is passed to md_handle_request(). The split will
+recurse to submit_bio_noacct() using a bio with an uninitialized
+->bi_blkg. This NULL bio->bi_blkg causes blk_throtl_bio() to
+dereference a NULL blkg_to_tg(bio->bi_blkg).
+
+Fix this in DM core by adding a new 'needs_bio_set_dev' target flag that
+will make alloc_tio() call bio_set_dev() on behalf of the target.
+dm-raid is the only target that requires this flag. bio_set_dev()
+initializes the DM cloned bio's ->bi_blkg, using bio_associate_blkg,
+before passing the bio to md_handle_request().
+
+Long-term fix would be to audit and refactor MD code to rely on DM to
+split its bio, using dm_accept_partial_bio(), but there are MD raid
+personalities (e.g. raid1 and raid10) whose implementation are tightly
+coupled to handling the bio splitting inline.
+
+Fixes: ca522482e3eaf ("dm: pass NULL bdev to bio_alloc_clone")
+Cc: stable@vger.kernel.org
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/dm-raid.c          |  1 +
+ drivers/md/dm.c               | 13 ++++++-------
+ include/linux/device-mapper.h |  6 ++++++
+ include/uapi/linux/dm-ioctl.h |  4 ++--
+ 4 files changed, 15 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
+index a55fc28d2a29..ba3638d1d046 100644
+--- a/drivers/md/dm-raid.c
++++ b/drivers/md/dm-raid.c
+@@ -3097,6 +3097,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
+       INIT_WORK(&rs->md.event_work, do_table_event);
+       ti->private = rs;
+       ti->num_flush_bios = 1;
++      ti->needs_bio_set_dev = true;
+       /* Restore any requested new layout for conversion decision */
+       rs_config_restore(rs, &rs_layout);
+diff --git a/drivers/md/dm.c b/drivers/md/dm.c
+index 2b75f1ef7386..36c704b50ac3 100644
+--- a/drivers/md/dm.c
++++ b/drivers/md/dm.c
+@@ -578,9 +578,6 @@ static struct dm_io *alloc_io(struct mapped_device *md, struct bio *bio)
+       struct bio *clone;
+       clone = bio_alloc_clone(NULL, bio, GFP_NOIO, &md->mempools->io_bs);
+-      /* Set default bdev, but target must bio_set_dev() before issuing IO */
+-      clone->bi_bdev = md->disk->part0;
+-
+       tio = clone_to_tio(clone);
+       tio->flags = 0;
+       dm_tio_set_flag(tio, DM_TIO_INSIDE_DM_IO);
+@@ -614,6 +611,7 @@ static void free_io(struct dm_io *io)
+ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
+                            unsigned target_bio_nr, unsigned *len, gfp_t gfp_mask)
+ {
++      struct mapped_device *md = ci->io->md;
+       struct dm_target_io *tio;
+       struct bio *clone;
+@@ -623,14 +621,10 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
+               /* alloc_io() already initialized embedded clone */
+               clone = &tio->clone;
+       } else {
+-              struct mapped_device *md = ci->io->md;
+-
+               clone = bio_alloc_clone(NULL, ci->bio, gfp_mask,
+                                       &md->mempools->bs);
+               if (!clone)
+                       return NULL;
+-              /* Set default bdev, but target must bio_set_dev() before issuing IO */
+-              clone->bi_bdev = md->disk->part0;
+               /* REQ_DM_POLL_LIST shouldn't be inherited */
+               clone->bi_opf &= ~REQ_DM_POLL_LIST;
+@@ -646,6 +640,11 @@ static struct bio *alloc_tio(struct clone_info *ci, struct dm_target *ti,
+       tio->len_ptr = len;
+       tio->old_sector = 0;
++      /* Set default bdev, but target must bio_set_dev() before issuing IO */
++      clone->bi_bdev = md->disk->part0;
++      if (unlikely(ti->needs_bio_set_dev))
++              bio_set_dev(clone, md->disk->part0);
++
+       if (len) {
+               clone->bi_iter.bi_size = to_bytes(*len);
+               if (bio_integrity(clone))
+diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
+index 47a01c7cffdf..e9c043f12e53 100644
+--- a/include/linux/device-mapper.h
++++ b/include/linux/device-mapper.h
+@@ -373,6 +373,12 @@ struct dm_target {
+        * after returning DM_MAPIO_SUBMITTED from its map function.
+        */
+       bool accounts_remapped_io:1;
++
++      /*
++       * Set if the target will submit the DM bio without first calling
++       * bio_set_dev(). NOTE: ideally a target should _not_ need this.
++       */
++      bool needs_bio_set_dev:1;
+ };
+ void *dm_per_bio_data(struct bio *bio, size_t data_size);
+diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h
+index 2e9550fef90f..27ad9671f2df 100644
+--- a/include/uapi/linux/dm-ioctl.h
++++ b/include/uapi/linux/dm-ioctl.h
+@@ -286,9 +286,9 @@ enum {
+ #define DM_DEV_SET_GEOMETRY   _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl)
+ #define DM_VERSION_MAJOR      4
+-#define DM_VERSION_MINOR      46
++#define DM_VERSION_MINOR      47
+ #define DM_VERSION_PATCHLEVEL 0
+-#define DM_VERSION_EXTRA      "-ioctl (2022-02-22)"
++#define DM_VERSION_EXTRA      "-ioctl (2022-07-28)"
+ /* Status bits */
+ #define DM_READONLY_FLAG      (1 << 0) /* In/Out */
+-- 
+2.35.1
+
diff --git a/queue-5.19/dm-raid-fix-address-sanitizer-warning-in-raid_resume.patch b/queue-5.19/dm-raid-fix-address-sanitizer-warning-in-raid_resume.patch
new file mode 100644 (file)
index 0000000..1d39f72
--- /dev/null
@@ -0,0 +1,38 @@
+From 5d963e809c6dcee4b6939a83155ce888494dd925 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Jul 2022 14:33:52 -0400
+Subject: dm raid: fix address sanitizer warning in raid_resume
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+[ Upstream commit 7dad24db59d2d2803576f2e3645728866a056dab ]
+
+There is a KASAN warning in raid_resume when running the lvm test
+lvconvert-raid.sh. The reason for the warning is that mddev->raid_disks
+is greater than rs->raid_disks, so the loop touches one entry beyond
+the allocated length.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/dm-raid.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
+index 4e7f870b2277..a55fc28d2a29 100644
+--- a/drivers/md/dm-raid.c
++++ b/drivers/md/dm-raid.c
+@@ -3819,7 +3819,7 @@ static void attempt_restore_of_faulty_devices(struct raid_set *rs)
+       memset(cleared_failed_devices, 0, sizeof(cleared_failed_devices));
+-      for (i = 0; i < mddev->raid_disks; i++) {
++      for (i = 0; i < rs->raid_disks; i++) {
+               r = &rs->dev[i].rdev;
+               /* HM FIXME: enhance journal device recovery processing */
+               if (test_bit(Journal, &r->flags))
+-- 
+2.35.1
+
diff --git a/queue-5.19/dm-raid-fix-address-sanitizer-warning-in-raid_status.patch b/queue-5.19/dm-raid-fix-address-sanitizer-warning-in-raid_status.patch
new file mode 100644 (file)
index 0000000..112f427
--- /dev/null
@@ -0,0 +1,68 @@
+From 8f57afae415618121d737d639496b34dd30fbcf2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Jul 2022 14:31:35 -0400
+Subject: dm raid: fix address sanitizer warning in raid_status
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+[ Upstream commit 1fbeea217d8f297fe0e0956a1516d14ba97d0396 ]
+
+There is this warning when using a kernel with the address sanitizer
+and running this testsuite:
+https://gitlab.com/cki-project/kernel-tests/-/tree/main/storage/swraid/scsi_raid
+
+==================================================================
+BUG: KASAN: slab-out-of-bounds in raid_status+0x1747/0x2820 [dm_raid]
+Read of size 4 at addr ffff888079d2c7e8 by task lvcreate/13319
+CPU: 0 PID: 13319 Comm: lvcreate Not tainted 5.18.0-0.rc3.<snip> #1
+Hardware name: Red Hat KVM, BIOS 0.5.1 01/01/2011
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x6a/0x9c
+ print_address_description.constprop.0+0x1f/0x1e0
+ print_report.cold+0x55/0x244
+ kasan_report+0xc9/0x100
+ raid_status+0x1747/0x2820 [dm_raid]
+ dm_ima_measure_on_table_load+0x4b8/0xca0 [dm_mod]
+ table_load+0x35c/0x630 [dm_mod]
+ ctl_ioctl+0x411/0x630 [dm_mod]
+ dm_ctl_ioctl+0xa/0x10 [dm_mod]
+ __x64_sys_ioctl+0x12a/0x1a0
+ do_syscall_64+0x5b/0x80
+
+The warning is caused by reading conf->max_nr_stripes in raid_status. The
+code in raid_status reads mddev->private, casts it to struct r5conf and
+reads the entry max_nr_stripes.
+
+However, if we have different raid type than 4/5/6, mddev->private
+doesn't point to struct r5conf; it may point to struct r0conf, struct
+r1conf, struct r10conf or struct mpconf. If we cast a pointer to one
+of these structs to struct r5conf, we will be reading invalid memory
+and KASAN warns about it.
+
+Fix this bug by reading struct r5conf only if raid type is 4, 5 or 6.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/dm-raid.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
+index 80c9f7134e9b..4e7f870b2277 100644
+--- a/drivers/md/dm-raid.c
++++ b/drivers/md/dm-raid.c
+@@ -3509,7 +3509,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
+ {
+       struct raid_set *rs = ti->private;
+       struct mddev *mddev = &rs->md;
+-      struct r5conf *conf = mddev->private;
++      struct r5conf *conf = rs_is_raid456(rs) ? mddev->private : NULL;
+       int i, max_nr_stripes = conf ? conf->max_nr_stripes : 0;
+       unsigned long recovery;
+       unsigned int raid_param_cnt = 1; /* at least 1 for chunksize */
+-- 
+2.35.1
+
diff --git a/queue-5.19/dm-thin-fix-use-after-free-crash-in-dm_sm_register_t.patch b/queue-5.19/dm-thin-fix-use-after-free-crash-in-dm_sm_register_t.patch
new file mode 100644 (file)
index 0000000..91d8def
--- /dev/null
@@ -0,0 +1,96 @@
+From f18f1fd506778a048bce5dfdad4f6730ca0fba5a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Jul 2022 19:28:25 +0800
+Subject: dm thin: fix use-after-free crash in
+ dm_sm_register_threshold_callback
+
+From: Luo Meng <luomeng12@huawei.com>
+
+[ Upstream commit 3534e5a5ed2997ca1b00f44a0378a075bd05e8a3 ]
+
+Fault inject on pool metadata device reports:
+  BUG: KASAN: use-after-free in dm_pool_register_metadata_threshold+0x40/0x80
+  Read of size 8 at addr ffff8881b9d50068 by task dmsetup/950
+
+  CPU: 7 PID: 950 Comm: dmsetup Tainted: G        W         5.19.0-rc6 #1
+  Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-1.fc33 04/01/2014
+  Call Trace:
+   <TASK>
+   dump_stack_lvl+0x34/0x44
+   print_address_description.constprop.0.cold+0xeb/0x3f4
+   kasan_report.cold+0xe6/0x147
+   dm_pool_register_metadata_threshold+0x40/0x80
+   pool_ctr+0xa0a/0x1150
+   dm_table_add_target+0x2c8/0x640
+   table_load+0x1fd/0x430
+   ctl_ioctl+0x2c4/0x5a0
+   dm_ctl_ioctl+0xa/0x10
+   __x64_sys_ioctl+0xb3/0xd0
+   do_syscall_64+0x35/0x80
+   entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+This can be easily reproduced using:
+  echo offline > /sys/block/sda/device/state
+  dd if=/dev/zero of=/dev/mapper/thin bs=4k count=10
+  dmsetup load pool --table "0 20971520 thin-pool /dev/sda /dev/sdb 128 0 0"
+
+If a metadata commit fails, the transaction will be aborted and the
+metadata space maps will be destroyed. If a DM table reload then
+happens for this failed thin-pool, a use-after-free will occur in
+dm_sm_register_threshold_callback (called from
+dm_pool_register_metadata_threshold).
+
+Fix this by in dm_pool_register_metadata_threshold() by returning the
+-EINVAL error if the thin-pool is in fail mode. Also fail pool_ctr()
+with a new error message: "Error registering metadata threshold".
+
+Fixes: ac8c3f3df65e4 ("dm thin: generate event when metadata threshold passed")
+Cc: stable@vger.kernel.org
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Luo Meng <luomeng12@huawei.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/dm-thin-metadata.c | 7 +++++--
+ drivers/md/dm-thin.c          | 4 +++-
+ 2 files changed, 8 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/md/dm-thin-metadata.c b/drivers/md/dm-thin-metadata.c
+index 2db7030aba00..a27395c8621f 100644
+--- a/drivers/md/dm-thin-metadata.c
++++ b/drivers/md/dm-thin-metadata.c
+@@ -2045,10 +2045,13 @@ int dm_pool_register_metadata_threshold(struct dm_pool_metadata *pmd,
+                                       dm_sm_threshold_fn fn,
+                                       void *context)
+ {
+-      int r;
++      int r = -EINVAL;
+       pmd_write_lock_in_core(pmd);
+-      r = dm_sm_register_threshold_callback(pmd->metadata_sm, threshold, fn, context);
++      if (!pmd->fail_io) {
++              r = dm_sm_register_threshold_callback(pmd->metadata_sm,
++                                                    threshold, fn, context);
++      }
+       pmd_write_unlock(pmd);
+       return r;
+diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
+index 84c083f76673..e76c96c760a9 100644
+--- a/drivers/md/dm-thin.c
++++ b/drivers/md/dm-thin.c
+@@ -3375,8 +3375,10 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
+                                               calc_metadata_threshold(pt),
+                                               metadata_low_callback,
+                                               pool);
+-      if (r)
++      if (r) {
++              ti->error = "Error registering metadata threshold";
+               goto out_flags_changed;
++      }
+       dm_pool_register_pre_commit_callback(pool->pmd,
+                                            metadata_pre_commit_callback, pool);
+-- 
+2.35.1
+
diff --git a/queue-5.19/dm-writecache-set-a-default-max_writeback_jobs.patch b/queue-5.19/dm-writecache-set-a-default-max_writeback_jobs.patch
new file mode 100644 (file)
index 0000000..fe6b111
--- /dev/null
@@ -0,0 +1,41 @@
+From 83402c76c0b6106f058eb77746a1cec1dd7273fd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Jul 2022 07:09:04 -0400
+Subject: dm writecache: set a default MAX_WRITEBACK_JOBS
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+[ Upstream commit ca7dc242e358e46d963b32f9d9dd829785a9e957 ]
+
+dm-writecache has the capability to limit the number of writeback jobs
+in progress. However, this feature was off by default. As such there
+were some out-of-memory crashes observed when lowering the low
+watermark while the cache is full.
+
+This commit enables writeback limit by default. It is set to 256MiB or
+1/16 of total system memory, whichever is smaller.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Signed-off-by: Mike Snitzer <snitzer@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/dm-writecache.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
+index d74c5a7a0ab4..d572135cd43f 100644
+--- a/drivers/md/dm-writecache.c
++++ b/drivers/md/dm-writecache.c
+@@ -22,7 +22,7 @@
+ #define HIGH_WATERMARK                        50
+ #define LOW_WATERMARK                 45
+-#define MAX_WRITEBACK_JOBS            0
++#define MAX_WRITEBACK_JOBS            min(0x10000000 / PAGE_SIZE, totalram_pages() / 16)
+ #define ENDIO_LATENCY                 16
+ #define WRITEBACK_LATENCY             64
+ #define AUTOCOMMIT_BLOCKS_SSD         65536
+-- 
+2.35.1
+
diff --git a/queue-5.19/documentation-ext4-fix-cell-spacing-of-table-heading.patch b/queue-5.19/documentation-ext4-fix-cell-spacing-of-table-heading.patch
new file mode 100644 (file)
index 0000000..572f9e9
--- /dev/null
@@ -0,0 +1,57 @@
+From 37b309287fd69f28f94e0c61d013a03f95872108 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 19 Jun 2022 14:29:39 +0700
+Subject: Documentation: ext4: fix cell spacing of table heading on blockmap
+ table
+
+From: Bagas Sanjaya <bagasdotme@gmail.com>
+
+[ Upstream commit 442ec1e5bb7c46c72c41898e13a5744c84cadf51 ]
+
+Commit 3103084afcf234 ("ext4, doc: remove unnecessary escaping") removes
+redundant underscore escaping, however the cell spacing in heading row of
+blockmap table became not aligned anymore, hence triggers malformed table
+warning:
+
+Documentation/filesystems/ext4/blockmap.rst:3: WARNING: Malformed table.
+
++---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+| i.i_block Offset   | Where It Points                                                                                                                                                                                                              |
+<snipped>...
+
+The warning caused the table not being loaded.
+
+Realign the heading row cell by adding missing space at the first cell
+to fix the warning.
+
+Fixes: 3103084afcf234 ("ext4, doc: remove unnecessary escaping")
+Cc: stable@kernel.org
+Cc: Andreas Dilger <adilger.kernel@dilger.ca>
+Cc: Jonathan Corbet <corbet@lwn.net>
+Cc: Wang Jianjian <wangjianjian3@huawei.com>
+Cc: linux-ext4@vger.kernel.org
+Cc: linux-kernel@vger.kernel.org
+Signed-off-by: Bagas Sanjaya <bagasdotme@gmail.com>
+Link: https://lore.kernel.org/r/20220619072938.7334-1-bagasdotme@gmail.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/filesystems/ext4/blockmap.rst | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/Documentation/filesystems/ext4/blockmap.rst b/Documentation/filesystems/ext4/blockmap.rst
+index 2bd990402a5c..cc596541ce79 100644
+--- a/Documentation/filesystems/ext4/blockmap.rst
++++ b/Documentation/filesystems/ext4/blockmap.rst
+@@ -1,7 +1,7 @@
+ .. SPDX-License-Identifier: GPL-2.0
+ +---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+-| i.i_block Offset   | Where It Points                                                                                                                                                                                                              |
++| i.i_block Offset    | Where It Points                                                                                                                                                                                                              |
+ +=====================+==============================================================================================================================================================================================================================+
+ | 0 to 11             | Direct map to file blocks 0 to 11.                                                                                                                                                                                           |
+ +---------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
+-- 
+2.35.1
+
diff --git a/queue-5.19/drivers-base-fix-userspace-break-from-using-bin_attr.patch b/queue-5.19/drivers-base-fix-userspace-break-from-using-bin_attr.patch
new file mode 100644 (file)
index 0000000..2adb038
--- /dev/null
@@ -0,0 +1,188 @@
+From b1dac98d489b9236a15b491dd955722d3a7e7b20 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 09:49:24 -0400
+Subject: drivers/base: fix userspace break from using bin_attributes for
+ cpumap and cpulist
+
+From: Phil Auld <pauld@redhat.com>
+
+[ Upstream commit 7ee951acd31a88f941fd6535fbdee3a1567f1d63 ]
+
+Using bin_attributes with a 0 size causes fstat and friends to return that
+0 size. This breaks userspace code that retrieves the size before reading
+the file. Rather than reverting 75bd50fa841 ("drivers/base/node.c: use
+bin_attribute to break the size limitation of cpumap ABI") let's put in a
+size value at compile time.
+
+For cpulist the maximum size is on the order of
+       NR_CPUS * (ceil(log10(NR_CPUS)) + 1)/2
+
+which for 8192 is 20480 (8192 * 5)/2. In order to get near that you'd need
+a system with every other CPU on one node. For example: (0,2,4,8, ... ).
+To simplify the math and support larger NR_CPUS in the future we are using
+(NR_CPUS * 7)/2. We also set it to a min of PAGE_SIZE to retain the older
+behavior for smaller NR_CPUS.
+
+The cpumap file the size works out to be NR_CPUS/4 + NR_CPUS/32 - 1
+(or NR_CPUS * 9/32 - 1) including the ","s.
+
+Add a set of macros for these values to cpumask.h so they can be used in
+multiple places. Apply these to the handful of such files in
+drivers/base/topology.c as well as node.c.
+
+As an example, on an 80 cpu 4-node system (NR_CPUS == 8192):
+
+before:
+
+-r--r--r--. 1 root root 0 Jul 12 14:08 system/node/node0/cpulist
+-r--r--r--. 1 root root 0 Jul 11 17:25 system/node/node0/cpumap
+
+after:
+
+-r--r--r--. 1 root root 28672 Jul 13 11:32 system/node/node0/cpulist
+-r--r--r--. 1 root root  4096 Jul 13 11:31 system/node/node0/cpumap
+
+CONFIG_NR_CPUS = 16384
+-r--r--r--. 1 root root 57344 Jul 13 14:03 system/node/node0/cpulist
+-r--r--r--. 1 root root  4607 Jul 13 14:02 system/node/node0/cpumap
+
+The actual number of cpus doesn't matter for the reported size since they
+are based on NR_CPUS.
+
+Fixes: 75bd50fa841d ("drivers/base/node.c: use bin_attribute to break the size limitation of cpumap ABI")
+Fixes: bb9ec13d156e ("topology: use bin_attribute to break the size limitation of cpumap ABI")
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: "Rafael J. Wysocki" <rafael@kernel.org>
+Cc: Yury Norov <yury.norov@gmail.com>
+Cc: stable@vger.kernel.org
+Acked-by: Yury Norov <yury.norov@gmail.com> (for include/linux/cpumask.h)
+Signed-off-by: Phil Auld <pauld@redhat.com>
+Link: https://lore.kernel.org/r/20220715134924.3466194-1-pauld@redhat.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/base/node.c     |  4 ++--
+ drivers/base/topology.c | 32 ++++++++++++++++----------------
+ include/linux/cpumask.h | 18 ++++++++++++++++++
+ 3 files changed, 36 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/base/node.c b/drivers/base/node.c
+index 0ac6376ef7a1..eb0f43784c2b 100644
+--- a/drivers/base/node.c
++++ b/drivers/base/node.c
+@@ -45,7 +45,7 @@ static inline ssize_t cpumap_read(struct file *file, struct kobject *kobj,
+       return n;
+ }
+-static BIN_ATTR_RO(cpumap, 0);
++static BIN_ATTR_RO(cpumap, CPUMAP_FILE_MAX_BYTES);
+ static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj,
+                                  struct bin_attribute *attr, char *buf,
+@@ -66,7 +66,7 @@ static inline ssize_t cpulist_read(struct file *file, struct kobject *kobj,
+       return n;
+ }
+-static BIN_ATTR_RO(cpulist, 0);
++static BIN_ATTR_RO(cpulist, CPULIST_FILE_MAX_BYTES);
+ /**
+  * struct node_access_nodes - Access class device to hold user visible
+diff --git a/drivers/base/topology.c b/drivers/base/topology.c
+index ac6ad9ab67f9..89f98be5c5b9 100644
+--- a/drivers/base/topology.c
++++ b/drivers/base/topology.c
+@@ -62,47 +62,47 @@ define_id_show_func(ppin, "0x%llx");
+ static DEVICE_ATTR_ADMIN_RO(ppin);
+ define_siblings_read_func(thread_siblings, sibling_cpumask);
+-static BIN_ATTR_RO(thread_siblings, 0);
+-static BIN_ATTR_RO(thread_siblings_list, 0);
++static BIN_ATTR_RO(thread_siblings, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(thread_siblings_list, CPULIST_FILE_MAX_BYTES);
+ define_siblings_read_func(core_cpus, sibling_cpumask);
+-static BIN_ATTR_RO(core_cpus, 0);
+-static BIN_ATTR_RO(core_cpus_list, 0);
++static BIN_ATTR_RO(core_cpus, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(core_cpus_list, CPULIST_FILE_MAX_BYTES);
+ define_siblings_read_func(core_siblings, core_cpumask);
+-static BIN_ATTR_RO(core_siblings, 0);
+-static BIN_ATTR_RO(core_siblings_list, 0);
++static BIN_ATTR_RO(core_siblings, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(core_siblings_list, CPULIST_FILE_MAX_BYTES);
+ #ifdef TOPOLOGY_CLUSTER_SYSFS
+ define_siblings_read_func(cluster_cpus, cluster_cpumask);
+-static BIN_ATTR_RO(cluster_cpus, 0);
+-static BIN_ATTR_RO(cluster_cpus_list, 0);
++static BIN_ATTR_RO(cluster_cpus, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(cluster_cpus_list, CPULIST_FILE_MAX_BYTES);
+ #endif
+ #ifdef TOPOLOGY_DIE_SYSFS
+ define_siblings_read_func(die_cpus, die_cpumask);
+-static BIN_ATTR_RO(die_cpus, 0);
+-static BIN_ATTR_RO(die_cpus_list, 0);
++static BIN_ATTR_RO(die_cpus, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(die_cpus_list, CPULIST_FILE_MAX_BYTES);
+ #endif
+ define_siblings_read_func(package_cpus, core_cpumask);
+-static BIN_ATTR_RO(package_cpus, 0);
+-static BIN_ATTR_RO(package_cpus_list, 0);
++static BIN_ATTR_RO(package_cpus, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(package_cpus_list, CPULIST_FILE_MAX_BYTES);
+ #ifdef TOPOLOGY_BOOK_SYSFS
+ define_id_show_func(book_id, "%d");
+ static DEVICE_ATTR_RO(book_id);
+ define_siblings_read_func(book_siblings, book_cpumask);
+-static BIN_ATTR_RO(book_siblings, 0);
+-static BIN_ATTR_RO(book_siblings_list, 0);
++static BIN_ATTR_RO(book_siblings, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(book_siblings_list, CPULIST_FILE_MAX_BYTES);
+ #endif
+ #ifdef TOPOLOGY_DRAWER_SYSFS
+ define_id_show_func(drawer_id, "%d");
+ static DEVICE_ATTR_RO(drawer_id);
+ define_siblings_read_func(drawer_siblings, drawer_cpumask);
+-static BIN_ATTR_RO(drawer_siblings, 0);
+-static BIN_ATTR_RO(drawer_siblings_list, 0);
++static BIN_ATTR_RO(drawer_siblings, CPUMAP_FILE_MAX_BYTES);
++static BIN_ATTR_RO(drawer_siblings_list, CPULIST_FILE_MAX_BYTES);
+ #endif
+ static struct bin_attribute *bin_attrs[] = {
+diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
+index fe29ac7cc469..4592d0845941 100644
+--- a/include/linux/cpumask.h
++++ b/include/linux/cpumask.h
+@@ -1071,4 +1071,22 @@ cpumap_print_list_to_buf(char *buf, const struct cpumask *mask,
+       [0] =  1UL                                                      \
+ } }
++/*
++ * Provide a valid theoretical max size for cpumap and cpulist sysfs files
++ * to avoid breaking userspace which may allocate a buffer based on the size
++ * reported by e.g. fstat.
++ *
++ * for cpumap NR_CPUS * 9/32 - 1 should be an exact length.
++ *
++ * For cpulist 7 is (ceil(log10(NR_CPUS)) + 1) allowing for NR_CPUS to be up
++ * to 2 orders of magnitude larger than 8192. And then we divide by 2 to
++ * cover a worst-case of every other cpu being on one of two nodes for a
++ * very large NR_CPUS.
++ *
++ *  Use PAGE_SIZE as a minimum for smaller configurations.
++ */
++#define CPUMAP_FILE_MAX_BYTES  ((((NR_CPUS * 9)/32 - 1) > PAGE_SIZE) \
++                                      ? (NR_CPUS * 9)/32 - 1 : PAGE_SIZE)
++#define CPULIST_FILE_MAX_BYTES  (((NR_CPUS * 7)/2 > PAGE_SIZE) ? (NR_CPUS * 7)/2 : PAGE_SIZE)
++
+ #endif /* __LINUX_CPUMASK_H */
+-- 
+2.35.1
+
diff --git a/queue-5.19/drm-amdgpu-check-bo-s-requested-pinning-domains-agai.patch b/queue-5.19/drm-amdgpu-check-bo-s-requested-pinning-domains-agai.patch
new file mode 100644 (file)
index 0000000..b1f7e69
--- /dev/null
@@ -0,0 +1,55 @@
+From 4eac50b82fb2d529a4802828b19b092b0cbf0715 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:30:29 -0400
+Subject: drm/amdgpu: Check BO's requested pinning domains against its
+ preferred_domains
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Leo Li <sunpeng.li@amd.com>
+
+[ Upstream commit f5ba14043621f4afdf3ad5f92ee2d8dbebbe4340 ]
+
+When pinning a buffer, we should check to see if there are any
+additional restrictions imposed by bo->preferred_domains. This will
+prevent the BO from being moved to an invalid domain when pinning.
+
+For example, this can happen if the user requests to create a BO in GTT
+domain for display scanout. amdgpu_dm will allow pinning to either VRAM
+or GTT domains, since DCN can scanout from either or. However, in
+amdgpu_bo_pin_restricted(), pinning to VRAM is preferred if there is
+adequate carveout. This can lead to pinning to VRAM despite the user
+requesting GTT placement for the BO.
+
+v2: Allow the kernel to override the domain, which can happen when
+    exporting a BO to a V4L camera (for example).
+
+Signed-off-by: Leo Li <sunpeng.li@amd.com>
+Reviewed-by: Alex Deucher <alexander.deucher@amd.com>
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdgpu/amdgpu_object.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+index 2c82b1d5a0d7..4570ad449390 100644
+--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
++++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+@@ -882,6 +882,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
+       if (WARN_ON_ONCE(min_offset > max_offset))
+               return -EINVAL;
++      /* Check domain to be pinned to against preferred domains */
++      if (bo->preferred_domains & domain)
++              domain = bo->preferred_domains & domain;
++
+       /* A shared bo cannot be migrated to VRAM */
+       if (bo->tbo.base.import_attach) {
+               if (domain & AMDGPU_GEM_DOMAIN_GTT)
+-- 
+2.35.1
+
diff --git a/queue-5.19/drm-dp-mst-read-the-extended-dpcd-capabilities-durin.patch b/queue-5.19/drm-dp-mst-read-the-extended-dpcd-capabilities-durin.patch
new file mode 100644 (file)
index 0000000..eadfcb2
--- /dev/null
@@ -0,0 +1,57 @@
+From 9850d6a91e51f3700fdfd4443f77b459b037e07d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Jun 2022 12:45:37 +0300
+Subject: drm/dp/mst: Read the extended DPCD capabilities during system resume
+
+From: Imre Deak <imre.deak@intel.com>
+
+[ Upstream commit 7a710a8bc909313951eb9252d8419924c771d7c2 ]
+
+The WD22TB4 Thunderbolt dock at least will revert its DP_MAX_LINK_RATE
+from HBR3 to HBR2 after system suspend/resume if the DP_DP13_DPCD_REV
+registers are not read subsequently also as required.
+
+Fix this by reading DP_DP13_DPCD_REV registers as well, matching what is
+done during connector detection. While at it also fix up the same call
+in drm_dp_mst_dump_topology().
+
+Cc: Lyude Paul <lyude@redhat.com>
+Closes: https://gitlab.freedesktop.org/drm/intel/-/issues/5292
+Signed-off-by: Imre Deak <imre.deak@intel.com>
+Reviewed-by: Jani Nikula <jani.nikula@intel.com>
+Cc: <stable@vger.kernel.org> # v5.14+
+Reviewed-by: Lyude Paul <lyude@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20220614094537.885472-1-imre.deak@intel.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/display/drm_dp_mst_topology.c | 7 ++-----
+ 1 file changed, 2 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c
+index 67b3b9697da7..18f2b6075b78 100644
+--- a/drivers/gpu/drm/display/drm_dp_mst_topology.c
++++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c
+@@ -3860,9 +3860,7 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr,
+       if (!mgr->mst_primary)
+               goto out_fail;
+-      ret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, mgr->dpcd,
+-                             DP_RECEIVER_CAP_SIZE);
+-      if (ret != DP_RECEIVER_CAP_SIZE) {
++      if (drm_dp_read_dpcd_caps(mgr->aux, mgr->dpcd) < 0) {
+               drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n");
+               goto out_fail;
+       }
+@@ -4911,8 +4909,7 @@ void drm_dp_mst_dump_topology(struct seq_file *m,
+               u8 buf[DP_PAYLOAD_TABLE_SIZE];
+               int ret;
+-              ret = drm_dp_dpcd_read(mgr->aux, DP_DPCD_REV, buf, DP_RECEIVER_CAP_SIZE);
+-              if (ret) {
++              if (drm_dp_read_dpcd_caps(mgr->aux, buf) < 0) {
+                       seq_printf(m, "dpcd read failed\n");
+                       goto out;
+               }
+-- 
+2.35.1
+
diff --git a/queue-5.19/drm-fb-helper-fix-out-of-bounds-access.patch-14074 b/queue-5.19/drm-fb-helper-fix-out-of-bounds-access.patch-14074
new file mode 100644 (file)
index 0000000..ba9fb32
--- /dev/null
@@ -0,0 +1,100 @@
+From 4ba6933086fff9c00b0faa445d6f4e7d4e5ff751 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 21 Jun 2022 12:46:17 +0200
+Subject: drm/fb-helper: Fix out-of-bounds access
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Thomas Zimmermann <tzimmermann@suse.de>
+
+[ Upstream commit ae25885bdf59fde40726863c57fd20e4a0642183 ]
+
+Clip memory range to screen-buffer size to avoid out-of-bounds access
+in fbdev deferred I/O's damage handling.
+
+Fbdev's deferred I/O can only track pages. From the range of pages, the
+damage handler computes the clipping rectangle for the display update.
+If the fbdev screen buffer ends near the beginning of a page, that page
+could contain more scanlines. The damage handler would then track these
+non-existing scanlines as dirty and provoke an out-of-bounds access
+during the screen update. Hence, clip the maximum memory range to the
+size of the screen buffer.
+
+While at it, rename the variables min/max to min_off/max_off in
+drm_fb_helper_deferred_io(). This avoids confusion with the macros of
+the same name.
+
+Reported-by: Nuno Gonçalves <nunojpg@gmail.com>
+Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
+Reviewed-by: Javier Martinez Canillas <javierm@redhat.com>
+Tested-by: Nuno Gonçalves <nunojpg@gmail.com>
+Fixes: 67b723f5b742 ("drm/fb-helper: Calculate damaged area in separate helper")
+Cc: Thomas Zimmermann <tzimmermann@suse.de>
+Cc: Javier Martinez Canillas <javierm@redhat.com>
+Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
+Cc: Maxime Ripard <mripard@kernel.org>
+Cc: <stable@vger.kernel.org> # v5.18+
+Link: https://patchwork.freedesktop.org/patch/msgid/20220621104617.8817-1-tzimmermann@suse.de
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/drm_fb_helper.c | 27 +++++++++++++++++++--------
+ 1 file changed, 19 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
+index 5ad2b6a2778c..1705e8d345ab 100644
+--- a/drivers/gpu/drm/drm_fb_helper.c
++++ b/drivers/gpu/drm/drm_fb_helper.c
+@@ -680,7 +680,11 @@ static void drm_fb_helper_damage(struct fb_info *info, u32 x, u32 y,
+       schedule_work(&helper->damage_work);
+ }
+-/* Convert memory region into area of scanlines and pixels per scanline */
++/*
++ * Convert memory region into area of scanlines and pixels per
++ * scanline. The parameters off and len must not reach beyond
++ * the end of the framebuffer.
++ */
+ static void drm_fb_helper_memory_range_to_clip(struct fb_info *info, off_t off, size_t len,
+                                              struct drm_rect *clip)
+ {
+@@ -715,22 +719,29 @@ static void drm_fb_helper_memory_range_to_clip(struct fb_info *info, off_t off,
+  */
+ void drm_fb_helper_deferred_io(struct fb_info *info, struct list_head *pagereflist)
+ {
+-      unsigned long start, end, min, max;
++      unsigned long start, end, min_off, max_off;
+       struct fb_deferred_io_pageref *pageref;
+       struct drm_rect damage_area;
+-      min = ULONG_MAX;
+-      max = 0;
++      min_off = ULONG_MAX;
++      max_off = 0;
+       list_for_each_entry(pageref, pagereflist, list) {
+               start = pageref->offset;
+               end = start + PAGE_SIZE;
+-              min = min(min, start);
+-              max = max(max, end);
++              min_off = min(min_off, start);
++              max_off = max(max_off, end);
+       }
+-      if (min >= max)
++      if (min_off >= max_off)
+               return;
+-      drm_fb_helper_memory_range_to_clip(info, min, max - min, &damage_area);
++      /*
++       * As we can only track pages, we might reach beyond the end
++       * of the screen and account for non-existing scanlines. Hence,
++       * keep the covered memory area within the screen buffer.
++       */
++      max_off = min(max_off, info->screen_size);
++
++      drm_fb_helper_memory_range_to_clip(info, min_off, max_off - min_off, &damage_area);
+       drm_fb_helper_damage(info, damage_area.x1, damage_area.y1,
+                            drm_rect_width(&damage_area),
+                            drm_rect_height(&damage_area));
+-- 
+2.35.1
+
diff --git a/queue-5.19/drm-hyperv-drm-include-framebuffer-and-edid-headers.patch-15144 b/queue-5.19/drm-hyperv-drm-include-framebuffer-and-edid-headers.patch-15144
new file mode 100644 (file)
index 0000000..dee42c4
--- /dev/null
@@ -0,0 +1,67 @@
+From c788aef49db3a12b08d26fbbf7fddb8cf92a71ac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 22 Jun 2022 10:34:13 +0200
+Subject: drm/hyperv-drm: Include framebuffer and EDID headers
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Thomas Zimmermann <tzimmermann@suse.de>
+
+[ Upstream commit 009a3a52791f31c57d755a73f6bc66fbdd8bd76c ]
+
+Fix a number of compile errors by including the correct header
+files. Examples are shown below.
+
+  ../drivers/gpu/drm/hyperv/hyperv_drm_modeset.c: In function 'hyperv_blit_to_vram_rect':
+  ../drivers/gpu/drm/hyperv/hyperv_drm_modeset.c:25:48: error: invalid use of undefined type 'struct drm_framebuffer'
+   25 |         struct hyperv_drm_device *hv = to_hv(fb->dev);
+      |                                                ^~
+
+  ../drivers/gpu/drm/hyperv/hyperv_drm_modeset.c: In function 'hyperv_connector_get_modes':
+  ../drivers/gpu/drm/hyperv/hyperv_drm_modeset.c:59:17: error: implicit declaration of function 'drm_add_modes_noedid' [-Werror=implicit-function-declaration]
+   59 |         count = drm_add_modes_noedid(connector,
+      |                 ^~~~~~~~~~~~~~~~~~~~
+
+  ../drivers/gpu/drm/hyperv/hyperv_drm_modeset.c:62:9: error: implicit declaration of function 'drm_set_preferred_mode'; did you mean 'drm_mm_reserve_node'? [-Werror=implicit-function-declaration]
+   62 |         drm_set_preferred_mode(connector, hv->preferred_width,
+      |         ^~~~~~~~~~~~~~~~~~~~~~
+
+Signed-off-by: Thomas Zimmermann <tzimmermann@suse.de>
+Fixes: 76c56a5affeb ("drm/hyperv: Add DRM driver for hyperv synthetic video device")
+Fixes: 720cf96d8fec ("drm: Drop drm_framebuffer.h from drm_crtc.h")
+Fixes: 255490f9150d ("drm: Drop drm_edid.h from drm_crtc.h")
+Cc: Deepak Rawat <drawat.floss@gmail.com>
+Cc: Thomas Zimmermann <tzimmermann@suse.de>
+Cc: Maarten Lankhorst <maarten.lankhorst@linux.intel.com>
+Cc: Maxime Ripard <mripard@kernel.org>
+Cc: linux-hyperv@vger.kernel.org
+Cc: dri-devel@lists.freedesktop.org
+Cc: <stable@vger.kernel.org> # v5.14+
+Acked-by: Maxime Ripard <maxime@cerno.tech>
+Reviewed-by: Ville Syrjälä <ville.syrjala@linux.intel.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20220622083413.12573-1-tzimmermann@suse.de
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/hyperv/hyperv_drm_modeset.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c b/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c
+index 27f4fcb058f9..b8e64dd8d3a6 100644
+--- a/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c
++++ b/drivers/gpu/drm/hyperv/hyperv_drm_modeset.c
+@@ -7,9 +7,11 @@
+ #include <drm/drm_damage_helper.h>
+ #include <drm/drm_drv.h>
++#include <drm/drm_edid.h>
+ #include <drm/drm_fb_helper.h>
+ #include <drm/drm_format_helper.h>
+ #include <drm/drm_fourcc.h>
++#include <drm/drm_framebuffer.h>
+ #include <drm/drm_gem_atomic_helper.h>
+ #include <drm/drm_gem_framebuffer_helper.h>
+ #include <drm/drm_gem_shmem_helper.h>
+-- 
+2.35.1
+
diff --git a/queue-5.19/drm-ingenic-use-the-highest-possible-dma-burst-size.patch-22931 b/queue-5.19/drm-ingenic-use-the-highest-possible-dma-burst-size.patch-22931
new file mode 100644 (file)
index 0000000..a19c2a0
--- /dev/null
@@ -0,0 +1,110 @@
+From ca2b56495ef3523ca282c39439e94ca901512595 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 3 Jul 2022 00:07:27 +0100
+Subject: drm/ingenic: Use the highest possible DMA burst size
+
+From: Paul Cercueil <paul@crapouillou.net>
+
+[ Upstream commit f0dce5c4fdaf9e98dd2755ffb1363822854b6287 ]
+
+Until now, when running at the maximum resolution of 1280x720 at 32bpp
+on the JZ4770 SoC the output was garbled, the X/Y position of the
+top-left corner of the framebuffer warping to a random position with
+the whole image being offset accordingly, every time a new frame was
+being submitted.
+
+This problem can be eliminated by using a bigger burst size for the DMA.
+
+Set in each soc_info structure the maximum burst size supported by the
+corresponding SoC, and use it in the driver.
+
+Set the new value using regmap_update_bits() instead of
+regmap_set_bits(), since we do want to override the old value of the
+burst size. (Note that regmap_set_bits() wasn't really valid before for
+the same reason, but it never seemed to be a problem).
+
+Cc: <stable@vger.kernel.org>
+Fixes: 90b86fcc47b4 ("DRM: Add KMS driver for the Ingenic JZ47xx SoCs")
+Signed-off-by: Paul Cercueil <paul@crapouillou.net>
+Link: https://patchwork.freedesktop.org/patch/msgid/20220702230727.66704-1-paul@crapouillou.net
+Acked-by: Sam Ravnborg <sam@ravnborg.org>
+Tested-by: Christophe Branchereau <cbranchereau@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/ingenic/ingenic-drm-drv.c | 10 ++++++++--
+ drivers/gpu/drm/ingenic/ingenic-drm.h     |  3 +++
+ 2 files changed, 11 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c
+index 8eb0ad501a7b..150a973c6001 100644
+--- a/drivers/gpu/drm/ingenic/ingenic-drm-drv.c
++++ b/drivers/gpu/drm/ingenic/ingenic-drm-drv.c
+@@ -69,6 +69,7 @@ struct jz_soc_info {
+       bool map_noncoherent;
+       bool use_extended_hwdesc;
+       bool plane_f0_not_working;
++      u32 max_burst;
+       unsigned int max_width, max_height;
+       const u32 *formats_f0, *formats_f1;
+       unsigned int num_formats_f0, num_formats_f1;
+@@ -318,8 +319,9 @@ static void ingenic_drm_crtc_update_timings(struct ingenic_drm *priv,
+               regmap_write(priv->map, JZ_REG_LCD_REV, mode->htotal << 16);
+       }
+-      regmap_set_bits(priv->map, JZ_REG_LCD_CTRL,
+-                      JZ_LCD_CTRL_OFUP | JZ_LCD_CTRL_BURST_16);
++      regmap_update_bits(priv->map, JZ_REG_LCD_CTRL,
++                         JZ_LCD_CTRL_OFUP | JZ_LCD_CTRL_BURST_MASK,
++                         JZ_LCD_CTRL_OFUP | priv->soc_info->max_burst);
+       /*
+        * IPU restart - specify how much time the LCDC will wait before
+@@ -1518,6 +1520,7 @@ static const struct jz_soc_info jz4740_soc_info = {
+       .map_noncoherent = false,
+       .max_width = 800,
+       .max_height = 600,
++      .max_burst = JZ_LCD_CTRL_BURST_16,
+       .formats_f1 = jz4740_formats,
+       .num_formats_f1 = ARRAY_SIZE(jz4740_formats),
+       /* JZ4740 has only one plane */
+@@ -1529,6 +1532,7 @@ static const struct jz_soc_info jz4725b_soc_info = {
+       .map_noncoherent = false,
+       .max_width = 800,
+       .max_height = 600,
++      .max_burst = JZ_LCD_CTRL_BURST_16,
+       .formats_f1 = jz4725b_formats_f1,
+       .num_formats_f1 = ARRAY_SIZE(jz4725b_formats_f1),
+       .formats_f0 = jz4725b_formats_f0,
+@@ -1541,6 +1545,7 @@ static const struct jz_soc_info jz4770_soc_info = {
+       .map_noncoherent = true,
+       .max_width = 1280,
+       .max_height = 720,
++      .max_burst = JZ_LCD_CTRL_BURST_64,
+       .formats_f1 = jz4770_formats_f1,
+       .num_formats_f1 = ARRAY_SIZE(jz4770_formats_f1),
+       .formats_f0 = jz4770_formats_f0,
+@@ -1555,6 +1560,7 @@ static const struct jz_soc_info jz4780_soc_info = {
+       .plane_f0_not_working = true,   /* REVISIT */
+       .max_width = 4096,
+       .max_height = 2048,
++      .max_burst = JZ_LCD_CTRL_BURST_64,
+       .formats_f1 = jz4770_formats_f1,
+       .num_formats_f1 = ARRAY_SIZE(jz4770_formats_f1),
+       .formats_f0 = jz4770_formats_f0,
+diff --git a/drivers/gpu/drm/ingenic/ingenic-drm.h b/drivers/gpu/drm/ingenic/ingenic-drm.h
+index cb1d09b62588..e5bd007ea93d 100644
+--- a/drivers/gpu/drm/ingenic/ingenic-drm.h
++++ b/drivers/gpu/drm/ingenic/ingenic-drm.h
+@@ -106,6 +106,9 @@
+ #define JZ_LCD_CTRL_BURST_4                   (0x0 << 28)
+ #define JZ_LCD_CTRL_BURST_8                   (0x1 << 28)
+ #define JZ_LCD_CTRL_BURST_16                  (0x2 << 28)
++#define JZ_LCD_CTRL_BURST_32                  (0x3 << 28)
++#define JZ_LCD_CTRL_BURST_64                  (0x4 << 28)
++#define JZ_LCD_CTRL_BURST_MASK                        (0x7 << 28)
+ #define JZ_LCD_CTRL_RGB555                    BIT(27)
+ #define JZ_LCD_CTRL_OFUP                      BIT(26)
+ #define JZ_LCD_CTRL_FRC_GRAYSCALE_16          (0x0 << 24)
+-- 
+2.35.1
+
diff --git a/queue-5.19/drm-mediatek-keep-dsi-as-lp00-before-dcs-cmds-transf.patch b/queue-5.19/drm-mediatek-keep-dsi-as-lp00-before-dcs-cmds-transf.patch
new file mode 100644 (file)
index 0000000..7ba59a7
--- /dev/null
@@ -0,0 +1,116 @@
+From e5abee11d10c7bd8122edefca8fe402bbae93fa0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 May 2022 10:00:06 +0800
+Subject: drm/mediatek: Keep dsi as LP00 before dcs cmds transfer
+
+From: Jitao Shi <jitao.shi@mediatek.com>
+
+[ Upstream commit 39e8d062b03c3dc257d880d82bd55cdd9e185a3b ]
+
+To comply with the panel sequence, hold the mipi signal to LP00 before
+the dcs cmds transmission, and pull the mipi signal high from LP00 to
+LP11 until the start of the dcs cmds transmission.
+
+The normal panel timing is :
+(1) pp1800 DC pull up
+(2) avdd & avee AC pull high
+(3) lcm_reset pull high -> pull low -> pull high
+(4) Pull MIPI signal high (LP11) -> initial code -> send video data
+    (HS mode)
+
+The power-off sequence is reversed.
+If dsi is not in cmd mode, then dsi will pull the mipi signal high in
+the mtk_output_dsi_enable function. The delay in lane_ready func is
+the reaction time of dsi_rx after pulling up the mipi signal.
+
+Fixes: 2dd8075d2185 ("drm/mediatek: mtk_dsi: Use the drm_panel_bridge API")
+
+Link: https://patchwork.kernel.org/project/linux-mediatek/patch/1653012007-11854-4-git-send-email-xinlei.lee@mediatek.com/
+Cc: <stable@vger.kernel.org> # 5.10.x: 7f6335c6a258: drm/mediatek: Modify dsi funcs to atomic operations
+Cc: <stable@vger.kernel.org> # 5.10.x: cde7e2e35c28: drm/mediatek: Separate poweron/poweroff from enable/disable and define new funcs
+Cc: <stable@vger.kernel.org> # 5.10.x
+Signed-off-by: Jitao Shi <jitao.shi@mediatek.com>
+Signed-off-by: Xinlei Lee <xinlei.lee@mediatek.com>
+Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+Reviewed-by: Rex-BC Chen <rex-bc.chen@mediatek.com>
+Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/mediatek/mtk_dsi.c | 28 +++++++++++++++++++++-------
+ 1 file changed, 21 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
+index 966a4729bb41..907d07eda000 100644
+--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
++++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
+@@ -203,6 +203,7 @@ struct mtk_dsi {
+       struct mtk_phy_timing phy_timing;
+       int refcount;
+       bool enabled;
++      bool lanes_ready;
+       u32 irq_data;
+       wait_queue_head_t irq_wait_queue;
+       const struct mtk_dsi_driver_data *driver_data;
+@@ -661,18 +662,11 @@ static int mtk_dsi_poweron(struct mtk_dsi *dsi)
+       mtk_dsi_reset_engine(dsi);
+       mtk_dsi_phy_timconfig(dsi);
+-      mtk_dsi_rxtx_control(dsi);
+-      usleep_range(30, 100);
+-      mtk_dsi_reset_dphy(dsi);
+       mtk_dsi_ps_control_vact(dsi);
+       mtk_dsi_set_vm_cmd(dsi);
+       mtk_dsi_config_vdo_timing(dsi);
+       mtk_dsi_set_interrupt_enable(dsi);
+-      mtk_dsi_clk_ulp_mode_leave(dsi);
+-      mtk_dsi_lane0_ulp_mode_leave(dsi);
+-      mtk_dsi_clk_hs_mode(dsi, 0);
+-
+       return 0;
+ err_disable_engine_clk:
+       clk_disable_unprepare(dsi->engine_clk);
+@@ -701,6 +695,23 @@ static void mtk_dsi_poweroff(struct mtk_dsi *dsi)
+       clk_disable_unprepare(dsi->digital_clk);
+       phy_power_off(dsi->phy);
++
++      dsi->lanes_ready = false;
++}
++
++static void mtk_dsi_lane_ready(struct mtk_dsi *dsi)
++{
++      if (!dsi->lanes_ready) {
++              dsi->lanes_ready = true;
++              mtk_dsi_rxtx_control(dsi);
++              usleep_range(30, 100);
++              mtk_dsi_reset_dphy(dsi);
++              mtk_dsi_clk_ulp_mode_leave(dsi);
++              mtk_dsi_lane0_ulp_mode_leave(dsi);
++              mtk_dsi_clk_hs_mode(dsi, 0);
++              msleep(20);
++              /* The reaction time after pulling up the mipi signal for dsi_rx */
++      }
+ }
+ static void mtk_output_dsi_enable(struct mtk_dsi *dsi)
+@@ -708,6 +719,7 @@ static void mtk_output_dsi_enable(struct mtk_dsi *dsi)
+       if (dsi->enabled)
+               return;
++      mtk_dsi_lane_ready(dsi);
+       mtk_dsi_set_mode(dsi);
+       mtk_dsi_clk_hs_mode(dsi, 1);
+@@ -1017,6 +1029,8 @@ static ssize_t mtk_dsi_host_transfer(struct mipi_dsi_host *host,
+       if (MTK_DSI_HOST_IS_READ(msg->type))
+               irq_flag |= LPRX_RD_RDY_INT_FLAG;
++      mtk_dsi_lane_ready(dsi);
++
+       ret = mtk_dsi_host_send_cmd(dsi, msg, irq_flag);
+       if (ret)
+               goto restore_dsi_mode;
+-- 
+2.35.1
+
diff --git a/queue-5.19/drm-mediatek-modify-dsi-funcs-to-atomic-operations.patch-7159 b/queue-5.19/drm-mediatek-modify-dsi-funcs-to-atomic-operations.patch-7159
new file mode 100644 (file)
index 0000000..d37918d
--- /dev/null
@@ -0,0 +1,59 @@
+From eee633764249f17bb03e27d0f62b3d3f56f7bf55 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 May 2022 10:00:04 +0800
+Subject: drm/mediatek: Modify dsi funcs to atomic operations
+
+From: Xinlei Lee <xinlei.lee@mediatek.com>
+
+[ Upstream commit 7f6335c6a258edf4d5ff1b904bc033188dc7b48b ]
+
+Because .enable & .disable are deprecated.
+Use .atomic_enable & .atomic_disable instead.
+
+Link: https://patchwork.kernel.org/project/linux-mediatek/patch/1653012007-11854-2-git-send-email-xinlei.lee@mediatek.com/
+Signed-off-by: Jitao Shi <jitao.shi@mediatek.com>
+Signed-off-by: Xinlei Lee <xinlei.lee@mediatek.com>
+Reviewed-by: Rex-BC Chen <rex-bc.chen@mediatek.com>
+Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/mediatek/mtk_dsi.c | 10 ++++++----
+ 1 file changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
+index d9f10a33e6fa..6e7793f935da 100644
+--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
++++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
+@@ -763,14 +763,16 @@ static void mtk_dsi_bridge_mode_set(struct drm_bridge *bridge,
+       drm_display_mode_to_videomode(adjusted, &dsi->vm);
+ }
+-static void mtk_dsi_bridge_disable(struct drm_bridge *bridge)
++static void mtk_dsi_bridge_atomic_disable(struct drm_bridge *bridge,
++                                        struct drm_bridge_state *old_bridge_state)
+ {
+       struct mtk_dsi *dsi = bridge_to_dsi(bridge);
+       mtk_output_dsi_disable(dsi);
+ }
+-static void mtk_dsi_bridge_enable(struct drm_bridge *bridge)
++static void mtk_dsi_bridge_atomic_enable(struct drm_bridge *bridge,
++                                       struct drm_bridge_state *old_bridge_state)
+ {
+       struct mtk_dsi *dsi = bridge_to_dsi(bridge);
+@@ -779,8 +781,8 @@ static void mtk_dsi_bridge_enable(struct drm_bridge *bridge)
+ static const struct drm_bridge_funcs mtk_dsi_bridge_funcs = {
+       .attach = mtk_dsi_bridge_attach,
+-      .disable = mtk_dsi_bridge_disable,
+-      .enable = mtk_dsi_bridge_enable,
++      .atomic_disable = mtk_dsi_bridge_atomic_disable,
++      .atomic_enable = mtk_dsi_bridge_atomic_enable,
+       .mode_set = mtk_dsi_bridge_mode_set,
+ };
+-- 
+2.35.1
+
diff --git a/queue-5.19/drm-mediatek-separate-poweron-poweroff-from-enable-d.patch-3169 b/queue-5.19/drm-mediatek-separate-poweron-poweroff-from-enable-d.patch-3169
new file mode 100644 (file)
index 0000000..908a913
--- /dev/null
@@ -0,0 +1,130 @@
+From fdbabb61cb02a8883acbb52d303ced70bd0ec21e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 20 May 2022 10:00:05 +0800
+Subject: drm/mediatek: Separate poweron/poweroff from enable/disable and
+ define new funcs
+
+From: Jitao Shi <jitao.shi@mediatek.com>
+
+[ Upstream commit cde7e2e35c2866d22a3a012e72a41052dfcc255d ]
+
+In order to match the changes of "Use the drm_panel_bridge API",
+the poweron/poweroff of dsi is extracted from enable/disable and
+defined as new funcs (atomic_pre_enable/atomic_post_disable).
+
+Since dsi_poweron is moved from dsi_enable to pre_enable function, in
+order to avoid poweron failure, the operation of dsi register fails to
+cause bus hang. Therefore, the protection mechanism is added to the
+dsi_enable function.
+
+Fixes: 2dd8075d2185 ("drm/mediatek: mtk_dsi: Use the drm_panel_bridge API")
+
+Link: https://patchwork.kernel.org/project/linux-mediatek/patch/1653012007-11854-3-git-send-email-xinlei.lee@mediatek.com/
+Signed-off-by: Jitao Shi <jitao.shi@mediatek.com>
+Signed-off-by: Xinlei Lee <xinlei.lee@mediatek.com>
+Reviewed-by: AngeloGioacchino Del Regno <angelogioacchino.delregno@collabora.com>
+Reviewed-by: Rex-BC Chen <rex-bc.chen@mediatek.com>
+Signed-off-by: Chun-Kuang Hu <chunkuang.hu@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/mediatek/mtk_dsi.c | 53 +++++++++++++++++++-----------
+ 1 file changed, 34 insertions(+), 19 deletions(-)
+
+diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
+index 6e7793f935da..966a4729bb41 100644
+--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
++++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
+@@ -691,16 +691,6 @@ static void mtk_dsi_poweroff(struct mtk_dsi *dsi)
+       if (--dsi->refcount != 0)
+               return;
+-      /*
+-       * mtk_dsi_stop() and mtk_dsi_start() is asymmetric, since
+-       * mtk_dsi_stop() should be called after mtk_drm_crtc_atomic_disable(),
+-       * which needs irq for vblank, and mtk_dsi_stop() will disable irq.
+-       * mtk_dsi_start() needs to be called in mtk_output_dsi_enable(),
+-       * after dsi is fully set.
+-       */
+-      mtk_dsi_stop(dsi);
+-
+-      mtk_dsi_switch_to_cmd_mode(dsi, VM_DONE_INT_FLAG, 500);
+       mtk_dsi_reset_engine(dsi);
+       mtk_dsi_lane0_ulp_mode_enter(dsi);
+       mtk_dsi_clk_ulp_mode_enter(dsi);
+@@ -715,17 +705,9 @@ static void mtk_dsi_poweroff(struct mtk_dsi *dsi)
+ static void mtk_output_dsi_enable(struct mtk_dsi *dsi)
+ {
+-      int ret;
+-
+       if (dsi->enabled)
+               return;
+-      ret = mtk_dsi_poweron(dsi);
+-      if (ret < 0) {
+-              DRM_ERROR("failed to power on dsi\n");
+-              return;
+-      }
+-
+       mtk_dsi_set_mode(dsi);
+       mtk_dsi_clk_hs_mode(dsi, 1);
+@@ -739,7 +721,16 @@ static void mtk_output_dsi_disable(struct mtk_dsi *dsi)
+       if (!dsi->enabled)
+               return;
+-      mtk_dsi_poweroff(dsi);
++      /*
++       * mtk_dsi_stop() and mtk_dsi_start() is asymmetric, since
++       * mtk_dsi_stop() should be called after mtk_drm_crtc_atomic_disable(),
++       * which needs irq for vblank, and mtk_dsi_stop() will disable irq.
++       * mtk_dsi_start() needs to be called in mtk_output_dsi_enable(),
++       * after dsi is fully set.
++       */
++      mtk_dsi_stop(dsi);
++
++      mtk_dsi_switch_to_cmd_mode(dsi, VM_DONE_INT_FLAG, 500);
+       dsi->enabled = false;
+ }
+@@ -776,13 +767,37 @@ static void mtk_dsi_bridge_atomic_enable(struct drm_bridge *bridge,
+ {
+       struct mtk_dsi *dsi = bridge_to_dsi(bridge);
++      if (dsi->refcount == 0)
++              return;
++
+       mtk_output_dsi_enable(dsi);
+ }
++static void mtk_dsi_bridge_atomic_pre_enable(struct drm_bridge *bridge,
++                                           struct drm_bridge_state *old_bridge_state)
++{
++      struct mtk_dsi *dsi = bridge_to_dsi(bridge);
++      int ret;
++
++      ret = mtk_dsi_poweron(dsi);
++      if (ret < 0)
++              DRM_ERROR("failed to power on dsi\n");
++}
++
++static void mtk_dsi_bridge_atomic_post_disable(struct drm_bridge *bridge,
++                                             struct drm_bridge_state *old_bridge_state)
++{
++      struct mtk_dsi *dsi = bridge_to_dsi(bridge);
++
++      mtk_dsi_poweroff(dsi);
++}
++
+ static const struct drm_bridge_funcs mtk_dsi_bridge_funcs = {
+       .attach = mtk_dsi_bridge_attach,
+       .atomic_disable = mtk_dsi_bridge_atomic_disable,
+       .atomic_enable = mtk_dsi_bridge_atomic_enable,
++      .atomic_pre_enable = mtk_dsi_bridge_atomic_pre_enable,
++      .atomic_post_disable = mtk_dsi_bridge_atomic_post_disable,
+       .mode_set = mtk_dsi_bridge_mode_set,
+ };
+-- 
+2.35.1
+
diff --git a/queue-5.19/drm-nouveau-acpi-don-t-print-error-when-we-get-einpr.patch b/queue-5.19/drm-nouveau-acpi-don-t-print-error-when-we-get-einpr.patch
new file mode 100644 (file)
index 0000000..50348b5
--- /dev/null
@@ -0,0 +1,38 @@
+From a3d015c94a8063fa5733033f7e3cb13d9164e73d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Jul 2022 13:42:33 -0400
+Subject: drm/nouveau/acpi: Don't print error when we get -EINPROGRESS from
+ pm_runtime
+
+From: Lyude Paul <lyude@redhat.com>
+
+[ Upstream commit 53c26181950ddc3c8ace3c0939c89e9c4d8deeb9 ]
+
+Since this isn't actually a failure.
+
+Signed-off-by: Lyude Paul <lyude@redhat.com>
+Reviewed-by: David Airlie <airlied@linux.ie>
+Fixes: 79e765ad665d ("drm/nouveau/drm/nouveau: Prevent handling ACPI HPD events too early")
+Cc: <stable@vger.kernel.org> # v4.19+
+Link: https://patchwork.freedesktop.org/patch/msgid/20220714174234.949259-2-lyude@redhat.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/nouveau/nouveau_display.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
+index 2cd0932b3d68..9f5a45f24e5b 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_display.c
++++ b/drivers/gpu/drm/nouveau/nouveau_display.c
+@@ -537,7 +537,7 @@ nouveau_display_acpi_ntfy(struct notifier_block *nb, unsigned long val,
+                                * it's own hotplug events.
+                                */
+                               pm_runtime_put_autosuspend(drm->dev->dev);
+-                      } else if (ret == 0) {
++                      } else if (ret == 0 || ret == -EINPROGRESS) {
+                               /* We've started resuming the GPU already, so
+                                * it will handle scheduling a full reprobe
+                                * itself
+-- 
+2.35.1
+
diff --git a/queue-5.19/drm-nouveau-don-t-pm_runtime_put_sync-only-pm_runtim.patch b/queue-5.19/drm-nouveau-don-t-pm_runtime_put_sync-only-pm_runtim.patch
new file mode 100644 (file)
index 0000000..5a3dfea
--- /dev/null
@@ -0,0 +1,62 @@
+From d4a4ca1be1af412505da2fd5b60858077e313854 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Jul 2022 13:42:34 -0400
+Subject: drm/nouveau: Don't pm_runtime_put_sync(), only
+ pm_runtime_put_autosuspend()
+
+From: Lyude Paul <lyude@redhat.com>
+
+[ Upstream commit c96cfaf8fc02d4bb70727dfa7ce7841a3cff9be2 ]
+
+While trying to fix another issue, it occurred to me that I don't actually
+think there is any situation where we want pm_runtime_put() in nouveau to
+be synchronous. In fact, this kind of just seems like it would cause
+issues where we may unexpectedly block a thread we don't expect to be
+blocked.
+
+So, let's only use pm_runtime_put_autosuspend().
+
+Changes since v1:
+* Use pm_runtime_put_autosuspend(), not pm_runtime_put()
+
+Signed-off-by: Lyude Paul <lyude@redhat.com>
+Reviewed-by: David Airlie <airlied@linux.ie>
+Fixes: 3a6536c51d5d ("drm/nouveau: Intercept ACPI_VIDEO_NOTIFY_PROBE")
+Cc: Hans de Goede <hdegoede@redhat.com>
+Cc: <stable@vger.kernel.org> # v4.10+
+Link: https://patchwork.freedesktop.org/patch/msgid/20220714174234.949259-3-lyude@redhat.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/nouveau/nouveau_display.c | 2 +-
+ drivers/gpu/drm/nouveau/nouveau_fbcon.c   | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
+index 9f5a45f24e5b..a2f5df568ca5 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_display.c
++++ b/drivers/gpu/drm/nouveau/nouveau_display.c
+@@ -515,7 +515,7 @@ nouveau_display_hpd_work(struct work_struct *work)
+       pm_runtime_mark_last_busy(drm->dev->dev);
+ noop:
+-      pm_runtime_put_sync(drm->dev->dev);
++      pm_runtime_put_autosuspend(dev->dev);
+ }
+ #ifdef CONFIG_ACPI
+diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+index 4f9b3aa5deda..20ac1ce2c0f1 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
++++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+@@ -466,7 +466,7 @@ nouveau_fbcon_set_suspend_work(struct work_struct *work)
+       if (state == FBINFO_STATE_RUNNING) {
+               nouveau_fbcon_hotplug_resume(drm->fbcon);
+               pm_runtime_mark_last_busy(drm->dev->dev);
+-              pm_runtime_put_sync(drm->dev->dev);
++              pm_runtime_put_autosuspend(drm->dev->dev);
+       }
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.19/drm-nouveau-fix-another-off-by-one-in-nvbios_addr.patch-28623 b/queue-5.19/drm-nouveau-fix-another-off-by-one-in-nvbios_addr.patch-28623
new file mode 100644 (file)
index 0000000..a4a5c25
--- /dev/null
@@ -0,0 +1,40 @@
+From bc0f39623d2a1b3b85f1c5b11f566aaa119ccbb3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 11 May 2022 11:37:16 -0500
+Subject: drm/nouveau: fix another off-by-one in nvbios_addr
+
+From: Timur Tabi <ttabi@nvidia.com>
+
+[ Upstream commit c441d28945fb113220d48d6c86ebc0b090a2b677 ]
+
+This check determines whether a given address is part of
+image 0 or image 1.  Image 1 starts at offset image0_size,
+so that address should be included.
+
+Fixes: 4d4e9907ff572 ("drm/nouveau/bios: guard against out-of-bounds accesses to image")
+Cc: <stable@vger.kernel.org> # v4.8+
+Signed-off-by: Timur Tabi <ttabi@nvidia.com>
+Reviewed-by: Karol Herbst <kherbst@redhat.com>
+Signed-off-by: Lyude Paul <lyude@redhat.com>
+Link: https://patchwork.freedesktop.org/patch/msgid/20220511163716.3520591-1-ttabi@nvidia.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c
+index 64e423dddd9e..6c318e41bde0 100644
+--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c
++++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/base.c
+@@ -33,7 +33,7 @@ nvbios_addr(struct nvkm_bios *bios, u32 *addr, u8 size)
+ {
+       u32 p = *addr;
+-      if (*addr > bios->image0_size && bios->imaged_addr) {
++      if (*addr >= bios->image0_size && bios->imaged_addr) {
+               *addr -= bios->image0_size;
+               *addr += bios->imaged_addr;
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.19/drm-nouveau-kms-fix-failure-path-for-creating-dp-con.patch b/queue-5.19/drm-nouveau-kms-fix-failure-path-for-creating-dp-con.patch
new file mode 100644 (file)
index 0000000..dfb0283
--- /dev/null
@@ -0,0 +1,51 @@
+From 06dd7fcf1e1e47ca3ed1e14497e5246821c9ef4c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 26 May 2022 16:43:13 -0400
+Subject: drm/nouveau/kms: Fix failure path for creating DP connectors
+
+From: Lyude Paul <lyude@redhat.com>
+
+[ Upstream commit ca0367ca5d9216644b41f86348d6661f8d9e32d8 ]
+
+It looks like that when we moved nouveau over to using drm_dp_aux_init()
+and registering it's aux bus during late connector registration, we totally
+forgot to fix the failure codepath in nouveau_connector_create() - as it
+still seems to assume that drm_dp_aux_init() can fail (it can't).
+
+So, let's fix that and also add a missing check to ensure that we've
+properly allocated nv_connector->aux.name while we're at it.
+
+Signed-off-by: Lyude Paul <lyude@redhat.com>
+Reviewed-by: David Airlie <airlied@linux.ie>
+Fixes: fd43ad9d47e7 ("drm/nouveau/kms/nv50-: Move AUX adapter reg to connector late register/early unregister")
+Cc: <stable@vger.kernel.org> # v5.14+
+Link: https://patchwork.freedesktop.org/patch/msgid/20220526204313.656473-1-lyude@redhat.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/nouveau/nouveau_connector.c | 8 +++-----
+ 1 file changed, 3 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
+index 22b83a6577eb..df83c4654e26 100644
+--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
++++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
+@@ -1361,13 +1361,11 @@ nouveau_connector_create(struct drm_device *dev,
+               snprintf(aux_name, sizeof(aux_name), "sor-%04x-%04x",
+                        dcbe->hasht, dcbe->hashm);
+               nv_connector->aux.name = kstrdup(aux_name, GFP_KERNEL);
+-              drm_dp_aux_init(&nv_connector->aux);
+-              if (ret) {
+-                      NV_ERROR(drm, "Failed to init AUX adapter for sor-%04x-%04x: %d\n",
+-                               dcbe->hasht, dcbe->hashm, ret);
++              if (!nv_connector->aux.name) {
+                       kfree(nv_connector);
+-                      return ERR_PTR(ret);
++                      return ERR_PTR(-ENOMEM);
+               }
++              drm_dp_aux_init(&nv_connector->aux);
+               fallthrough;
+       default:
+               funcs = &nouveau_connector_funcs;
+-- 
+2.35.1
+
diff --git a/queue-5.19/drm-tegra-fix-vmapping-of-prime-buffers.patch-28390 b/queue-5.19/drm-tegra-fix-vmapping-of-prime-buffers.patch-28390
new file mode 100644 (file)
index 0000000..a033a38
--- /dev/null
@@ -0,0 +1,56 @@
+From ac0c918731ae633f821d9f30277fb30ec0a05cbd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Jun 2022 01:42:39 +0300
+Subject: drm/tegra: Fix vmapping of prime buffers
+
+From: Dmitry Osipenko <dmitry.osipenko@collabora.com>
+
+[ Upstream commit c7860cbee9989882d2908682526a5ef617523cfe ]
+
+The code assumes that Tegra GEM is permanently vmapped, which is not
+true for the scattered buffers. After converting Tegra video decoder
+driver to V4L API, we're now getting a BUG_ON from dma-buf core on playing
+video using libvdpau-tegra on T30+ because tegra_gem_prime_vmap() sets
+vaddr to NULL. Older pre-V4L video decoder driver wasn't vmapping dma-bufs.
+Fix it by actually vmapping the exported GEMs.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Dmitry Osipenko <dmitry.osipenko@collabora.com>
+Signed-off-by: Thierry Reding <treding@nvidia.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/tegra/gem.c | 11 ++++++++++-
+ 1 file changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
+index 7c7dd84e6db8..81991090adcc 100644
+--- a/drivers/gpu/drm/tegra/gem.c
++++ b/drivers/gpu/drm/tegra/gem.c
+@@ -704,14 +704,23 @@ static int tegra_gem_prime_vmap(struct dma_buf *buf, struct iosys_map *map)
+ {
+       struct drm_gem_object *gem = buf->priv;
+       struct tegra_bo *bo = to_tegra_bo(gem);
++      void *vaddr;
+-      iosys_map_set_vaddr(map, bo->vaddr);
++      vaddr = tegra_bo_mmap(&bo->base);
++      if (IS_ERR(vaddr))
++              return PTR_ERR(vaddr);
++
++      iosys_map_set_vaddr(map, vaddr);
+       return 0;
+ }
+ static void tegra_gem_prime_vunmap(struct dma_buf *buf, struct iosys_map *map)
+ {
++      struct drm_gem_object *gem = buf->priv;
++      struct tegra_bo *bo = to_tegra_bo(gem);
++
++      tegra_bo_munmap(&bo->base, map->vaddr);
+ }
+ static const struct dma_buf_ops tegra_gem_prime_dmabuf_ops = {
+-- 
+2.35.1
+
diff --git a/queue-5.19/drm-vc4-hdmi-disable-audio-if-dmas-property-is-prese.patch b/queue-5.19/drm-vc4-hdmi-disable-audio-if-dmas-property-is-prese.patch
new file mode 100644 (file)
index 0000000..8bbe566
--- /dev/null
@@ -0,0 +1,51 @@
+From 78f1cd97e0c8ffa01b96be7ef7893e038cd96508 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Jun 2022 16:47:44 +0200
+Subject: drm/vc4: hdmi: Disable audio if dmas property is present but empty
+
+From: Phil Elwell <phil@raspberrypi.org>
+
+[ Upstream commit db2b927f8668adf3ac765e0921cd2720f5c04172 ]
+
+The dmas property is used to hold the dmaengine channel used for audio
+output.
+
+Older device trees were missing that property, so if it's not there we
+disable the audio output entirely.
+
+However, some overlays have set an empty value to that property, mostly
+to workaround the fact that overlays cannot remove a property. Let's add
+a test for that case and if it's empty, let's disable it as well.
+
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Phil Elwell <phil@raspberrypi.org>
+Link: https://lore.kernel.org/r/20220613144800.326124-18-maxime@cerno.tech
+Signed-off-by: Maxime Ripard <maxime@cerno.tech>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/vc4/vc4_hdmi.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/gpu/drm/vc4/vc4_hdmi.c b/drivers/gpu/drm/vc4/vc4_hdmi.c
+index ce9d16666d91..6b4f42332d95 100644
+--- a/drivers/gpu/drm/vc4/vc4_hdmi.c
++++ b/drivers/gpu/drm/vc4/vc4_hdmi.c
+@@ -2035,12 +2035,12 @@ static int vc4_hdmi_audio_init(struct vc4_hdmi *vc4_hdmi)
+       struct device *dev = &vc4_hdmi->pdev->dev;
+       struct platform_device *codec_pdev;
+       const __be32 *addr;
+-      int index;
++      int index, len;
+       int ret;
+-      if (!of_find_property(dev->of_node, "dmas", NULL)) {
++      if (!of_find_property(dev->of_node, "dmas", &len) || !len) {
+               dev_warn(dev,
+-                       "'dmas' DT property is missing, no HDMI audio\n");
++                       "'dmas' DT property is missing or empty, no HDMI audio\n");
+               return 0;
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.19/epoll-autoremove-wakers-even-more-aggressively.patch-6975 b/queue-5.19/epoll-autoremove-wakers-even-more-aggressively.patch-6975
new file mode 100644 (file)
index 0000000..28b5ddb
--- /dev/null
@@ -0,0 +1,95 @@
+From a243ba1c4a932a2513ab0222b5c31d019538b199 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 14:24:23 -0700
+Subject: epoll: autoremove wakers even more aggressively
+
+From: Benjamin Segall <bsegall@google.com>
+
+[ Upstream commit a16ceb13961068f7209e34d7984f8e42d2c06159 ]
+
+If a process is killed or otherwise exits while having active network
+connections and many threads waiting on epoll_wait, the threads will all
+be woken immediately, but not removed from ep->wq.  Then when network
+traffic scans ep->wq in wake_up, every wakeup attempt will fail, and will
+not remove the entries from the list.
+
+This means that the cost of the wakeup attempt is far higher than usual,
+does not decrease, and this also competes with the dying threads trying to
+actually make progress and remove themselves from the wq.
+
+Handle this by removing visited epoll wq entries unconditionally, rather
+than only when the wakeup succeeds - the structure of ep_poll means that
+the only potential loss is the timed_out->eavail heuristic, which now can
+race and result in a redundant ep_send_events attempt.  (But only when
+incoming data and a timeout actually race, not on every timeout)
+
+Shakeel added:
+
+: We are seeing this issue in production with real workloads and it has
+: caused hard lockups.  Particularly network heavy workloads with a lot
+: of threads in epoll_wait() can easily trigger this issue if they get
+: killed (oom-killed in our case).
+
+Link: https://lkml.kernel.org/r/xm26fsjotqda.fsf@google.com
+Signed-off-by: Ben Segall <bsegall@google.com>
+Tested-by: Shakeel Butt <shakeelb@google.com>
+Cc: Alexander Viro <viro@zeniv.linux.org.uk>
+Cc: Linus Torvalds <torvalds@linux-foundation.org>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Roman Penyaev <rpenyaev@suse.de>
+Cc: Jason Baron <jbaron@akamai.com>
+Cc: Khazhismel Kumykov <khazhy@google.com>
+Cc: Heiher <r@hev.cc>
+Cc: <stable@kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/eventpoll.c | 22 ++++++++++++++++++++++
+ 1 file changed, 22 insertions(+)
+
+diff --git a/fs/eventpoll.c b/fs/eventpoll.c
+index e2daa940ebce..8b56b94e2f56 100644
+--- a/fs/eventpoll.c
++++ b/fs/eventpoll.c
+@@ -1747,6 +1747,21 @@ static struct timespec64 *ep_timeout_to_timespec(struct timespec64 *to, long ms)
+       return to;
+ }
++/*
++ * autoremove_wake_function, but remove even on failure to wake up, because we
++ * know that default_wake_function/ttwu will only fail if the thread is already
++ * woken, and in that case the ep_poll loop will remove the entry anyways, not
++ * try to reuse it.
++ */
++static int ep_autoremove_wake_function(struct wait_queue_entry *wq_entry,
++                                     unsigned int mode, int sync, void *key)
++{
++      int ret = default_wake_function(wq_entry, mode, sync, key);
++
++      list_del_init(&wq_entry->entry);
++      return ret;
++}
++
+ /**
+  * ep_poll - Retrieves ready events, and delivers them to the caller-supplied
+  *           event buffer.
+@@ -1828,8 +1843,15 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
+                * normal wakeup path no need to call __remove_wait_queue()
+                * explicitly, thus ep->lock is not taken, which halts the
+                * event delivery.
++               *
++               * In fact, we now use an even more aggressive function that
++               * unconditionally removes, because we don't reuse the wait
++               * entry between loop iterations. This lets us also avoid the
++               * performance issue if a process is killed, causing all of its
++               * threads to wake up without being removed normally.
+                */
+               init_wait(&wait);
++              wait.func = ep_autoremove_wake_function;
+               write_lock_irq(&ep->lock);
+               /*
+-- 
+2.35.1
+
diff --git a/queue-5.19/ext4-add-ext4_inode_has_xattr_space-macro-in-xattr.h.patch b/queue-5.19/ext4-add-ext4_inode_has_xattr_space-macro-in-xattr.h.patch
new file mode 100644 (file)
index 0000000..3f41d43
--- /dev/null
@@ -0,0 +1,50 @@
+From 6d582b78b9e5f0d1bf339b62dc16fd94d690d204 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Jun 2022 10:13:55 +0800
+Subject: ext4: add EXT4_INODE_HAS_XATTR_SPACE macro in xattr.h
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit 179b14152dcb6a24c3415200603aebca70ff13af ]
+
+When adding an xattr to an inode, we must ensure that the inode_size is
+not less than EXT4_GOOD_OLD_INODE_SIZE + extra_isize + pad. Otherwise,
+the end position may be greater than the start position, resulting in UAF.
+
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Link: https://lore.kernel.org/r/20220616021358.2504451-2-libaokun1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.h | 13 +++++++++++++
+ 1 file changed, 13 insertions(+)
+
+diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
+index 77efb9a627ad..f885f362add4 100644
+--- a/fs/ext4/xattr.h
++++ b/fs/ext4/xattr.h
+@@ -95,6 +95,19 @@ struct ext4_xattr_entry {
+ #define EXT4_ZERO_XATTR_VALUE ((void *)-1)
++/*
++ * If we want to add an xattr to the inode, we should make sure that
++ * i_extra_isize is not 0 and that the inode size is not less than
++ * EXT4_GOOD_OLD_INODE_SIZE + extra_isize + pad.
++ *   EXT4_GOOD_OLD_INODE_SIZE   extra_isize header   entry   pad  data
++ * |--------------------------|------------|------|---------|---|-------|
++ */
++#define EXT4_INODE_HAS_XATTR_SPACE(inode)                             \
++      ((EXT4_I(inode)->i_extra_isize != 0) &&                         \
++       (EXT4_GOOD_OLD_INODE_SIZE + EXT4_I(inode)->i_extra_isize +     \
++        sizeof(struct ext4_xattr_ibody_header) + EXT4_XATTR_PAD <=    \
++        EXT4_INODE_SIZE((inode)->i_sb)))
++
+ struct ext4_xattr_info {
+       const char *name;
+       const void *value;
+-- 
+2.35.1
+
diff --git a/queue-5.19/ext4-check-if-directory-block-is-within-i_size.patch b/queue-5.19/ext4-check-if-directory-block-is-within-i_size.patch
new file mode 100644 (file)
index 0000000..8bc427e
--- /dev/null
@@ -0,0 +1,56 @@
+From 08fa5a52a0dbbc7bc9dd4076dfb1115e06b44a69 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Jul 2022 16:27:20 +0200
+Subject: ext4: check if directory block is within i_size
+
+From: Lukas Czerner <lczerner@redhat.com>
+
+[ Upstream commit 65f8ea4cd57dbd46ea13b41dc8bac03176b04233 ]
+
+Currently ext4 directory handling code implicitly assumes that the
+directory blocks are always within the i_size. In fact ext4_append()
+will attempt to allocate next directory block based solely on i_size and
+the i_size is then appropriately increased after a successful
+allocation.
+
+However, for this to work it requires i_size to be correct. If, for any
+reason, the directory inode i_size is corrupted in a way that the
+directory tree refers to a valid directory block past i_size, we could
+end up corrupting parts of the directory tree structure by overwriting
+already used directory blocks when modifying the directory.
+
+Fix it by catching the corruption early in __ext4_read_dirblock().
+
+Addresses Red-Hat-Bugzilla: #2070205
+CVE: CVE-2022-1184
+Signed-off-by: Lukas Czerner <lczerner@redhat.com>
+Cc: stable@vger.kernel.org
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Link: https://lore.kernel.org/r/20220704142721.157985-1-lczerner@redhat.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/namei.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
+index db4ba99d1ceb..cf460aa4f81d 100644
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -110,6 +110,13 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
+       struct ext4_dir_entry *dirent;
+       int is_dx_block = 0;
++      if (block >= inode->i_size) {
++              ext4_error_inode(inode, func, line, block,
++                     "Attempting to read directory block (%u) that is past i_size (%llu)",
++                     block, inode->i_size);
++              return ERR_PTR(-EFSCORRUPTED);
++      }
++
+       if (ext4_simulate_fail(inode->i_sb, EXT4_SIM_DIRBLOCK_EIO))
+               bh = ERR_PTR(-EIO);
+       else
+-- 
+2.35.1
+
diff --git a/queue-5.19/ext4-correct-max_inline_xattr_value_size-computing.patch b/queue-5.19/ext4-correct-max_inline_xattr_value_size-computing.patch
new file mode 100644 (file)
index 0000000..90a7b93
--- /dev/null
@@ -0,0 +1,41 @@
+From 84dca19e2b908bbb765f76e52e83dda6135bda95 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Jun 2022 10:13:57 +0800
+Subject: ext4: correct max_inline_xattr_value_size computing
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit c9fd167d57133c5b748d16913c4eabc55e531c73 ]
+
+If the ext4 inode does not have xattr space, 0 is returned in the
+get_max_inline_xattr_value_size function. Otherwise, the function returns
+a negative value when the inode does not contain EXT4_STATE_XATTR.
+
+Cc: stable@kernel.org
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220616021358.2504451-4-libaokun1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/inline.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
+index 1fa36cbe09ec..a4fbe825694b 100644
+--- a/fs/ext4/inline.c
++++ b/fs/ext4/inline.c
+@@ -36,6 +36,9 @@ static int get_max_inline_xattr_value_size(struct inode *inode,
+       struct ext4_inode *raw_inode;
+       int free, min_offs;
++      if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
++              return 0;
++
+       min_offs = EXT4_SB(inode->i_sb)->s_inode_size -
+                       EXT4_GOOD_OLD_INODE_SIZE -
+                       EXT4_I(inode)->i_extra_isize -
+-- 
+2.35.1
+
diff --git a/queue-5.19/ext4-correct-the-misjudgment-in-ext4_iget_extra_inod.patch b/queue-5.19/ext4-correct-the-misjudgment-in-ext4_iget_extra_inod.patch
new file mode 100644 (file)
index 0000000..1e94c28
--- /dev/null
@@ -0,0 +1,40 @@
+From 771e07336219dc4a7af050979534f19f27c23fde Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Jun 2022 10:13:58 +0800
+Subject: ext4: correct the misjudgment in ext4_iget_extra_inode
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit fd7e672ea98b95b9d4c9dae316639f03c16a749d ]
+
+Use the EXT4_INODE_HAS_XATTR_SPACE macro to more accurately
+determine whether the inode have xattr space.
+
+Cc: stable@kernel.org
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220616021358.2504451-5-libaokun1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/inode.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 0ccff6214fc8..2ad139d78574 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -4692,8 +4692,7 @@ static inline int ext4_iget_extra_inode(struct inode *inode,
+       __le32 *magic = (void *)raw_inode +
+                       EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize;
+-      if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize + sizeof(__le32) <=
+-          EXT4_INODE_SIZE(inode->i_sb) &&
++      if (EXT4_INODE_HAS_XATTR_SPACE(inode)  &&
+           *magic == cpu_to_le32(EXT4_XATTR_MAGIC)) {
+               ext4_set_inode_state(inode, EXT4_STATE_XATTR);
+               return ext4_find_inline_data_nolock(inode);
+-- 
+2.35.1
+
diff --git a/queue-5.19/ext4-fix-extent-status-tree-race-in-writeback-error-.patch b/queue-5.19/ext4-fix-extent-status-tree-race-in-writeback-error-.patch
new file mode 100644 (file)
index 0000000..8469b46
--- /dev/null
@@ -0,0 +1,57 @@
+From 712056b030923d3056a9e93c5d1f39483ce9db21 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 12:05:30 -0400
+Subject: ext4: fix extent status tree race in writeback error recovery path
+
+From: Eric Whitney <enwlinux@gmail.com>
+
+[ Upstream commit 7f0d8e1d607c1a4fa9a27362a108921d82230874 ]
+
+A race can occur in the unlikely event ext4 is unable to allocate a
+physical cluster for a delayed allocation in a bigalloc file system
+during writeback.  Failure to allocate a cluster forces error recovery
+that includes a call to mpage_release_unused_pages().  That function
+removes any corresponding delayed allocated blocks from the extent
+status tree.  If a new delayed write is in progress on the same cluster
+simultaneously, resulting in the addition of an new extent containing
+one or more blocks in that cluster to the extent status tree, delayed
+block accounting can be thrown off if that delayed write then encounters
+a similar cluster allocation failure during future writeback.
+
+Write lock the i_data_sem in mpage_release_unused_pages() to fix this
+problem.  Ext4's block/cluster accounting code for bigalloc relies on
+i_data_sem for mutual exclusion, as is found in the delayed write path,
+and the locking in mpage_release_unused_pages() is missing.
+
+Cc: stable@kernel.org
+Reported-by: Ye Bin <yebin10@huawei.com>
+Signed-off-by: Eric Whitney <enwlinux@gmail.com>
+Link: https://lore.kernel.org/r/20220615160530.1928801-1-enwlinux@gmail.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/inode.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 84c0eb55071d..0ccff6214fc8 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -1571,7 +1571,14 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
+               ext4_lblk_t start, last;
+               start = index << (PAGE_SHIFT - inode->i_blkbits);
+               last = end << (PAGE_SHIFT - inode->i_blkbits);
++
++              /*
++               * avoid racing with extent status tree scans made by
++               * ext4_insert_delayed_block()
++               */
++              down_write(&EXT4_I(inode)->i_data_sem);
+               ext4_es_remove_extent(inode, start, last - start + 1);
++              up_write(&EXT4_I(inode)->i_data_sem);
+       }
+       pagevec_init(&pvec);
+-- 
+2.35.1
+
diff --git a/queue-5.19/ext4-fix-race-when-reusing-xattr-blocks.patch b/queue-5.19/ext4-fix-race-when-reusing-xattr-blocks.patch
new file mode 100644 (file)
index 0000000..95d0a0f
--- /dev/null
@@ -0,0 +1,179 @@
+From 03dbffc0279cc2ff5976f6d8e70fb4c974c5ccb5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:24 +0200
+Subject: ext4: fix race when reusing xattr blocks
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 65f8b80053a1b2fd602daa6814e62d6fa90e5e9b ]
+
+When ext4_xattr_block_set() decides to remove xattr block the following
+race can happen:
+
+CPU1                                    CPU2
+ext4_xattr_block_set()                  ext4_xattr_release_block()
+  new_bh = ext4_xattr_block_cache_find()
+
+                                          lock_buffer(bh);
+                                          ref = le32_to_cpu(BHDR(bh)->h_refcount);
+                                          if (ref == 1) {
+                                            ...
+                                            mb_cache_entry_delete();
+                                            unlock_buffer(bh);
+                                            ext4_free_blocks();
+                                              ...
+                                              ext4_forget(..., bh, ...);
+                                                jbd2_journal_revoke(..., bh);
+
+  ext4_journal_get_write_access(..., new_bh, ...)
+    do_get_write_access()
+      jbd2_journal_cancel_revoke(..., new_bh);
+
+Later the code in ext4_xattr_block_set() finds out the block got freed
+and cancels reusal of the block but the revoke stays canceled and so in
+case of block reuse and journal replay the filesystem can get corrupted.
+If the race works out slightly differently, we can also hit assertions
+in the jbd2 code.
+
+Fix the problem by making sure that once matching mbcache entry is
+found, code dropping the last xattr block reference (or trying to modify
+xattr block in place) waits until the mbcache entry reference is
+dropped. This way code trying to reuse xattr block is protected from
+someone trying to drop the last reference to xattr block.
+
+Reported-and-tested-by: Ritesh Harjani <ritesh.list@gmail.com>
+CC: stable@vger.kernel.org
+Fixes: 82939d7999df ("ext4: convert to mbcache2")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-5-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.c | 67 +++++++++++++++++++++++++++++++++----------------
+ 1 file changed, 45 insertions(+), 22 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index a25942a74929..533216e80fa2 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -439,9 +439,16 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
+ /* Remove entry from mbcache when EA inode is getting evicted */
+ void ext4_evict_ea_inode(struct inode *inode)
+ {
+-      if (EA_INODE_CACHE(inode))
+-              mb_cache_entry_delete(EA_INODE_CACHE(inode),
+-                      ext4_xattr_inode_get_hash(inode), inode->i_ino);
++      struct mb_cache_entry *oe;
++
++      if (!EA_INODE_CACHE(inode))
++              return;
++      /* Wait for entry to get unused so that we can remove it */
++      while ((oe = mb_cache_entry_delete_or_get(EA_INODE_CACHE(inode),
++                      ext4_xattr_inode_get_hash(inode), inode->i_ino))) {
++              mb_cache_entry_wait_unused(oe);
++              mb_cache_entry_put(EA_INODE_CACHE(inode), oe);
++      }
+ }
+ static int
+@@ -1229,6 +1236,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
+       if (error)
+               goto out;
++retry_ref:
+       lock_buffer(bh);
+       hash = le32_to_cpu(BHDR(bh)->h_hash);
+       ref = le32_to_cpu(BHDR(bh)->h_refcount);
+@@ -1238,9 +1246,18 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
+                * This must happen under buffer lock for
+                * ext4_xattr_block_set() to reliably detect freed block
+                */
+-              if (ea_block_cache)
+-                      mb_cache_entry_delete(ea_block_cache, hash,
+-                                            bh->b_blocknr);
++              if (ea_block_cache) {
++                      struct mb_cache_entry *oe;
++
++                      oe = mb_cache_entry_delete_or_get(ea_block_cache, hash,
++                                                        bh->b_blocknr);
++                      if (oe) {
++                              unlock_buffer(bh);
++                              mb_cache_entry_wait_unused(oe);
++                              mb_cache_entry_put(ea_block_cache, oe);
++                              goto retry_ref;
++                      }
++              }
+               get_bh(bh);
+               unlock_buffer(bh);
+@@ -1867,9 +1884,20 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                        * ext4_xattr_block_set() to reliably detect modified
+                        * block
+                        */
+-                      if (ea_block_cache)
+-                              mb_cache_entry_delete(ea_block_cache, hash,
+-                                                    bs->bh->b_blocknr);
++                      if (ea_block_cache) {
++                              struct mb_cache_entry *oe;
++
++                              oe = mb_cache_entry_delete_or_get(ea_block_cache,
++                                      hash, bs->bh->b_blocknr);
++                              if (oe) {
++                                      /*
++                                       * Xattr block is getting reused. Leave
++                                       * it alone.
++                                       */
++                                      mb_cache_entry_put(ea_block_cache, oe);
++                                      goto clone_block;
++                              }
++                      }
+                       ea_bdebug(bs->bh, "modifying in-place");
+                       error = ext4_xattr_set_entry(i, s, handle, inode,
+                                                    true /* is_block */);
+@@ -1885,6 +1913,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                               goto cleanup;
+                       goto inserted;
+               }
++clone_block:
+               unlock_buffer(bs->bh);
+               ea_bdebug(bs->bh, "cloning");
+               s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
+@@ -1990,18 +2019,13 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                               lock_buffer(new_bh);
+                               /*
+                                * We have to be careful about races with
+-                               * freeing, rehashing or adding references to
+-                               * xattr block. Once we hold buffer lock xattr
+-                               * block's state is stable so we can check
+-                               * whether the block got freed / rehashed or
+-                               * not.  Since we unhash mbcache entry under
+-                               * buffer lock when freeing / rehashing xattr
+-                               * block, checking whether entry is still
+-                               * hashed is reliable. Same rules hold for
+-                               * e_reusable handling.
++                               * adding references to xattr block. Once we
++                               * hold buffer lock xattr block's state is
++                               * stable so we can check the additional
++                               * reference fits.
+                                */
+-                              if (hlist_bl_unhashed(&ce->e_hash_list) ||
+-                                  !ce->e_reusable) {
++                              ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
++                              if (ref > EXT4_XATTR_REFCOUNT_MAX) {
+                                       /*
+                                        * Undo everything and check mbcache
+                                        * again.
+@@ -2016,9 +2040,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                                       new_bh = NULL;
+                                       goto inserted;
+                               }
+-                              ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+                               BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
+-                              if (ref >= EXT4_XATTR_REFCOUNT_MAX)
++                              if (ref == EXT4_XATTR_REFCOUNT_MAX)
+                                       ce->e_reusable = 0;
+                               ea_bdebug(new_bh, "reusing; refcount now=%d",
+                                         ref);
+-- 
+2.35.1
+
diff --git a/queue-5.19/ext4-fix-reading-leftover-inlined-symlinks.patch b/queue-5.19/ext4-fix-reading-leftover-inlined-symlinks.patch
new file mode 100644 (file)
index 0000000..c76ce9a
--- /dev/null
@@ -0,0 +1,126 @@
+From ba855ed0e2e098d4dee66c65f28407d44183a7ef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Jun 2022 17:01:00 +0800
+Subject: ext4: fix reading leftover inlined symlinks
+
+From: Zhang Yi <yi.zhang@huawei.com>
+
+[ Upstream commit 5a57bca9050d740ca37184302e23d0e7633e3ebc ]
+
+Since commit 6493792d3299 ("ext4: convert symlink external data block
+mapping to bdev"), create new symlink with inline_data is not supported,
+but it missing to handle the leftover inlined symlinks, which could
+cause below error message and fail to read symlink.
+
+ ls: cannot read symbolic link 'foo': Structure needs cleaning
+
+ EXT4-fs error (device sda): ext4_map_blocks:605: inode #12: block
+ 2021161080: comm ls: lblock 0 mapped to illegal pblock 2021161080
+ (length 1)
+
+Fix this regression by adding ext4_read_inline_link(), which read the
+inline data directly and convert it through a kmalloced buffer.
+
+Fixes: 6493792d3299 ("ext4: convert symlink external data block mapping to bdev")
+Cc: stable@kernel.org
+Reported-by: Torge Matthies <openglfreak@googlemail.com>
+Signed-off-by: Zhang Yi <yi.zhang@huawei.com>
+Tested-by: Torge Matthies <openglfreak@googlemail.com>
+Link: https://lore.kernel.org/r/20220630090100.2769490-1-yi.zhang@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/ext4.h    |  1 +
+ fs/ext4/inline.c  | 30 ++++++++++++++++++++++++++++++
+ fs/ext4/symlink.c | 15 +++++++++++++++
+ 3 files changed, 46 insertions(+)
+
+diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
+index 75b8d81b2469..adfc30ee4b7b 100644
+--- a/fs/ext4/ext4.h
++++ b/fs/ext4/ext4.h
+@@ -3583,6 +3583,7 @@ extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
+ extern int ext4_inline_data_fiemap(struct inode *inode,
+                                  struct fiemap_extent_info *fieinfo,
+                                  int *has_inline, __u64 start, __u64 len);
++extern void *ext4_read_inline_link(struct inode *inode);
+ struct iomap;
+ extern int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap);
+diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
+index cff52ff6549d..1fa36cbe09ec 100644
+--- a/fs/ext4/inline.c
++++ b/fs/ext4/inline.c
+@@ -6,6 +6,7 @@
+ #include <linux/iomap.h>
+ #include <linux/fiemap.h>
++#include <linux/namei.h>
+ #include <linux/iversion.h>
+ #include <linux/sched/mm.h>
+@@ -1588,6 +1589,35 @@ int ext4_read_inline_dir(struct file *file,
+       return ret;
+ }
++void *ext4_read_inline_link(struct inode *inode)
++{
++      struct ext4_iloc iloc;
++      int ret, inline_size;
++      void *link;
++
++      ret = ext4_get_inode_loc(inode, &iloc);
++      if (ret)
++              return ERR_PTR(ret);
++
++      ret = -ENOMEM;
++      inline_size = ext4_get_inline_size(inode);
++      link = kmalloc(inline_size + 1, GFP_NOFS);
++      if (!link)
++              goto out;
++
++      ret = ext4_read_inline_data(inode, link, inline_size, &iloc);
++      if (ret < 0) {
++              kfree(link);
++              goto out;
++      }
++      nd_terminate_link(link, inode->i_size, ret);
++out:
++      if (ret < 0)
++              link = ERR_PTR(ret);
++      brelse(iloc.bh);
++      return link;
++}
++
+ struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
+                                       struct ext4_dir_entry_2 **parent_de,
+                                       int *retval)
+diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
+index d281f5bcc526..3d3ed3c38f56 100644
+--- a/fs/ext4/symlink.c
++++ b/fs/ext4/symlink.c
+@@ -74,6 +74,21 @@ static const char *ext4_get_link(struct dentry *dentry, struct inode *inode,
+                                struct delayed_call *callback)
+ {
+       struct buffer_head *bh;
++      char *inline_link;
++
++      /*
++       * Create a new inlined symlink is not supported, just provide a
++       * method to read the leftovers.
++       */
++      if (ext4_has_inline_data(inode)) {
++              if (!dentry)
++                      return ERR_PTR(-ECHILD);
++
++              inline_link = ext4_read_inline_link(inode);
++              if (!IS_ERR(inline_link))
++                      set_delayed_call(callback, kfree_link, inline_link);
++              return inline_link;
++      }
+       if (!dentry) {
+               bh = ext4_getblk(NULL, inode, 0, EXT4_GET_BLOCKS_CACHED_NOWAIT);
+-- 
+2.35.1
+
diff --git a/queue-5.19/ext4-fix-use-after-free-in-ext4_xattr_set_entry.patch b/queue-5.19/ext4-fix-use-after-free-in-ext4_xattr_set_entry.patch
new file mode 100644 (file)
index 0000000..bb5f64a
--- /dev/null
@@ -0,0 +1,128 @@
+From b2791c469d1eb6c13db27604e6f462f9a6d0112a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Jun 2022 10:13:56 +0800
+Subject: ext4: fix use-after-free in ext4_xattr_set_entry
+
+From: Baokun Li <libaokun1@huawei.com>
+
+[ Upstream commit 67d7d8ad99beccd9fe92d585b87f1760dc9018e3 ]
+
+Hulk Robot reported an issue:
+==================================================================
+BUG: KASAN: use-after-free in ext4_xattr_set_entry+0x18ab/0x3500
+Write of size 4105 at addr ffff8881675ef5f4 by task syz-executor.0/7092
+
+CPU: 1 PID: 7092 Comm: syz-executor.0 Not tainted 4.19.90-dirty #17
+Call Trace:
+[...]
+ memcpy+0x34/0x50 mm/kasan/kasan.c:303
+ ext4_xattr_set_entry+0x18ab/0x3500 fs/ext4/xattr.c:1747
+ ext4_xattr_ibody_inline_set+0x86/0x2a0 fs/ext4/xattr.c:2205
+ ext4_xattr_set_handle+0x940/0x1300 fs/ext4/xattr.c:2386
+ ext4_xattr_set+0x1da/0x300 fs/ext4/xattr.c:2498
+ __vfs_setxattr+0x112/0x170 fs/xattr.c:149
+ __vfs_setxattr_noperm+0x11b/0x2a0 fs/xattr.c:180
+ __vfs_setxattr_locked+0x17b/0x250 fs/xattr.c:238
+ vfs_setxattr+0xed/0x270 fs/xattr.c:255
+ setxattr+0x235/0x330 fs/xattr.c:520
+ path_setxattr+0x176/0x190 fs/xattr.c:539
+ __do_sys_lsetxattr fs/xattr.c:561 [inline]
+ __se_sys_lsetxattr fs/xattr.c:557 [inline]
+ __x64_sys_lsetxattr+0xc2/0x160 fs/xattr.c:557
+ do_syscall_64+0xdf/0x530 arch/x86/entry/common.c:298
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+RIP: 0033:0x459fe9
+RSP: 002b:00007fa5e54b4c08 EFLAGS: 00000246 ORIG_RAX: 00000000000000bd
+RAX: ffffffffffffffda RBX: 000000000051bf60 RCX: 0000000000459fe9
+RDX: 00000000200003c0 RSI: 0000000020000180 RDI: 0000000020000140
+RBP: 000000000051bf60 R08: 0000000000000001 R09: 0000000000000000
+R10: 0000000000001009 R11: 0000000000000246 R12: 0000000000000000
+R13: 00007ffc73c93fc0 R14: 000000000051bf60 R15: 00007fa5e54b4d80
+[...]
+==================================================================
+
+Above issue may happen as follows:
+-------------------------------------
+ext4_xattr_set
+  ext4_xattr_set_handle
+    ext4_xattr_ibody_find
+      >> s->end < s->base
+      >> no EXT4_STATE_XATTR
+      >> xattr_check_inode is not executed
+    ext4_xattr_ibody_set
+      ext4_xattr_set_entry
+       >> size_t min_offs = s->end - s->base
+       >> UAF in memcpy
+
+we can easily reproduce this problem with the following commands:
+    mkfs.ext4 -F /dev/sda
+    mount -o debug_want_extra_isize=128 /dev/sda /mnt
+    touch /mnt/file
+    setfattr -n user.cat -v `seq -s z 4096|tr -d '[:digit:]'` /mnt/file
+
+In ext4_xattr_ibody_find, we have the following assignment logic:
+  header = IHDR(inode, raw_inode)
+         = raw_inode + EXT4_GOOD_OLD_INODE_SIZE + i_extra_isize
+  is->s.base = IFIRST(header)
+             = header + sizeof(struct ext4_xattr_ibody_header)
+  is->s.end = raw_inode + s_inode_size
+
+In ext4_xattr_set_entry
+  min_offs = s->end - s->base
+           = s_inode_size - EXT4_GOOD_OLD_INODE_SIZE - i_extra_isize -
+            sizeof(struct ext4_xattr_ibody_header)
+  last = s->first
+  free = min_offs - ((void *)last - s->base) - sizeof(__u32)
+       = s_inode_size - EXT4_GOOD_OLD_INODE_SIZE - i_extra_isize -
+         sizeof(struct ext4_xattr_ibody_header) - sizeof(__u32)
+
+In the calculation formula, all values except s_inode_size and
+i_extra_isize are fixed values. When i_extra_isize is the maximum value
+s_inode_size - EXT4_GOOD_OLD_INODE_SIZE, min_offs is -4 and free is -8.
+The value overflows. As a result, the preceding issue is triggered when
+memcpy is executed.
+
+Therefore, when finding xattr or setting xattr, check whether
+there is space for storing xattr in the inode to resolve this issue.
+
+Cc: stable@kernel.org
+Reported-by: Hulk Robot <hulkci@huawei.com>
+Signed-off-by: Baokun Li <libaokun1@huawei.com>
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Reviewed-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220616021358.2504451-3-libaokun1@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index 564e28a1aa94..c42b3e0d2d94 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -2175,8 +2175,9 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
+       struct ext4_inode *raw_inode;
+       int error;
+-      if (EXT4_I(inode)->i_extra_isize == 0)
++      if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
+               return 0;
++
+       raw_inode = ext4_raw_inode(&is->iloc);
+       header = IHDR(inode, raw_inode);
+       is->s.base = is->s.first = IFIRST(header);
+@@ -2204,8 +2205,9 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
+       struct ext4_xattr_search *s = &is->s;
+       int error;
+-      if (EXT4_I(inode)->i_extra_isize == 0)
++      if (!EXT4_INODE_HAS_XATTR_SPACE(inode))
+               return -ENOSPC;
++
+       error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */);
+       if (error)
+               return error;
+-- 
+2.35.1
+
diff --git a/queue-5.19/ext4-fix-warning-in-ext4_iomap_begin-as-race-between.patch b/queue-5.19/ext4-fix-warning-in-ext4_iomap_begin-as-race-between.patch
new file mode 100644 (file)
index 0000000..46ee3fe
--- /dev/null
@@ -0,0 +1,103 @@
+From ccf1c1b18f2d8d9ecafff8103a2644000cee2caa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 17 Jun 2022 09:39:35 +0800
+Subject: ext4: fix warning in ext4_iomap_begin as race between bmap and write
+
+From: Ye Bin <yebin10@huawei.com>
+
+[ Upstream commit 51ae846cff568c8c29921b1b28eb2dfbcd4ac12d ]
+
+We got an issue as follows:
+------------[ cut here ]------------
+WARNING: CPU: 3 PID: 9310 at fs/ext4/inode.c:3441 ext4_iomap_begin+0x182/0x5d0
+RIP: 0010:ext4_iomap_begin+0x182/0x5d0
+RSP: 0018:ffff88812460fa08 EFLAGS: 00010293
+RAX: ffff88811f168000 RBX: 0000000000000000 RCX: ffffffff97793c12
+RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000003
+RBP: ffff88812c669160 R08: ffff88811f168000 R09: ffffed10258cd20f
+R10: ffff88812c669077 R11: ffffed10258cd20e R12: 0000000000000001
+R13: 00000000000000a4 R14: 000000000000000c R15: ffff88812c6691ee
+FS:  00007fd0d6ff3740(0000) GS:ffff8883af180000(0000) knlGS:0000000000000000
+CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007fd0d6dda290 CR3: 0000000104a62000 CR4: 00000000000006e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ iomap_apply+0x119/0x570
+ iomap_bmap+0x124/0x150
+ ext4_bmap+0x14f/0x250
+ bmap+0x55/0x80
+ do_vfs_ioctl+0x952/0xbd0
+ __x64_sys_ioctl+0xc6/0x170
+ do_syscall_64+0x33/0x40
+ entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+Above issue may happen as follows:
+          bmap                    write
+bmap
+  ext4_bmap
+    iomap_bmap
+      ext4_iomap_begin
+                            ext4_file_write_iter
+                             ext4_buffered_write_iter
+                               generic_perform_write
+                                 ext4_da_write_begin
+                                   ext4_da_write_inline_data_begin
+                                     ext4_prepare_inline_data
+                                       ext4_create_inline_data
+                                         ext4_set_inode_flag(inode,
+                                               EXT4_INODE_INLINE_DATA);
+      if (WARN_ON_ONCE(ext4_has_inline_data(inode))) ->trigger bug_on
+
+To solve the above issue, hold the inode lock in ext4_bmap.
+
+Signed-off-by: Ye Bin <yebin10@huawei.com>
+Link: https://lore.kernel.org/r/20220617013935.397596-1-yebin10@huawei.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/inode.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 2ad139d78574..14fd481bf601 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -3147,13 +3147,15 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
+ {
+       struct inode *inode = mapping->host;
+       journal_t *journal;
++      sector_t ret = 0;
+       int err;
++      inode_lock_shared(inode);
+       /*
+        * We can get here for an inline file via the FIBMAP ioctl
+        */
+       if (ext4_has_inline_data(inode))
+-              return 0;
++              goto out;
+       if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
+                       test_opt(inode->i_sb, DELALLOC)) {
+@@ -3192,10 +3194,14 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block)
+               jbd2_journal_unlock_updates(journal);
+               if (err)
+-                      return 0;
++                      goto out;
+       }
+-      return iomap_bmap(mapping, block, &ext4_iomap_ops);
++      ret = iomap_bmap(mapping, block, &ext4_iomap_ops);
++
++out:
++      inode_unlock_shared(inode);
++      return ret;
+ }
+ static int ext4_read_folio(struct file *file, struct folio *folio)
+-- 
+2.35.1
+
diff --git a/queue-5.19/ext4-make-sure-ext4_append-always-allocates-new-bloc.patch b/queue-5.19/ext4-make-sure-ext4_append-always-allocates-new-bloc.patch
new file mode 100644 (file)
index 0000000..48d9790
--- /dev/null
@@ -0,0 +1,63 @@
+From bd53256fd31e2cf44df1912f04e72481d15d9582 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Jul 2022 16:27:21 +0200
+Subject: ext4: make sure ext4_append() always allocates new block
+
+From: Lukas Czerner <lczerner@redhat.com>
+
+[ Upstream commit b8a04fe77ef1360fbf73c80fddbdfeaa9407ed1b ]
+
+ext4_append() must always allocate a new block, otherwise we run the
+risk of overwriting existing directory block corrupting the directory
+tree in the process resulting in all manner of problems later on.
+
+Add a sanity check to see if the logical block is already allocated and
+error out if it is.
+
+Cc: stable@kernel.org
+Signed-off-by: Lukas Czerner <lczerner@redhat.com>
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Link: https://lore.kernel.org/r/20220704142721.157985-2-lczerner@redhat.com
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/namei.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
+index cf460aa4f81d..4af441494e09 100644
+--- a/fs/ext4/namei.c
++++ b/fs/ext4/namei.c
+@@ -54,6 +54,7 @@ static struct buffer_head *ext4_append(handle_t *handle,
+                                       struct inode *inode,
+                                       ext4_lblk_t *block)
+ {
++      struct ext4_map_blocks map;
+       struct buffer_head *bh;
+       int err;
+@@ -63,6 +64,21 @@ static struct buffer_head *ext4_append(handle_t *handle,
+               return ERR_PTR(-ENOSPC);
+       *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
++      map.m_lblk = *block;
++      map.m_len = 1;
++
++      /*
++       * We're appending new directory block. Make sure the block is not
++       * allocated yet, otherwise we will end up corrupting the
++       * directory.
++       */
++      err = ext4_map_blocks(NULL, inode, &map, 0);
++      if (err < 0)
++              return ERR_PTR(err);
++      if (err) {
++              EXT4_ERROR_INODE(inode, "Logical block already allocated");
++              return ERR_PTR(-EFSCORRUPTED);
++      }
+       bh = ext4_bread(handle, inode, *block, EXT4_GET_BLOCKS_CREATE);
+       if (IS_ERR(bh))
+-- 
+2.35.1
+
diff --git a/queue-5.19/ext4-remove-ea-inode-entry-from-mbcache-on-inode-evi.patch b/queue-5.19/ext4-remove-ea-inode-entry-from-mbcache-on-inode-evi.patch
new file mode 100644 (file)
index 0000000..cc61257
--- /dev/null
@@ -0,0 +1,116 @@
+From 55b9537a1c31862d8f4293dcc3bd406fca54ce41 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:22 +0200
+Subject: ext4: remove EA inode entry from mbcache on inode eviction
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 6bc0d63dad7f9f54d381925ee855b402f652fa39 ]
+
+Currently we remove EA inode from mbcache as soon as its xattr refcount
+drops to zero. However there can be pending attempts to reuse the inode
+and thus refcount handling code has to handle the situation when
+refcount increases from zero anyway. So save some work and just keep EA
+inode in mbcache until it is getting evicted. At that moment we are sure
+following iget() of EA inode will fail anyway (or wait for eviction to
+finish and load things from the disk again) and so removing mbcache
+entry at that moment is fine and simplifies the code a bit.
+
+CC: stable@vger.kernel.org
+Fixes: 82939d7999df ("ext4: convert to mbcache2")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-3-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/inode.c |  2 ++
+ fs/ext4/xattr.c | 24 ++++++++----------------
+ fs/ext4/xattr.h |  1 +
+ 3 files changed, 11 insertions(+), 16 deletions(-)
+
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 14fd481bf601..560cf8dc5935 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -177,6 +177,8 @@ void ext4_evict_inode(struct inode *inode)
+       trace_ext4_evict_inode(inode);
++      if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
++              ext4_evict_ea_inode(inode);
+       if (inode->i_nlink) {
+               /*
+                * When journalling data dirty buffers are tracked only in the
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index c42b3e0d2d94..d92d50de5a01 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -436,6 +436,14 @@ static int ext4_xattr_inode_iget(struct inode *parent, unsigned long ea_ino,
+       return err;
+ }
++/* Remove entry from mbcache when EA inode is getting evicted */
++void ext4_evict_ea_inode(struct inode *inode)
++{
++      if (EA_INODE_CACHE(inode))
++              mb_cache_entry_delete(EA_INODE_CACHE(inode),
++                      ext4_xattr_inode_get_hash(inode), inode->i_ino);
++}
++
+ static int
+ ext4_xattr_inode_verify_hashes(struct inode *ea_inode,
+                              struct ext4_xattr_entry *entry, void *buffer,
+@@ -976,10 +984,8 @@ int __ext4_xattr_set_credits(struct super_block *sb, struct inode *inode,
+ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
+                                      int ref_change)
+ {
+-      struct mb_cache *ea_inode_cache = EA_INODE_CACHE(ea_inode);
+       struct ext4_iloc iloc;
+       s64 ref_count;
+-      u32 hash;
+       int ret;
+       inode_lock(ea_inode);
+@@ -1002,14 +1008,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
+                       set_nlink(ea_inode, 1);
+                       ext4_orphan_del(handle, ea_inode);
+-
+-                      if (ea_inode_cache) {
+-                              hash = ext4_xattr_inode_get_hash(ea_inode);
+-                              mb_cache_entry_create(ea_inode_cache,
+-                                                    GFP_NOFS, hash,
+-                                                    ea_inode->i_ino,
+-                                                    true /* reusable */);
+-                      }
+               }
+       } else {
+               WARN_ONCE(ref_count < 0, "EA inode %lu ref_count=%lld",
+@@ -1022,12 +1020,6 @@ static int ext4_xattr_inode_update_ref(handle_t *handle, struct inode *ea_inode,
+                       clear_nlink(ea_inode);
+                       ext4_orphan_add(handle, ea_inode);
+-
+-                      if (ea_inode_cache) {
+-                              hash = ext4_xattr_inode_get_hash(ea_inode);
+-                              mb_cache_entry_delete(ea_inode_cache, hash,
+-                                                    ea_inode->i_ino);
+-                      }
+               }
+       }
+diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
+index f885f362add4..e5e36bd11f05 100644
+--- a/fs/ext4/xattr.h
++++ b/fs/ext4/xattr.h
+@@ -191,6 +191,7 @@ extern void ext4_xattr_inode_array_free(struct ext4_xattr_inode_array *array);
+ extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
+                           struct ext4_inode *raw_inode, handle_t *handle);
++extern void ext4_evict_ea_inode(struct inode *inode);
+ extern const struct xattr_handler *ext4_xattr_handlers[];
+-- 
+2.35.1
+
diff --git a/queue-5.19/ext4-unindent-codeblock-in-ext4_xattr_block_set.patch b/queue-5.19/ext4-unindent-codeblock-in-ext4_xattr_block_set.patch
new file mode 100644 (file)
index 0000000..e35c6d1
--- /dev/null
@@ -0,0 +1,125 @@
+From 185bdf0d31787fd26092315a6029f3af812e432f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:23 +0200
+Subject: ext4: unindent codeblock in ext4_xattr_block_set()
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit fd48e9acdf26d0cbd80051de07d4a735d05d29b2 ]
+
+Remove unnecessary else (and thus indentation level) from a code block
+in ext4_xattr_block_set(). It will also make following code changes
+easier. No functional changes.
+
+CC: stable@vger.kernel.org
+Fixes: 82939d7999df ("ext4: convert to mbcache2")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-4-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/xattr.c | 77 ++++++++++++++++++++++++-------------------------
+ 1 file changed, 38 insertions(+), 39 deletions(-)
+
+diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
+index d92d50de5a01..a25942a74929 100644
+--- a/fs/ext4/xattr.c
++++ b/fs/ext4/xattr.c
+@@ -1850,6 +1850,8 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+ #define header(x) ((struct ext4_xattr_header *)(x))
+       if (s->base) {
++              int offset = (char *)s->here - bs->bh->b_data;
++
+               BUFFER_TRACE(bs->bh, "get_write_access");
+               error = ext4_journal_get_write_access(handle, sb, bs->bh,
+                                                     EXT4_JTR_NONE);
+@@ -1882,49 +1884,46 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
+                       if (error)
+                               goto cleanup;
+                       goto inserted;
+-              } else {
+-                      int offset = (char *)s->here - bs->bh->b_data;
++              }
++              unlock_buffer(bs->bh);
++              ea_bdebug(bs->bh, "cloning");
++              s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
++              error = -ENOMEM;
++              if (s->base == NULL)
++                      goto cleanup;
++              s->first = ENTRY(header(s->base)+1);
++              header(s->base)->h_refcount = cpu_to_le32(1);
++              s->here = ENTRY(s->base + offset);
++              s->end = s->base + bs->bh->b_size;
+-                      unlock_buffer(bs->bh);
+-                      ea_bdebug(bs->bh, "cloning");
+-                      s->base = kmemdup(BHDR(bs->bh), bs->bh->b_size, GFP_NOFS);
+-                      error = -ENOMEM;
+-                      if (s->base == NULL)
++              /*
++               * If existing entry points to an xattr inode, we need
++               * to prevent ext4_xattr_set_entry() from decrementing
++               * ref count on it because the reference belongs to the
++               * original block. In this case, make the entry look
++               * like it has an empty value.
++               */
++              if (!s->not_found && s->here->e_value_inum) {
++                      ea_ino = le32_to_cpu(s->here->e_value_inum);
++                      error = ext4_xattr_inode_iget(inode, ea_ino,
++                                    le32_to_cpu(s->here->e_hash),
++                                    &tmp_inode);
++                      if (error)
+                               goto cleanup;
+-                      s->first = ENTRY(header(s->base)+1);
+-                      header(s->base)->h_refcount = cpu_to_le32(1);
+-                      s->here = ENTRY(s->base + offset);
+-                      s->end = s->base + bs->bh->b_size;
+-                      /*
+-                       * If existing entry points to an xattr inode, we need
+-                       * to prevent ext4_xattr_set_entry() from decrementing
+-                       * ref count on it because the reference belongs to the
+-                       * original block. In this case, make the entry look
+-                       * like it has an empty value.
+-                       */
+-                      if (!s->not_found && s->here->e_value_inum) {
+-                              ea_ino = le32_to_cpu(s->here->e_value_inum);
+-                              error = ext4_xattr_inode_iget(inode, ea_ino,
+-                                            le32_to_cpu(s->here->e_hash),
+-                                            &tmp_inode);
+-                              if (error)
+-                                      goto cleanup;
+-
+-                              if (!ext4_test_inode_state(tmp_inode,
+-                                              EXT4_STATE_LUSTRE_EA_INODE)) {
+-                                      /*
+-                                       * Defer quota free call for previous
+-                                       * inode until success is guaranteed.
+-                                       */
+-                                      old_ea_inode_quota = le32_to_cpu(
+-                                                      s->here->e_value_size);
+-                              }
+-                              iput(tmp_inode);
+-
+-                              s->here->e_value_inum = 0;
+-                              s->here->e_value_size = 0;
++                      if (!ext4_test_inode_state(tmp_inode,
++                                      EXT4_STATE_LUSTRE_EA_INODE)) {
++                              /*
++                               * Defer quota free call for previous
++                               * inode until success is guaranteed.
++                               */
++                              old_ea_inode_quota = le32_to_cpu(
++                                              s->here->e_value_size);
+                       }
++                      iput(tmp_inode);
++
++                      s->here->e_value_inum = 0;
++                      s->here->e_value_size = 0;
+               }
+       } else {
+               /* Allocate a buffer where we construct the new block. */
+-- 
+2.35.1
+
diff --git a/queue-5.19/ext4-update-s_overhead_clusters-in-the-superblock-du.patch b/queue-5.19/ext4-update-s_overhead_clusters-in-the-superblock-du.patch
new file mode 100644 (file)
index 0000000..a62d152
--- /dev/null
@@ -0,0 +1,51 @@
+From ca2b6f12dd16fc0051fc4e1af31aa96b79819e7a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 29 Jun 2022 00:00:25 -0400
+Subject: ext4: update s_overhead_clusters in the superblock during an on-line
+ resize
+
+From: Theodore Ts'o <tytso@mit.edu>
+
+[ Upstream commit de394a86658ffe4e89e5328fd4993abfe41b7435 ]
+
+When doing an online resize, the on-disk superblock wasn't
+updated.  This means that when the file system is unmounted and
+remounted, and the on-disk overhead value is non-zero, this would
+result in the results of statfs(2) to be incorrect.
+
+This was partially fixed by Commits 10b01ee92df5 ("ext4: fix overhead
+calculation to account for the reserved gdt blocks"), 85d825dbf489
+("ext4: force overhead calculation if the s_overhead_cluster makes no
+sense"), and eb7054212eac ("ext4: update the cached overhead value in
+the superblock").
+
+However, since it was too expensive to forcibly recalculate the
+overhead for bigalloc file systems at every mount, this didn't fix the
+problem for bigalloc file systems.  This commit should address the
+problem when resizing file systems with the bigalloc feature enabled.
+
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Cc: stable@kernel.org
+Reviewed-by: Andreas Dilger <adilger@dilger.ca>
+Link: https://lore.kernel.org/r/20220629040026.112371-1-tytso@mit.edu
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ext4/resize.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
+index 8b70a4701293..e5c2713aa11a 100644
+--- a/fs/ext4/resize.c
++++ b/fs/ext4/resize.c
+@@ -1484,6 +1484,7 @@ static void ext4_update_super(struct super_block *sb,
+        * Update the fs overhead information
+        */
+       ext4_calculate_overhead(sb);
++      es->s_overhead_clusters = cpu_to_le32(sbi->s_overhead);
+       if (test_opt(sb, DEBUG))
+               printk(KERN_DEBUG "EXT4-fs: added group %u:"
+-- 
+2.35.1
+
diff --git a/queue-5.19/fbcon-fix-accelerated-fbdev-scrolling-while-logo-is-.patch b/queue-5.19/fbcon-fix-accelerated-fbdev-scrolling-while-logo-is-.patch
new file mode 100644 (file)
index 0000000..0339e1b
--- /dev/null
@@ -0,0 +1,57 @@
+From b98c086153d76fbbb9ccae4d4416547027db47db Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Jun 2022 22:08:38 +0200
+Subject: fbcon: Fix accelerated fbdev scrolling while logo is still shown
+
+From: Helge Deller <deller@gmx.de>
+
+[ Upstream commit 3866cba87dcd0162fb41e9b3b653d0af68fad5ec ]
+
+There is no need to directly skip over to the SCROLL_REDRAW case while
+the logo is still shown.
+
+When using DRM, this change has no effect because the code will reach
+the SCROLL_REDRAW case immediately anyway.
+
+But if you run an accelerated fbdev driver and have
+FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION enabled, console scrolling is
+slowed down by factors so that it feels as if you use a 9600 baud
+terminal.
+
+So, drop those unnecessary checks and speed up fbdev console
+acceleration during bootup.
+
+Cc: stable@vger.kernel.org # v5.10+
+Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Link: https://patchwork.freedesktop.org/patch/msgid/YpkYxk7wsBPx3po+@p100
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/core/fbcon.c | 4 ----
+ 1 file changed, 4 deletions(-)
+
+diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
+index 1a9aa12cf886..1a1de5b4645c 100644
+--- a/drivers/video/fbdev/core/fbcon.c
++++ b/drivers/video/fbdev/core/fbcon.c
+@@ -1758,8 +1758,6 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
+       case SM_UP:
+               if (count > vc->vc_rows)        /* Maximum realistic size */
+                       count = vc->vc_rows;
+-              if (logo_shown >= 0)
+-                      goto redraw_up;
+               switch (fb_scrollmode(p)) {
+               case SCROLL_MOVE:
+                       fbcon_redraw_blit(vc, info, p, t, b - t - count,
+@@ -1848,8 +1846,6 @@ static bool fbcon_scroll(struct vc_data *vc, unsigned int t, unsigned int b,
+       case SM_DOWN:
+               if (count > vc->vc_rows)        /* Maximum realistic size */
+                       count = vc->vc_rows;
+-              if (logo_shown >= 0)
+-                      goto redraw_down;
+               switch (fb_scrollmode(p)) {
+               case SCROLL_MOVE:
+                       fbcon_redraw_blit(vc, info, p, b - 1, b - t - count,
+-- 
+2.35.1
+
diff --git a/queue-5.19/fbcon-fix-boundary-checks-for-fbcon-vc-n1-n2-paramet.patch b/queue-5.19/fbcon-fix-boundary-checks-for-fbcon-vc-n1-n2-paramet.patch
new file mode 100644 (file)
index 0000000..53e9eae
--- /dev/null
@@ -0,0 +1,59 @@
+From d9d27d24ba7501f40215af66b500159644245533 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 2 Jun 2022 22:06:28 +0200
+Subject: fbcon: Fix boundary checks for fbcon=vc:n1-n2 parameters
+
+From: Helge Deller <deller@gmx.de>
+
+[ Upstream commit cad564ca557f8d3bb3b1fa965d9a2b3f6490ec69 ]
+
+The user may use the fbcon=vc:<n1>-<n2> option to tell fbcon to take
+over the given range (n1...n2) of consoles. The value for n1 and n2
+needs to be a positive number and up to (MAX_NR_CONSOLES - 1).
+The given values were not fully checked against those boundaries yet.
+
+To fix the issue, convert first_fb_vc and last_fb_vc to unsigned
+integers and check them against the upper boundary, and make sure that
+first_fb_vc is smaller than last_fb_vc.
+
+Cc: stable@vger.kernel.org # v4.19+
+Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Link: https://patchwork.freedesktop.org/patch/msgid/YpkYRMojilrtZIgM@p100
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/core/fbcon.c | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c
+index 1a1de5b4645c..b89075f3b6ab 100644
+--- a/drivers/video/fbdev/core/fbcon.c
++++ b/drivers/video/fbdev/core/fbcon.c
+@@ -125,8 +125,8 @@ static int logo_lines;
+    enums.  */
+ static int logo_shown = FBCON_LOGO_CANSHOW;
+ /* console mappings */
+-static int first_fb_vc;
+-static int last_fb_vc = MAX_NR_CONSOLES - 1;
++static unsigned int first_fb_vc;
++static unsigned int last_fb_vc = MAX_NR_CONSOLES - 1;
+ static int fbcon_is_default = 1; 
+ static int primary_device = -1;
+ static int fbcon_has_console_bind;
+@@ -440,10 +440,12 @@ static int __init fb_console_setup(char *this_opt)
+                       options += 3;
+                       if (*options)
+                               first_fb_vc = simple_strtoul(options, &options, 10) - 1;
+-                      if (first_fb_vc < 0)
++                      if (first_fb_vc >= MAX_NR_CONSOLES)
+                               first_fb_vc = 0;
+                       if (*options++ == '-')
+                               last_fb_vc = simple_strtoul(options, &options, 10) - 1;
++                      if (last_fb_vc < first_fb_vc || last_fb_vc >= MAX_NR_CONSOLES)
++                              last_fb_vc = MAX_NR_CONSOLES - 1;
+                       fbcon_is_default = 0; 
+                       continue;
+               }
+-- 
+2.35.1
+
diff --git a/queue-5.19/firmware-arm_scpi-ensure-scpi_info-is-not-assigned-i.patch b/queue-5.19/firmware-arm_scpi-ensure-scpi_info-is-not-assigned-i.patch
new file mode 100644 (file)
index 0000000..2dcf0e1
--- /dev/null
@@ -0,0 +1,156 @@
+From 43401118c05f89f211c3fb2c5e2eb0fec30a5815 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Jul 2022 17:03:10 +0100
+Subject: firmware: arm_scpi: Ensure scpi_info is not assigned if the probe
+ fails
+
+From: Sudeep Holla <sudeep.holla@arm.com>
+
+[ Upstream commit 689640efc0a2c4e07e6f88affe6d42cd40cc3f85 ]
+
+When scpi probe fails, at any point, we need to ensure that the scpi_info
+is not set and will remain NULL until the probe succeeds. If it is not
+taken care, then it could result use-after-free as the value is exported
+via get_scpi_ops() and could refer to a memory allocated via devm_kzalloc()
+but freed when the probe fails.
+
+Link: https://lore.kernel.org/r/20220701160310.148344-1-sudeep.holla@arm.com
+Cc: stable@vger.kernel.org # 4.19+
+Reported-by: huhai <huhai@kylinos.cn>
+Reviewed-by: Jackie Liu <liuyun01@kylinos.cn>
+Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/firmware/arm_scpi.c | 61 +++++++++++++++++++++----------------
+ 1 file changed, 35 insertions(+), 26 deletions(-)
+
+diff --git a/drivers/firmware/arm_scpi.c b/drivers/firmware/arm_scpi.c
+index ddf0b9ff9e15..435d0e2658a4 100644
+--- a/drivers/firmware/arm_scpi.c
++++ b/drivers/firmware/arm_scpi.c
+@@ -815,7 +815,7 @@ static int scpi_init_versions(struct scpi_drvinfo *info)
+               info->firmware_version = le32_to_cpu(caps.platform_version);
+       }
+       /* Ignore error if not implemented */
+-      if (scpi_info->is_legacy && ret == -EOPNOTSUPP)
++      if (info->is_legacy && ret == -EOPNOTSUPP)
+               return 0;
+       return ret;
+@@ -913,13 +913,14 @@ static int scpi_probe(struct platform_device *pdev)
+       struct resource res;
+       struct device *dev = &pdev->dev;
+       struct device_node *np = dev->of_node;
++      struct scpi_drvinfo *scpi_drvinfo;
+-      scpi_info = devm_kzalloc(dev, sizeof(*scpi_info), GFP_KERNEL);
+-      if (!scpi_info)
++      scpi_drvinfo = devm_kzalloc(dev, sizeof(*scpi_drvinfo), GFP_KERNEL);
++      if (!scpi_drvinfo)
+               return -ENOMEM;
+       if (of_match_device(legacy_scpi_of_match, &pdev->dev))
+-              scpi_info->is_legacy = true;
++              scpi_drvinfo->is_legacy = true;
+       count = of_count_phandle_with_args(np, "mboxes", "#mbox-cells");
+       if (count < 0) {
+@@ -927,19 +928,19 @@ static int scpi_probe(struct platform_device *pdev)
+               return -ENODEV;
+       }
+-      scpi_info->channels = devm_kcalloc(dev, count, sizeof(struct scpi_chan),
+-                                         GFP_KERNEL);
+-      if (!scpi_info->channels)
++      scpi_drvinfo->channels =
++              devm_kcalloc(dev, count, sizeof(struct scpi_chan), GFP_KERNEL);
++      if (!scpi_drvinfo->channels)
+               return -ENOMEM;
+-      ret = devm_add_action(dev, scpi_free_channels, scpi_info);
++      ret = devm_add_action(dev, scpi_free_channels, scpi_drvinfo);
+       if (ret)
+               return ret;
+-      for (; scpi_info->num_chans < count; scpi_info->num_chans++) {
++      for (; scpi_drvinfo->num_chans < count; scpi_drvinfo->num_chans++) {
+               resource_size_t size;
+-              int idx = scpi_info->num_chans;
+-              struct scpi_chan *pchan = scpi_info->channels + idx;
++              int idx = scpi_drvinfo->num_chans;
++              struct scpi_chan *pchan = scpi_drvinfo->channels + idx;
+               struct mbox_client *cl = &pchan->cl;
+               struct device_node *shmem = of_parse_phandle(np, "shmem", idx);
+@@ -986,45 +987,53 @@ static int scpi_probe(struct platform_device *pdev)
+               return ret;
+       }
+-      scpi_info->commands = scpi_std_commands;
++      scpi_drvinfo->commands = scpi_std_commands;
+-      platform_set_drvdata(pdev, scpi_info);
++      platform_set_drvdata(pdev, scpi_drvinfo);
+-      if (scpi_info->is_legacy) {
++      if (scpi_drvinfo->is_legacy) {
+               /* Replace with legacy variants */
+               scpi_ops.clk_set_val = legacy_scpi_clk_set_val;
+-              scpi_info->commands = scpi_legacy_commands;
++              scpi_drvinfo->commands = scpi_legacy_commands;
+               /* Fill priority bitmap */
+               for (idx = 0; idx < ARRAY_SIZE(legacy_hpriority_cmds); idx++)
+                       set_bit(legacy_hpriority_cmds[idx],
+-                              scpi_info->cmd_priority);
++                              scpi_drvinfo->cmd_priority);
+       }
+-      ret = scpi_init_versions(scpi_info);
++      scpi_info = scpi_drvinfo;
++
++      ret = scpi_init_versions(scpi_drvinfo);
+       if (ret) {
+               dev_err(dev, "incorrect or no SCP firmware found\n");
++              scpi_info = NULL;
+               return ret;
+       }
+-      if (scpi_info->is_legacy && !scpi_info->protocol_version &&
+-          !scpi_info->firmware_version)
++      if (scpi_drvinfo->is_legacy && !scpi_drvinfo->protocol_version &&
++          !scpi_drvinfo->firmware_version)
+               dev_info(dev, "SCP Protocol legacy pre-1.0 firmware\n");
+       else
+               dev_info(dev, "SCP Protocol %lu.%lu Firmware %lu.%lu.%lu version\n",
+                        FIELD_GET(PROTO_REV_MAJOR_MASK,
+-                                 scpi_info->protocol_version),
++                                 scpi_drvinfo->protocol_version),
+                        FIELD_GET(PROTO_REV_MINOR_MASK,
+-                                 scpi_info->protocol_version),
++                                 scpi_drvinfo->protocol_version),
+                        FIELD_GET(FW_REV_MAJOR_MASK,
+-                                 scpi_info->firmware_version),
++                                 scpi_drvinfo->firmware_version),
+                        FIELD_GET(FW_REV_MINOR_MASK,
+-                                 scpi_info->firmware_version),
++                                 scpi_drvinfo->firmware_version),
+                        FIELD_GET(FW_REV_PATCH_MASK,
+-                                 scpi_info->firmware_version));
+-      scpi_info->scpi_ops = &scpi_ops;
++                                 scpi_drvinfo->firmware_version));
++
++      scpi_drvinfo->scpi_ops = &scpi_ops;
+-      return devm_of_platform_populate(dev);
++      ret = devm_of_platform_populate(dev);
++      if (ret)
++              scpi_info = NULL;
++
++      return ret;
+ }
+ static const struct of_device_id scpi_of_match[] = {
+-- 
+2.35.1
+
diff --git a/queue-5.19/fix-short-copy-handling-in-copy_mc_pipe_to_iter.patch-23282 b/queue-5.19/fix-short-copy-handling-in-copy_mc_pipe_to_iter.patch-23282
new file mode 100644 (file)
index 0000000..9d208a8
--- /dev/null
@@ -0,0 +1,89 @@
+From efb1a337a02e06df948da844dc408e0fd50cd258 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 12 Jun 2022 19:50:29 -0400
+Subject: fix short copy handling in copy_mc_pipe_to_iter()
+
+From: Al Viro <viro@zeniv.linux.org.uk>
+
+[ Upstream commit c3497fd009ef2c59eea60d21c3ac22de3585ed7d ]
+
+Unlike other copying operations on ITER_PIPE, copy_mc_to_iter() can
+result in a short copy.  In that case we need to trim the unused
+buffers, as well as the length of partially filled one - it's not
+enough to set ->head, ->iov_offset and ->count to reflect how
+much had we copied.  Not hard to fix, fortunately...
+
+I'd put a helper (pipe_discard_from(pipe, head)) into pipe_fs_i.h,
+rather than iov_iter.c - it has nothing to do with iov_iter and
+having it will allow us to avoid an ugly kludge in fs/splice.c.
+We could put it into lib/iov_iter.c for now and move it later,
+but I don't see the point going that way...
+
+Cc: stable@kernel.org # 4.19+
+Fixes: ca146f6f091e "lib/iov_iter: Fix pipe handling in _copy_to_iter_mcsafe()"
+Reviewed-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Christian Brauner (Microsoft) <brauner@kernel.org>
+Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/pipe_fs_i.h |  9 +++++++++
+ lib/iov_iter.c            | 15 +++++++++++----
+ 2 files changed, 20 insertions(+), 4 deletions(-)
+
+diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
+index cb0fd633a610..4ea496924106 100644
+--- a/include/linux/pipe_fs_i.h
++++ b/include/linux/pipe_fs_i.h
+@@ -229,6 +229,15 @@ static inline bool pipe_buf_try_steal(struct pipe_inode_info *pipe,
+       return buf->ops->try_steal(pipe, buf);
+ }
++static inline void pipe_discard_from(struct pipe_inode_info *pipe,
++              unsigned int old_head)
++{
++      unsigned int mask = pipe->ring_size - 1;
++
++      while (pipe->head > old_head)
++              pipe_buf_release(pipe, &pipe->bufs[--pipe->head & mask]);
++}
++
+ /* Differs from PIPE_BUF in that PIPE_SIZE is the length of the actual
+    memory allocation, whereas PIPE_BUF makes atomicity guarantees.  */
+ #define PIPE_SIZE             PAGE_SIZE
+diff --git a/lib/iov_iter.c b/lib/iov_iter.c
+index 0b64695ab632..2bf20b48a04a 100644
+--- a/lib/iov_iter.c
++++ b/lib/iov_iter.c
+@@ -689,6 +689,7 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
+       struct pipe_inode_info *pipe = i->pipe;
+       unsigned int p_mask = pipe->ring_size - 1;
+       unsigned int i_head;
++      unsigned int valid = pipe->head;
+       size_t n, off, xfer = 0;
+       if (!sanity(i))
+@@ -702,11 +703,17 @@ static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
+               rem = copy_mc_to_kernel(p + off, addr + xfer, chunk);
+               chunk -= rem;
+               kunmap_local(p);
+-              i->head = i_head;
+-              i->iov_offset = off + chunk;
+-              xfer += chunk;
+-              if (rem)
++              if (chunk) {
++                      i->head = i_head;
++                      i->iov_offset = off + chunk;
++                      xfer += chunk;
++                      valid = i_head + 1;
++              }
++              if (rem) {
++                      pipe->bufs[i_head & p_mask].len -= rem;
++                      pipe_discard_from(pipe, valid);
+                       break;
++              }
+               n -= chunk;
+               off = 0;
+               i_head++;
+-- 
+2.35.1
+
diff --git a/queue-5.19/ftrace-x86-add-back-ftrace_expected-assignment.patch-6434 b/queue-5.19/ftrace-x86-add-back-ftrace_expected-assignment.patch-6434
new file mode 100644 (file)
index 0000000..a121b49
--- /dev/null
@@ -0,0 +1,49 @@
+From 64fc62d5d534496e9c262c5f45b6053c5d019208 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 10:18:51 -0400
+Subject: ftrace/x86: Add back ftrace_expected assignment
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+[ Upstream commit ac6c1b2ca77e722a1e5d651f12f437f2f237e658 ]
+
+When a ftrace_bug happens (where ftrace fails to modify a location) it is
+helpful to have what was at that location as well as what was expected to
+be there.
+
+But with the conversion to text_poke() the variable that assigns the
+expected for debugging was dropped. Unfortunately, I noticed this when I
+needed it. Add it back.
+
+Link: https://lkml.kernel.org/r/20220726101851.069d2e70@gandalf.local.home
+
+Cc: "x86@kernel.org" <x86@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Borislav Petkov <bp@alien8.de>
+Cc: "H. Peter Anvin" <hpa@zytor.com>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: stable@vger.kernel.org
+Fixes: 768ae4406a5c ("x86/ftrace: Use text_poke()")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/ftrace.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
+index 24b9fa89aa27..bd165004776d 100644
+--- a/arch/x86/kernel/ftrace.c
++++ b/arch/x86/kernel/ftrace.c
+@@ -91,6 +91,7 @@ static int ftrace_verify_code(unsigned long ip, const char *old_code)
+       /* Make sure it is what we expect it to be */
+       if (memcmp(cur_code, old_code, MCOUNT_INSN_SIZE) != 0) {
++              ftrace_expected = old_code;
+               WARN_ON(1);
+               return -EINVAL;
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.19/fuse-fix-deadlock-between-atomic-o_trunc-and-page-in.patch b/queue-5.19/fuse-fix-deadlock-between-atomic-o_trunc-and-page-in.patch
new file mode 100644 (file)
index 0000000..656a093
--- /dev/null
@@ -0,0 +1,176 @@
+From 6a8e4273ffe4f780a39f1c0677b80a21e832dc9d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Apr 2022 15:48:53 +0200
+Subject: fuse: fix deadlock between atomic O_TRUNC and page invalidation
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+[ Upstream commit 2fdbb8dd01556e1501132b5ad3826e8f71e24a8b ]
+
+fuse_finish_open() will be called with FUSE_NOWRITE set in case of atomic
+O_TRUNC open(), so commit 76224355db75 ("fuse: truncate pagecache on
+atomic_o_trunc") replaced invalidate_inode_pages2() by truncate_pagecache()
+in such a case to avoid the A-A deadlock. However, we found another A-B-B-A
+deadlock related to the case above, which will cause the xfstests
+generic/464 testcase hung in our virtio-fs test environment.
+
+For example, consider two processes concurrently open one same file, one
+with O_TRUNC and another without O_TRUNC. The deadlock case is described
+below, if open(O_TRUNC) is already set_nowrite(acquired A), and is trying
+to lock a page (acquiring B), open() could have held the page lock
+(acquired B), and waiting on the page writeback (acquiring A). This would
+lead to deadlocks.
+
+open(O_TRUNC)
+----------------------------------------------------------------
+fuse_open_common
+  inode_lock            [C acquire]
+  fuse_set_nowrite      [A acquire]
+
+  fuse_finish_open
+    truncate_pagecache
+      lock_page         [B acquire]
+      truncate_inode_page
+      unlock_page       [B release]
+
+  fuse_release_nowrite  [A release]
+  inode_unlock          [C release]
+----------------------------------------------------------------
+
+open()
+----------------------------------------------------------------
+fuse_open_common
+  fuse_finish_open
+    invalidate_inode_pages2
+      lock_page         [B acquire]
+        fuse_launder_page
+          fuse_wait_on_page_writeback [A acquire & release]
+      unlock_page       [B release]
+----------------------------------------------------------------
+
+Besides this case, all calls of invalidate_inode_pages2() and
+invalidate_inode_pages2_range() in fuse code also can deadlock with
+open(O_TRUNC).
+
+Fix by moving the truncate_pagecache() call outside the nowrite protected
+region.  The nowrite protection is only for delayed writeback
+(writeback_cache) case, where inode lock does not protect against
+truncation racing with writes on the server.  Write syscalls racing with
+page cache truncation still get the inode lock protection.
+
+This patch also changes the order of filemap_invalidate_lock()
+vs. fuse_set_nowrite() in fuse_open_common().  This new order matches the
+order found in fuse_file_fallocate() and fuse_do_setattr().
+
+Reported-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
+Tested-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
+Fixes: e4648309b85a ("fuse: truncate pending writes on O_TRUNC")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fuse/dir.c  |  7 ++++++-
+ fs/fuse/file.c | 30 +++++++++++++++++-------------
+ 2 files changed, 23 insertions(+), 14 deletions(-)
+
+diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
+index 74303d6e987b..a93d675a726a 100644
+--- a/fs/fuse/dir.c
++++ b/fs/fuse/dir.c
+@@ -537,6 +537,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
+       struct fuse_file *ff;
+       void *security_ctx = NULL;
+       u32 security_ctxlen;
++      bool trunc = flags & O_TRUNC;
+       /* Userspace expects S_IFREG in create mode */
+       BUG_ON((mode & S_IFMT) != S_IFREG);
+@@ -561,7 +562,7 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
+       inarg.mode = mode;
+       inarg.umask = current_umask();
+-      if (fm->fc->handle_killpriv_v2 && (flags & O_TRUNC) &&
++      if (fm->fc->handle_killpriv_v2 && trunc &&
+           !(flags & O_EXCL) && !capable(CAP_FSETID)) {
+               inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
+       }
+@@ -623,6 +624,10 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
+       } else {
+               file->private_data = ff;
+               fuse_finish_open(inode, file);
++              if (fm->fc->atomic_o_trunc && trunc)
++                      truncate_pagecache(inode, 0);
++              else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
++                      invalidate_inode_pages2(inode->i_mapping);
+       }
+       return err;
+diff --git a/fs/fuse/file.c b/fs/fuse/file.c
+index 60885ff9157c..dfee142bca5c 100644
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -210,13 +210,9 @@ void fuse_finish_open(struct inode *inode, struct file *file)
+               fi->attr_version = atomic64_inc_return(&fc->attr_version);
+               i_size_write(inode, 0);
+               spin_unlock(&fi->lock);
+-              truncate_pagecache(inode, 0);
+               file_update_time(file);
+               fuse_invalidate_attr_mask(inode, FUSE_STATX_MODSIZE);
+-      } else if (!(ff->open_flags & FOPEN_KEEP_CACHE)) {
+-              invalidate_inode_pages2(inode->i_mapping);
+       }
+-
+       if ((file->f_mode & FMODE_WRITE) && fc->writeback_cache)
+               fuse_link_write_file(file);
+ }
+@@ -239,30 +235,38 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
+       if (err)
+               return err;
+-      if (is_wb_truncate || dax_truncate) {
++      if (is_wb_truncate || dax_truncate)
+               inode_lock(inode);
+-              fuse_set_nowrite(inode);
+-      }
+       if (dax_truncate) {
+               filemap_invalidate_lock(inode->i_mapping);
+               err = fuse_dax_break_layouts(inode, 0, 0);
+               if (err)
+-                      goto out;
++                      goto out_inode_unlock;
+       }
++      if (is_wb_truncate || dax_truncate)
++              fuse_set_nowrite(inode);
++
+       err = fuse_do_open(fm, get_node_id(inode), file, isdir);
+       if (!err)
+               fuse_finish_open(inode, file);
+-out:
++      if (is_wb_truncate || dax_truncate)
++              fuse_release_nowrite(inode);
++      if (!err) {
++              struct fuse_file *ff = file->private_data;
++
++              if (fc->atomic_o_trunc && (file->f_flags & O_TRUNC))
++                      truncate_pagecache(inode, 0);
++              else if (!(ff->open_flags & FOPEN_KEEP_CACHE))
++                      invalidate_inode_pages2(inode->i_mapping);
++      }
+       if (dax_truncate)
+               filemap_invalidate_unlock(inode->i_mapping);
+-
+-      if (is_wb_truncate | dax_truncate) {
+-              fuse_release_nowrite(inode);
++out_inode_unlock:
++      if (is_wb_truncate || dax_truncate)
+               inode_unlock(inode);
+-      }
+       return err;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.19/fuse-ioctl-translate-enosys.patch-17448 b/queue-5.19/fuse-ioctl-translate-enosys.patch-17448
new file mode 100644 (file)
index 0000000..7cb118a
--- /dev/null
@@ -0,0 +1,89 @@
+From 9a76a72661a15caa9ccd7884caa7f17bffb7a9ea Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Jul 2022 16:06:18 +0200
+Subject: fuse: ioctl: translate ENOSYS
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+[ Upstream commit 02c0cab8e7345b06f1c0838df444e2902e4138d3 ]
+
+Overlayfs may fail to complete updates when a filesystem lacks
+fileattr/xattr syscall support and responds with an ENOSYS error code,
+resulting in an unexpected "Function not implemented" error.
+
+This bug may occur with FUSE filesystems, such as davfs2.
+
+Steps to reproduce:
+
+  # install davfs2, e.g., apk add davfs2
+  mkdir /test mkdir /test/lower /test/upper /test/work /test/mnt
+  yes '' | mount -t davfs -o ro http://some-web-dav-server/path \
+    /test/lower
+  mount -t overlay -o upperdir=/test/upper,lowerdir=/test/lower \
+    -o workdir=/test/work overlay /test/mnt
+
+  # when "some-file" exists in the lowerdir, this fails with "Function
+  # not implemented", with dmesg showing "overlayfs: failed to retrieve
+  # lower fileattr (/some-file, err=-38)"
+  touch /test/mnt/some-file
+
+The underlying cause of this regresion is actually in FUSE, which fails to
+translate the ENOSYS error code returned by userspace filesystem (which
+means that the ioctl operation is not supported) to ENOTTY.
+
+Reported-by: Christian Kohlschütter <christian@kohlschutter.com>
+Fixes: 72db82115d2b ("ovl: copy up sync/noatime fileattr flags")
+Fixes: 59efec7b9039 ("fuse: implement ioctl support")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fuse/ioctl.c | 15 +++++++++++++--
+ 1 file changed, 13 insertions(+), 2 deletions(-)
+
+diff --git a/fs/fuse/ioctl.c b/fs/fuse/ioctl.c
+index 33cde4bbccdc..61d8afcb10a3 100644
+--- a/fs/fuse/ioctl.c
++++ b/fs/fuse/ioctl.c
+@@ -9,6 +9,17 @@
+ #include <linux/compat.h>
+ #include <linux/fileattr.h>
++static ssize_t fuse_send_ioctl(struct fuse_mount *fm, struct fuse_args *args)
++{
++      ssize_t ret = fuse_simple_request(fm, args);
++
++      /* Translate ENOSYS, which shouldn't be returned from fs */
++      if (ret == -ENOSYS)
++              ret = -ENOTTY;
++
++      return ret;
++}
++
+ /*
+  * CUSE servers compiled on 32bit broke on 64bit kernels because the
+  * ABI was defined to be 'struct iovec' which is different on 32bit
+@@ -259,7 +270,7 @@ long fuse_do_ioctl(struct file *file, unsigned int cmd, unsigned long arg,
+       ap.args.out_pages = true;
+       ap.args.out_argvar = true;
+-      transferred = fuse_simple_request(fm, &ap.args);
++      transferred = fuse_send_ioctl(fm, &ap.args);
+       err = transferred;
+       if (transferred < 0)
+               goto out;
+@@ -393,7 +404,7 @@ static int fuse_priv_ioctl(struct inode *inode, struct fuse_file *ff,
+       args.out_args[1].size = inarg.out_size;
+       args.out_args[1].value = ptr;
+-      err = fuse_simple_request(fm, &args);
++      err = fuse_send_ioctl(fm, &args);
+       if (!err) {
+               if (outarg.result < 0)
+                       err = outarg.result;
+-- 
+2.35.1
+
diff --git a/queue-5.19/fuse-limit-nsec.patch-2050 b/queue-5.19/fuse-limit-nsec.patch-2050
new file mode 100644 (file)
index 0000000..4ac1a55
--- /dev/null
@@ -0,0 +1,39 @@
+From 415837824b69f800cea848df2bc04c46605413db Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Jul 2022 16:06:18 +0200
+Subject: fuse: limit nsec
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+[ Upstream commit 47912eaa061a6a81e4aa790591a1874c650733c0 ]
+
+Limit nanoseconds to 0..999999999.
+
+Fixes: d8a5ba45457e ("[PATCH] FUSE - core")
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fuse/inode.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
+index 8c0665c5dff8..7c290089e693 100644
+--- a/fs/fuse/inode.c
++++ b/fs/fuse/inode.c
+@@ -180,6 +180,12 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
+       inode->i_uid     = make_kuid(fc->user_ns, attr->uid);
+       inode->i_gid     = make_kgid(fc->user_ns, attr->gid);
+       inode->i_blocks  = attr->blocks;
++
++      /* Sanitize nsecs */
++      attr->atimensec = min_t(u32, attr->atimensec, NSEC_PER_SEC - 1);
++      attr->mtimensec = min_t(u32, attr->mtimensec, NSEC_PER_SEC - 1);
++      attr->ctimensec = min_t(u32, attr->ctimensec, NSEC_PER_SEC - 1);
++
+       inode->i_atime.tv_sec   = attr->atime;
+       inode->i_atime.tv_nsec  = attr->atimensec;
+       /* mtime from server may be stale due to local buffered write */
+-- 
+2.35.1
+
diff --git a/queue-5.19/fuse-write-inode-in-fuse_release.patch-28840 b/queue-5.19/fuse-write-inode-in-fuse_release.patch-28840
new file mode 100644 (file)
index 0000000..ed7398e
--- /dev/null
@@ -0,0 +1,48 @@
+From 8674a3b26deade1bbd04e752246293b72bf6863c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 20 Apr 2022 16:05:41 +0200
+Subject: fuse: write inode in fuse_release()
+
+From: Miklos Szeredi <mszeredi@redhat.com>
+
+[ Upstream commit 035ff33cf4db101250fb980a3941bf078f37a544 ]
+
+A race between write(2) and close(2) allows pages to be dirtied after
+fuse_flush -> write_inode_now().  If these pages are not flushed from
+fuse_release(), then there might not be a writable open file later.  So any
+remaining dirty pages must be written back before the file is released.
+
+This is a partial revert of the blamed commit.
+
+Reported-by: syzbot+6e1efbd8efaaa6860e91@syzkaller.appspotmail.com
+Fixes: 36ea23374d1f ("fuse: write inode in fuse_vma_close() instead of fuse_release()")
+Cc: <stable@vger.kernel.org> # v5.16
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/fuse/file.c | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/fs/fuse/file.c b/fs/fuse/file.c
+index 05caa2b9272e..60885ff9157c 100644
+--- a/fs/fuse/file.c
++++ b/fs/fuse/file.c
+@@ -338,6 +338,15 @@ static int fuse_open(struct inode *inode, struct file *file)
+ static int fuse_release(struct inode *inode, struct file *file)
+ {
++      struct fuse_conn *fc = get_fuse_conn(inode);
++
++      /*
++       * Dirty pages might remain despite write_inode_now() call from
++       * fuse_flush() due to writes racing with the close.
++       */
++      if (fc->writeback_cache)
++              write_inode_now(inode, 1);
++
+       fuse_release_common(file, false);
+       /* return value is ignored by VFS */
+-- 
+2.35.1
+
diff --git a/queue-5.19/hid-hid-input-add-surface-go-battery-quirk.patch-7851 b/queue-5.19/hid-hid-input-add-surface-go-battery-quirk.patch-7851
new file mode 100644 (file)
index 0000000..112f496
--- /dev/null
@@ -0,0 +1,54 @@
+From bfb446268f6178e53244ad8b7cc1a6b20435f790 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 26 May 2022 01:08:27 +0200
+Subject: HID: hid-input: add Surface Go battery quirk
+
+From: Maximilian Luz <luzmaximilian@gmail.com>
+
+[ Upstream commit db925d809011c37b246434fdce71209fc2e6c0c2 ]
+
+Similar to the Surface Go (1), the (Elantech) touchscreen/digitizer in
+the Surface Go 2 mistakenly reports the battery of the stylus. Instead
+of over the touchscreen device, battery information is provided via
+bluetooth and the touchscreen device reports an empty battery.
+
+Apply the HID_BATTERY_QUIRK_IGNORE quirk to ignore this battery and
+prevent the erroneous low battery warnings.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Maximilian Luz <luzmaximilian@gmail.com>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hid/hid-ids.h   | 1 +
+ drivers/hid/hid-input.c | 2 ++
+ 2 files changed, 3 insertions(+)
+
+diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
+index d9eb676abe96..9c4e92a9c646 100644
+--- a/drivers/hid/hid-ids.h
++++ b/drivers/hid/hid-ids.h
+@@ -413,6 +413,7 @@
+ #define USB_DEVICE_ID_ASUS_UX550VE_TOUCHSCREEN        0x2544
+ #define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN  0x2706
+ #define I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN  0x261A
++#define I2C_DEVICE_ID_SURFACE_GO2_TOUCHSCREEN 0x2A1C
+ #define USB_VENDOR_ID_ELECOM          0x056e
+ #define USB_DEVICE_ID_ELECOM_BM084    0x0061
+diff --git a/drivers/hid/hid-input.c b/drivers/hid/hid-input.c
+index c6b27aab9041..48c1c02c69f4 100644
+--- a/drivers/hid/hid-input.c
++++ b/drivers/hid/hid-input.c
+@@ -381,6 +381,8 @@ static const struct hid_device_id hid_battery_quirks[] = {
+         HID_BATTERY_QUIRK_IGNORE },
+       { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO_TOUCHSCREEN),
+         HID_BATTERY_QUIRK_IGNORE },
++      { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_SURFACE_GO2_TOUCHSCREEN),
++        HID_BATTERY_QUIRK_IGNORE },
+       {}
+ };
+-- 
+2.35.1
+
diff --git a/queue-5.19/hid-nintendo-add-missing-array-termination.patch-24808 b/queue-5.19/hid-nintendo-add-missing-array-termination.patch-24808
new file mode 100644 (file)
index 0000000..9700f70
--- /dev/null
@@ -0,0 +1,43 @@
+From fda2e32fadf0b81d5d65e6535b400165619be6c8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 15:17:05 -0700
+Subject: HID: nintendo: Add missing array termination
+
+From: Guenter Roeck <linux@roeck-us.net>
+
+[ Upstream commit ab5f3404b7762b88403fbddbdda6b1b464bd6cbc ]
+
+joycon_dpad_inputs_jc[] is unterminated. This may result in odd warnings
+such as
+
+input: input_set_capability: invalid code 3077588140 for type 1
+
+or in kernel crashes in nintendo_hid_probe(). Terminate the array to fix
+the problem.
+
+Fixes: 2af16c1f846bd ("HID: nintendo: add nintendo switch controller driver")
+Cc: Daniel J. Ogorchock <djogorchock@gmail.com>
+Signed-off-by: Guenter Roeck <linux@roeck-us.net>
+Reviewed-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hid/hid-nintendo.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/hid/hid-nintendo.c b/drivers/hid/hid-nintendo.c
+index 2204de889739..4b1173957c17 100644
+--- a/drivers/hid/hid-nintendo.c
++++ b/drivers/hid/hid-nintendo.c
+@@ -1586,6 +1586,7 @@ static const unsigned int joycon_button_inputs_r[] = {
+ /* We report joy-con d-pad inputs as buttons and pro controller as a hat. */
+ static const unsigned int joycon_dpad_inputs_jc[] = {
+       BTN_DPAD_UP, BTN_DPAD_DOWN, BTN_DPAD_LEFT, BTN_DPAD_RIGHT,
++      0 /* 0 signals end of array */
+ };
+ static int joycon_input_create(struct joycon_ctlr *ctlr)
+-- 
+2.35.1
+
diff --git a/queue-5.19/hid-wacom-don-t-register-pad_input-for-touch-switch.patch-820 b/queue-5.19/hid-wacom-don-t-register-pad_input-for-touch-switch.patch-820
new file mode 100644 (file)
index 0000000..8b87375
--- /dev/null
@@ -0,0 +1,114 @@
+From 0137a15938b0f57b5698e4493c44ca69b3a208c3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 May 2022 14:52:37 -0700
+Subject: HID: wacom: Don't register pad_input for touch switch
+
+From: Ping Cheng <pinglinux@gmail.com>
+
+[ Upstream commit d6b675687a4ab4dba684716d97c8c6f81bf10905 ]
+
+Touch switch state is received through WACOM_PAD_FIELD. However, it
+is reported by touch_input. Don't register pad_input if no other pad
+events require the interface.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Ping Cheng <ping.cheng@wacom.com>
+Reviewed-by: Jason Gerecke <jason.gerecke@wacom.com>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hid/wacom_sys.c |  2 +-
+ drivers/hid/wacom_wac.c | 43 ++++++++++++++++++++++++-----------------
+ 2 files changed, 26 insertions(+), 19 deletions(-)
+
+diff --git a/drivers/hid/wacom_sys.c b/drivers/hid/wacom_sys.c
+index 620fe74f5676..98384b911288 100644
+--- a/drivers/hid/wacom_sys.c
++++ b/drivers/hid/wacom_sys.c
+@@ -2121,7 +2121,7 @@ static int wacom_register_inputs(struct wacom *wacom)
+       error = wacom_setup_pad_input_capabilities(pad_input_dev, wacom_wac);
+       if (error) {
+-              /* no pad in use on this interface */
++              /* no pad events using this interface */
+               input_free_device(pad_input_dev);
+               wacom_wac->pad_input = NULL;
+               pad_input_dev = NULL;
+diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
+index 866b484b82de..f8cc4bb3e3a7 100644
+--- a/drivers/hid/wacom_wac.c
++++ b/drivers/hid/wacom_wac.c
+@@ -2019,7 +2019,6 @@ static void wacom_wac_pad_usage_mapping(struct hid_device *hdev,
+               wacom_wac->has_mute_touch_switch = true;
+               usage->type = EV_SW;
+               usage->code = SW_MUTE_DEVICE;
+-              features->device_type |= WACOM_DEVICETYPE_PAD;
+               break;
+       case WACOM_HID_WD_TOUCHSTRIP:
+               wacom_map_usage(input, usage, field, EV_ABS, ABS_RX, 0);
+@@ -2099,6 +2098,30 @@ static void wacom_wac_pad_event(struct hid_device *hdev, struct hid_field *field
+                       wacom_wac->hid_data.inrange_state |= value;
+       }
++      /* Process touch switch state first since it is reported through touch interface,
++       * which is indepentent of pad interface. In the case when there are no other pad
++       * events, the pad interface will not even be created.
++       */
++      if ((equivalent_usage == WACOM_HID_WD_MUTE_DEVICE) ||
++         (equivalent_usage == WACOM_HID_WD_TOUCHONOFF)) {
++              if (wacom_wac->shared->touch_input) {
++                      bool *is_touch_on = &wacom_wac->shared->is_touch_on;
++
++                      if (equivalent_usage == WACOM_HID_WD_MUTE_DEVICE && value)
++                              *is_touch_on = !(*is_touch_on);
++                      else if (equivalent_usage == WACOM_HID_WD_TOUCHONOFF)
++                              *is_touch_on = value;
++
++                      input_report_switch(wacom_wac->shared->touch_input,
++                                          SW_MUTE_DEVICE, !(*is_touch_on));
++                      input_sync(wacom_wac->shared->touch_input);
++              }
++              return;
++      }
++
++      if (!input)
++              return;
++
+       switch (equivalent_usage) {
+       case WACOM_HID_WD_TOUCHRING:
+               /*
+@@ -2134,22 +2157,6 @@ static void wacom_wac_pad_event(struct hid_device *hdev, struct hid_field *field
+                       input_event(input, usage->type, usage->code, 0);
+               break;
+-      case WACOM_HID_WD_MUTE_DEVICE:
+-      case WACOM_HID_WD_TOUCHONOFF:
+-              if (wacom_wac->shared->touch_input) {
+-                      bool *is_touch_on = &wacom_wac->shared->is_touch_on;
+-
+-                      if (equivalent_usage == WACOM_HID_WD_MUTE_DEVICE && value)
+-                              *is_touch_on = !(*is_touch_on);
+-                      else if (equivalent_usage == WACOM_HID_WD_TOUCHONOFF)
+-                              *is_touch_on = value;
+-
+-                      input_report_switch(wacom_wac->shared->touch_input,
+-                                          SW_MUTE_DEVICE, !(*is_touch_on));
+-                      input_sync(wacom_wac->shared->touch_input);
+-              }
+-              break;
+-
+       case WACOM_HID_WD_MODE_CHANGE:
+               if (wacom_wac->is_direct_mode != value) {
+                       wacom_wac->is_direct_mode = value;
+@@ -2835,7 +2842,7 @@ void wacom_wac_event(struct hid_device *hdev, struct hid_field *field,
+       /* usage tests must precede field tests */
+       if (WACOM_BATTERY_USAGE(usage))
+               wacom_wac_battery_event(hdev, field, usage, value);
+-      else if (WACOM_PAD_FIELD(field) && wacom->wacom_wac.pad_input)
++      else if (WACOM_PAD_FIELD(field))
+               wacom_wac_pad_event(hdev, field, usage, value);
+       else if (WACOM_PEN_FIELD(field) && wacom->wacom_wac.pen_input)
+               wacom_wac_pen_event(hdev, field, usage, value);
+-- 
+2.35.1
+
diff --git a/queue-5.19/hid-wacom-only-report-rotation-for-art-pen.patch-25074 b/queue-5.19/hid-wacom-only-report-rotation-for-art-pen.patch-25074
new file mode 100644 (file)
index 0000000..4bbf945
--- /dev/null
@@ -0,0 +1,101 @@
+From 45bea7616fc78940408008d1bbb3b8ce92304f93 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 13 May 2022 14:51:56 -0700
+Subject: HID: wacom: Only report rotation for art pen
+
+From: Ping Cheng <pinglinux@gmail.com>
+
+[ Upstream commit 7ccced33a0ba39b0103ae1dfbf7f1dffdc0a1bc2 ]
+
+The generic routine, wacom_wac_pen_event, turns rotation value 90
+degree anti-clockwise before posting the events. This non-zero
+event trggers a non-zero ABS_Z event for non art pen tools. However,
+HID_DG_TWIST is only supported by art pen.
+
+[jkosina@suse.cz: fix build: add missing brace]
+Cc: stable@vger.kernel.org
+Signed-off-by: Ping Cheng <ping.cheng@wacom.com>
+Reviewed-by: Jason Gerecke <jason.gerecke@wacom.com>
+--
+Hi Jiri,
+
+This is kind of a version 2 of the last one I posted two days ago.
+I updated the logic so it has less changed lines: 29 vs 158! Hopefully,
+the logic is easier to follow now. Please ignore the last one.
+
+Thank you!
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hid/wacom_wac.c | 29 +++++++++++++++++++++--------
+ 1 file changed, 21 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/hid/wacom_wac.c b/drivers/hid/wacom_wac.c
+index 9470c2b0b529..866b484b82de 100644
+--- a/drivers/hid/wacom_wac.c
++++ b/drivers/hid/wacom_wac.c
+@@ -638,9 +638,26 @@ static int wacom_intuos_id_mangle(int tool_id)
+       return (tool_id & ~0xFFF) << 4 | (tool_id & 0xFFF);
+ }
++static bool wacom_is_art_pen(int tool_id)
++{
++      bool is_art_pen = false;
++
++      switch (tool_id) {
++      case 0x885:     /* Intuos3 Marker Pen */
++      case 0x804:     /* Intuos4/5 13HD/24HD Marker Pen */
++      case 0x10804:   /* Intuos4/5 13HD/24HD Art Pen */
++              is_art_pen = true;
++              break;
++      }
++      return is_art_pen;
++}
++
+ static int wacom_intuos_get_tool_type(int tool_id)
+ {
+-      int tool_type;
++      int tool_type = BTN_TOOL_PEN;
++
++      if (wacom_is_art_pen(tool_id))
++              return tool_type;
+       switch (tool_id) {
+       case 0x812: /* Inking pen */
+@@ -655,12 +672,9 @@ static int wacom_intuos_get_tool_type(int tool_id)
+       case 0x852:
+       case 0x823: /* Intuos3 Grip Pen */
+       case 0x813: /* Intuos3 Classic Pen */
+-      case 0x885: /* Intuos3 Marker Pen */
+       case 0x802: /* Intuos4/5 13HD/24HD General Pen */
+-      case 0x804: /* Intuos4/5 13HD/24HD Marker Pen */
+       case 0x8e2: /* IntuosHT2 pen */
+       case 0x022:
+-      case 0x10804: /* Intuos4/5 13HD/24HD Art Pen */
+       case 0x10842: /* MobileStudio Pro Pro Pen slim */
+       case 0x14802: /* Intuos4/5 13HD/24HD Classic Pen */
+       case 0x16802: /* Cintiq 13HD Pro Pen */
+@@ -718,10 +732,6 @@ static int wacom_intuos_get_tool_type(int tool_id)
+       case 0x10902: /* Intuos4/5 13HD/24HD Airbrush */
+               tool_type = BTN_TOOL_AIRBRUSH;
+               break;
+-
+-      default: /* Unknown tool */
+-              tool_type = BTN_TOOL_PEN;
+-              break;
+       }
+       return tool_type;
+ }
+@@ -2336,6 +2346,9 @@ static void wacom_wac_pen_event(struct hid_device *hdev, struct hid_field *field
+               }
+               return;
+       case HID_DG_TWIST:
++              /* don't modify the value if the pen doesn't support the feature */
++              if (!wacom_is_art_pen(wacom_wac->id[0])) return;
++
+               /*
+                * Userspace expects pen twist to have its zero point when
+                * the buttons/finger is on the tablet's left. HID values
+-- 
+2.35.1
+
diff --git a/queue-5.19/hugetlb_cgroup-fix-wrong-hugetlb-cgroup-numa-stat.patch b/queue-5.19/hugetlb_cgroup-fix-wrong-hugetlb-cgroup-numa-stat.patch
new file mode 100644 (file)
index 0000000..7cbf7db
--- /dev/null
@@ -0,0 +1,43 @@
+From 1e4970560d5e4c20b18857626908025823dc9fab Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 23 Jul 2022 15:38:04 +0800
+Subject: hugetlb_cgroup: fix wrong hugetlb cgroup numa stat
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+[ Upstream commit 2727cfe4072a35ce813e3708f74c135de7da8897 ]
+
+We forget to set cft->private for numa stat file.  As a result, numa stat
+of hstates[0] is always showed for all hstates.  Encode the hstates index
+into cft->private to fix this issue.
+
+Link: https://lkml.kernel.org/r/20220723073804.53035-1-linmiaohe@huawei.com
+Fixes: f47761999052 ("hugetlb: add hugetlb.*.numa_stat file")
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Acked-by: Muchun Song <songmuchun@bytedance.com>
+Cc: Kees Cook <keescook@chromium.org>
+Cc: Mike Kravetz <mike.kravetz@oracle.com>
+Cc: Mina Almasry <almasrymina@google.com>
+Cc: Shakeel Butt <shakeelb@google.com>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/hugetlb_cgroup.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c
+index f9942841df18..c86691c431fd 100644
+--- a/mm/hugetlb_cgroup.c
++++ b/mm/hugetlb_cgroup.c
+@@ -772,6 +772,7 @@ static void __init __hugetlb_cgroup_file_dfl_init(int idx)
+       /* Add the numa stat file */
+       cft = &h->cgroup_files_dfl[6];
+       snprintf(cft->name, MAX_CFTYPE_NAME, "%s.numa_stat", buf);
++      cft->private = MEMFILE_PRIVATE(idx, 0);
+       cft->seq_show = hugetlb_cgroup_read_numa_stat;
+       cft->flags = CFTYPE_NOT_ON_ROOT;
+-- 
+2.35.1
+
diff --git a/queue-5.19/ia64-processor-fix-wincompatible-pointer-types-in-ia.patch b/queue-5.19/ia64-processor-fix-wincompatible-pointer-types-in-ia.patch
new file mode 100644 (file)
index 0000000..4f075fb
--- /dev/null
@@ -0,0 +1,49 @@
+From 8d278756aab6ff9b002539a96ddecedc470c5fcc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 24 Jun 2022 14:13:05 +0200
+Subject: ia64, processor: fix -Wincompatible-pointer-types in ia64_get_irr()
+
+From: Alexander Lobakin <alexandr.lobakin@intel.com>
+
+[ Upstream commit e5a16a5c4602c119262f350274021f90465f479d ]
+
+test_bit(), as any other bitmap op, takes `unsigned long *` as a
+second argument (pointer to the actual bitmap), as any bitmap
+itself is an array of unsigned longs. However, the ia64_get_irr()
+code passes a ref to `u64` as a second argument.
+This works with the ia64 bitops implementation due to that they
+have `void *` as the second argument and then cast it later on.
+This works with the bitmap API itself due to that `unsigned long`
+has the same size on ia64 as `u64` (`unsigned long long`), but
+from the compiler PoV those two are different.
+Define @irr as `unsigned long` to fix that. That implies no
+functional changes. Has been hidden for 16 years!
+
+Fixes: a58786917ce2 ("[IA64] avoid broken SAL_CACHE_FLUSH implementations")
+Cc: stable@vger.kernel.org # 2.6.16+
+Reported-by: kernel test robot <lkp@intel.com>
+Signed-off-by: Alexander Lobakin <alexandr.lobakin@intel.com>
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Reviewed-by: Yury Norov <yury.norov@gmail.com>
+Signed-off-by: Yury Norov <yury.norov@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/ia64/include/asm/processor.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/ia64/include/asm/processor.h b/arch/ia64/include/asm/processor.h
+index 7cbce290f4e5..757c2f6d8d4b 100644
+--- a/arch/ia64/include/asm/processor.h
++++ b/arch/ia64/include/asm/processor.h
+@@ -538,7 +538,7 @@ ia64_get_irr(unsigned int vector)
+ {
+       unsigned int reg = vector / 64;
+       unsigned int bit = vector % 64;
+-      u64 irr;
++      unsigned long irr;
+       switch (reg) {
+       case 0: irr = ia64_getreg(_IA64_REG_CR_IRR0); break;
+-- 
+2.35.1
+
diff --git a/queue-5.19/iio-fix-iio_format_avail_range-printing-for-none-iio.patch b/queue-5.19/iio-fix-iio_format_avail_range-printing-for-none-iio.patch
new file mode 100644 (file)
index 0000000..e955852
--- /dev/null
@@ -0,0 +1,59 @@
+From 574bac6c5e4c8f3d6063039b1d9b4d006a170357 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Jul 2022 15:07:06 +0200
+Subject: iio: fix iio_format_avail_range() printing for none IIO_VAL_INT
+
+From: Fawzi Khaber <fawzi.khaber@tdk.com>
+
+[ Upstream commit 5e1f91850365de55ca74945866c002fda8f00331 ]
+
+iio_format_avail_range() should print range as follow [min, step, max], so
+the function was previously calling iio_format_list() with length = 3,
+length variable refers to the array size of values not the number of
+elements. In case of non IIO_VAL_INT values each element has integer part
+and decimal part. With length = 3 this would cause premature end of loop
+and result in printing only one element.
+
+Signed-off-by: Fawzi Khaber <fawzi.khaber@tdk.com>
+Signed-off-by: Jean-Baptiste Maneyrol <jean-baptiste.maneyrol@tdk.com>
+Fixes: eda20ba1e25e ("iio: core: Consolidate iio_format_avail_{list,range}()")
+Link: https://lore.kernel.org/r/20220718130706.32571-1-jmaneyrol@invensense.com
+Cc: <Stable@vger.kernel.org>
+Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iio/industrialio-core.c | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/iio/industrialio-core.c b/drivers/iio/industrialio-core.c
+index adf054c7a75e..299ae3ad2fe5 100644
+--- a/drivers/iio/industrialio-core.c
++++ b/drivers/iio/industrialio-core.c
+@@ -835,7 +835,23 @@ static ssize_t iio_format_avail_list(char *buf, const int *vals,
+ static ssize_t iio_format_avail_range(char *buf, const int *vals, int type)
+ {
+-      return iio_format_list(buf, vals, type, 3, "[", "]");
++      int length;
++
++      /*
++       * length refers to the array size , not the number of elements.
++       * The purpose is to print the range [min , step ,max] so length should
++       * be 3 in case of int, and 6 for other types.
++       */
++      switch (type) {
++      case IIO_VAL_INT:
++              length = 3;
++              break;
++      default:
++              length = 6;
++              break;
++      }
++
++      return iio_format_list(buf, vals, type, length, "[", "]");
+ }
+ static ssize_t iio_read_channel_info_avail(struct device *dev,
+-- 
+2.35.1
+
diff --git a/queue-5.19/iio-light-isl29028-fix-the-warning-in-isl29028_remov.patch b/queue-5.19/iio-light-isl29028-fix-the-warning-in-isl29028_remov.patch
new file mode 100644 (file)
index 0000000..1f68a10
--- /dev/null
@@ -0,0 +1,54 @@
+From fccda6e8e53e594d0b4589122144a7b6c4fe3c39 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 17 Jul 2022 08:42:41 +0800
+Subject: iio: light: isl29028: Fix the warning in isl29028_remove()
+
+From: Zheyu Ma <zheyuma97@gmail.com>
+
+[ Upstream commit 06674fc7c003b9d0aa1d37fef7ab2c24802cc6ad ]
+
+The driver use the non-managed form of the register function in
+isl29028_remove(). To keep the release order as mirroring the ordering
+in probe, the driver should use non-managed form in probe, too.
+
+The following log reveals it:
+
+[   32.374955] isl29028 0-0010: remove
+[   32.376861] general protection fault, probably for non-canonical address 0xdffffc0000000006: 0000 [#1] PREEMPT SMP KASAN PTI
+[   32.377676] KASAN: null-ptr-deref in range [0x0000000000000030-0x0000000000000037]
+[   32.379432] RIP: 0010:kernfs_find_and_get_ns+0x28/0xe0
+[   32.385461] Call Trace:
+[   32.385807]  sysfs_unmerge_group+0x59/0x110
+[   32.386110]  dpm_sysfs_remove+0x58/0xc0
+[   32.386391]  device_del+0x296/0xe50
+[   32.386959]  cdev_device_del+0x1d/0xd0
+[   32.387231]  devm_iio_device_unreg+0x27/0xb0
+[   32.387542]  devres_release_group+0x319/0x3d0
+[   32.388162]  i2c_device_remove+0x93/0x1f0
+
+Fixes: 2db5054ac28d ("staging: iio: isl29028: add runtime power management support")
+Signed-off-by: Zheyu Ma <zheyuma97@gmail.com>
+Link: https://lore.kernel.org/r/20220717004241.2281028-1-zheyuma97@gmail.com
+Cc: <Stable@vger.kernel.org>
+Signed-off-by: Jonathan Cameron <Jonathan.Cameron@huawei.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iio/light/isl29028.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/iio/light/isl29028.c b/drivers/iio/light/isl29028.c
+index 9de3262aa688..a62787f5d5e7 100644
+--- a/drivers/iio/light/isl29028.c
++++ b/drivers/iio/light/isl29028.c
+@@ -625,7 +625,7 @@ static int isl29028_probe(struct i2c_client *client,
+                                        ISL29028_POWER_OFF_DELAY_MS);
+       pm_runtime_use_autosuspend(&client->dev);
+-      ret = devm_iio_device_register(indio_dev->dev.parent, indio_dev);
++      ret = iio_device_register(indio_dev);
+       if (ret < 0) {
+               dev_err(&client->dev,
+                       "%s(): iio registration failed with error %d\n",
+-- 
+2.35.1
+
diff --git a/queue-5.19/input-gscps2-check-return-value-of-ioremap-in-gscps2.patch b/queue-5.19/input-gscps2-check-return-value-of-ioremap-in-gscps2.patch
new file mode 100644 (file)
index 0000000..5e83586
--- /dev/null
@@ -0,0 +1,40 @@
+From 6983dec2f67e9edf159f000438f316aeb7a36614 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Aug 2022 15:20:33 +0800
+Subject: Input: gscps2 - check return value of ioremap() in gscps2_probe()
+
+From: Xie Shaowen <studentxswpy@163.com>
+
+[ Upstream commit e61b3125a4f036b3c6b87ffd656fc1ab00440ae9 ]
+
+The function ioremap() in gscps2_probe() can fail, so
+its return value should be checked.
+
+Fixes: 4bdc0d676a643 ("remove ioremap_nocache and devm_ioremap_nocache")
+Cc: <stable@vger.kernel.org> # v5.6+
+Reported-by: Hacash Robot <hacashRobot@santino.com>
+Signed-off-by: Xie Shaowen <studentxswpy@163.com>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/input/serio/gscps2.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/input/serio/gscps2.c b/drivers/input/serio/gscps2.c
+index a9065c6ab550..da2c67cb8642 100644
+--- a/drivers/input/serio/gscps2.c
++++ b/drivers/input/serio/gscps2.c
+@@ -350,6 +350,10 @@ static int __init gscps2_probe(struct parisc_device *dev)
+       ps2port->port = serio;
+       ps2port->padev = dev;
+       ps2port->addr = ioremap(hpa, GSC_STATUS + 4);
++      if (!ps2port->addr) {
++              ret = -ENOMEM;
++              goto fail_nomem;
++      }
+       spin_lock_init(&ps2port->lock);
+       gscps2_reset(ps2port);
+-- 
+2.35.1
+
diff --git a/queue-5.19/intel_idle-make-spr-c1-and-c1e-be-independent.patch b/queue-5.19/intel_idle-make-spr-c1-and-c1e-be-independent.patch
new file mode 100644 (file)
index 0000000..4fce388
--- /dev/null
@@ -0,0 +1,90 @@
+From 4bc48cd7377be5af70f3e97bb9e2afdb9bc30d20 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 16 Jul 2022 09:26:55 +0300
+Subject: intel_idle: make SPR C1 and C1E be independent
+
+From: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
+
+[ Upstream commit 1548fac47a114b42063def551eb152a536ed9697 ]
+
+This patch partially reverts the changes made by the following commit:
+
+da0e58c038e6 intel_idle: add 'preferred_cstates' module argument
+
+As that commit describes, on early Sapphire Rapids Xeon platforms the C1 and
+C1E states were mutually exclusive, so that users could only have either C1 and
+C6, or C1E and C6.
+
+However, Intel firmware engineers managed to remove this limitation and make C1
+and C1E to be completely independent, just like on previous Xeon platforms.
+
+Therefore, this patch:
+ * Removes commentary describing the old, and now non-existing SPR C1E
+   limitation.
+ * Marks SPR C1E as available by default.
+ * Removes the 'preferred_cstates' parameter handling for SPR. Both C1 and
+   C1E will be available regardless of 'preferred_cstates' value.
+
+We expect that all SPR systems are shipping with new firmware, which includes
+the C1/C1E improvement.
+
+Cc: v5.18+ <stable@vger.kernel.org> # v5.18+
+Signed-off-by: Artem Bityutskiy <artem.bityutskiy@linux.intel.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/idle/intel_idle.c | 24 +-----------------------
+ 1 file changed, 1 insertion(+), 23 deletions(-)
+
+diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
+index 907700d1e78e..9515a3146dc9 100644
+--- a/drivers/idle/intel_idle.c
++++ b/drivers/idle/intel_idle.c
+@@ -911,16 +911,6 @@ static struct cpuidle_state adl_l_cstates[] __initdata = {
+               .enter = NULL }
+ };
+-/*
+- * On Sapphire Rapids Xeon C1 has to be disabled if C1E is enabled, and vice
+- * versa. On SPR C1E is enabled only if "C1E promotion" bit is set in
+- * MSR_IA32_POWER_CTL. But in this case there effectively no C1, because C1
+- * requests are promoted to C1E. If the "C1E promotion" bit is cleared, then
+- * both C1 and C1E requests end up with C1, so there is effectively no C1E.
+- *
+- * By default we enable C1 and disable C1E by marking it with
+- * 'CPUIDLE_FLAG_UNUSABLE'.
+- */
+ static struct cpuidle_state spr_cstates[] __initdata = {
+       {
+               .name = "C1",
+@@ -933,8 +923,7 @@ static struct cpuidle_state spr_cstates[] __initdata = {
+       {
+               .name = "C1E",
+               .desc = "MWAIT 0x01",
+-              .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE |
+-                                         CPUIDLE_FLAG_UNUSABLE,
++              .flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_ALWAYS_ENABLE,
+               .exit_latency = 2,
+               .target_residency = 4,
+               .enter = &intel_idle,
+@@ -1756,17 +1745,6 @@ static void __init spr_idle_state_table_update(void)
+ {
+       unsigned long long msr;
+-      /* Check if user prefers C1E over C1. */
+-      if ((preferred_states_mask & BIT(2)) &&
+-          !(preferred_states_mask & BIT(1))) {
+-              /* Disable C1 and enable C1E. */
+-              spr_cstates[0].flags |= CPUIDLE_FLAG_UNUSABLE;
+-              spr_cstates[1].flags &= ~CPUIDLE_FLAG_UNUSABLE;
+-
+-              /* Enable C1E using the "C1E promotion" bit. */
+-              c1e_promotion = C1E_PROMOTION_ENABLE;
+-      }
+-
+       /*
+        * By default, the C6 state assumes the worst-case scenario of package
+        * C6. However, if PC6 is disabled, we update the numbers to match
+-- 
+2.35.1
+
diff --git a/queue-5.19/intel_th-pci-add-meteor-lake-p-support.patch b/queue-5.19/intel_th-pci-add-meteor-lake-p-support.patch
new file mode 100644 (file)
index 0000000..e9ee9f2
--- /dev/null
@@ -0,0 +1,40 @@
+From 95c2d28a3fd73754f11f4c43aca36c0eead443f1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Jul 2022 11:26:35 +0300
+Subject: intel_th: pci: Add Meteor Lake-P support
+
+From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+
+[ Upstream commit 802a9a0b1d91274ef10d9fe429b4cc1e8c200aef ]
+
+Add support for the Trace Hub in Meteor Lake-P.
+
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Cc: stable <stable@kernel.org>
+Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Link: https://lore.kernel.org/r/20220705082637.59979-5-alexander.shishkin@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hwtracing/intel_th/pci.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c
+index 7da4f298ed01..f432a772571b 100644
+--- a/drivers/hwtracing/intel_th/pci.c
++++ b/drivers/hwtracing/intel_th/pci.c
+@@ -278,6 +278,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x54a6),
+               .driver_data = (kernel_ulong_t)&intel_th_2x,
+       },
++      {
++              /* Meteor Lake-P */
++              PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7e24),
++              .driver_data = (kernel_ulong_t)&intel_th_2x,
++      },
+       {
+               /* Alder Lake CPU */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f),
+-- 
+2.35.1
+
diff --git a/queue-5.19/intel_th-pci-add-raptor-lake-s-cpu-support.patch b/queue-5.19/intel_th-pci-add-raptor-lake-s-cpu-support.patch
new file mode 100644 (file)
index 0000000..2d28f20
--- /dev/null
@@ -0,0 +1,40 @@
+From 76ea4bc32ae8138c98d828c5e4d28c328924ac75 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Jul 2022 11:26:37 +0300
+Subject: intel_th: pci: Add Raptor Lake-S CPU support
+
+From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+
+[ Upstream commit ff46a601afc5a66a81c3945b83d0a2caeb88e8bc ]
+
+Add support for the Trace Hub in Raptor Lake-S CPU.
+
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Cc: stable <stable@kernel.org>
+Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Link: https://lore.kernel.org/r/20220705082637.59979-7-alexander.shishkin@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hwtracing/intel_th/pci.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c
+index ff034eac7c7b..2f450e6ce4a8 100644
+--- a/drivers/hwtracing/intel_th/pci.c
++++ b/drivers/hwtracing/intel_th/pci.c
+@@ -288,6 +288,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7a26),
+               .driver_data = (kernel_ulong_t)&intel_th_2x,
+       },
++      {
++              /* Raptor Lake-S CPU */
++              PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0xa76f),
++              .driver_data = (kernel_ulong_t)&intel_th_2x,
++      },
+       {
+               /* Alder Lake CPU */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f),
+-- 
+2.35.1
+
diff --git a/queue-5.19/intel_th-pci-add-raptor-lake-s-pch-support.patch b/queue-5.19/intel_th-pci-add-raptor-lake-s-pch-support.patch
new file mode 100644 (file)
index 0000000..4b55e04
--- /dev/null
@@ -0,0 +1,40 @@
+From 0ceb5df13389e530394c518b7a6c37ec9055ec06 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Jul 2022 11:26:36 +0300
+Subject: intel_th: pci: Add Raptor Lake-S PCH support
+
+From: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+
+[ Upstream commit 23e2de5826e2fc4dd43e08bab3a2ea1a5338b063 ]
+
+Add support for the Trace Hub in Raptor Lake-S PCH.
+
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Cc: stable <stable@kernel.org>
+Signed-off-by: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Link: https://lore.kernel.org/r/20220705082637.59979-6-alexander.shishkin@linux.intel.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hwtracing/intel_th/pci.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/hwtracing/intel_th/pci.c b/drivers/hwtracing/intel_th/pci.c
+index f432a772571b..ff034eac7c7b 100644
+--- a/drivers/hwtracing/intel_th/pci.c
++++ b/drivers/hwtracing/intel_th/pci.c
+@@ -283,6 +283,11 @@ static const struct pci_device_id intel_th_pci_id_table[] = {
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7e24),
+               .driver_data = (kernel_ulong_t)&intel_th_2x,
+       },
++      {
++              /* Raptor Lake-S */
++              PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x7a26),
++              .driver_data = (kernel_ulong_t)&intel_th_2x,
++      },
+       {
+               /* Alder Lake CPU */
+               PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x466f),
+-- 
+2.35.1
+
diff --git a/queue-5.19/iommu-vt-d-avoid-invalid-memory-access-via-node_onli.patch b/queue-5.19/iommu-vt-d-avoid-invalid-memory-access-via-node_onli.patch
new file mode 100644 (file)
index 0000000..887e1cd
--- /dev/null
@@ -0,0 +1,66 @@
+From 29c1976d7e8b43501ab6a08bdd811a28c9c81178 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 17:38:36 +0200
+Subject: iommu/vt-d: avoid invalid memory access via node_online(NUMA_NO_NODE)
+
+From: Alexander Lobakin <alexandr.lobakin@intel.com>
+
+[ Upstream commit b0b0b77ea611e3088e9523e60860f4f41b62b235 ]
+
+KASAN reports:
+
+[ 4.668325][ T0] BUG: KASAN: wild-memory-access in dmar_parse_one_rhsa (arch/x86/include/asm/bitops.h:214 arch/x86/include/asm/bitops.h:226 include/asm-generic/bitops/instrumented-non-atomic.h:142 include/linux/nodemask.h:415 drivers/iommu/intel/dmar.c:497)
+[    4.676149][    T0] Read of size 8 at addr 1fffffff85115558 by task swapper/0/0
+[    4.683454][    T0]
+[    4.685638][    T0] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.19.0-rc3-00004-g0e862838f290 #1
+[    4.694331][    T0] Hardware name: Supermicro SYS-5018D-FN4T/X10SDV-8C-TLN4F, BIOS 1.1 03/02/2016
+[    4.703196][    T0] Call Trace:
+[    4.706334][    T0]  <TASK>
+[ 4.709133][ T0] ? dmar_parse_one_rhsa (arch/x86/include/asm/bitops.h:214 arch/x86/include/asm/bitops.h:226 include/asm-generic/bitops/instrumented-non-atomic.h:142 include/linux/nodemask.h:415 drivers/iommu/intel/dmar.c:497)
+
+after converting the type of the first argument (@nr, bit number)
+of arch_test_bit() from `long` to `unsigned long`[0].
+
+Under certain conditions (for example, when ACPI NUMA is disabled
+via command line), pxm_to_node() can return %NUMA_NO_NODE (-1).
+It is valid 'magic' number of NUMA node, but not valid bit number
+to use in bitops.
+node_online() eventually descends to test_bit() without checking
+for the input, assuming it's on caller side (which might be good
+for perf-critical tasks). There, -1 becomes %ULONG_MAX which leads
+to an insane array index when calculating bit position in memory.
+
+For now, add an explicit check for @node being not %NUMA_NO_NODE
+before calling test_bit(). The actual logics didn't change here
+at all.
+
+[0] https://github.com/norov/linux/commit/0e862838f290147ea9c16db852d8d494b552d38d
+
+Fixes: ee34b32d8c29 ("dmar: support for parsing Remapping Hardware Static Affinity structure")
+Cc: stable@vger.kernel.org # 2.6.33+
+Reported-by: kernel test robot <oliver.sang@intel.com>
+Signed-off-by: Alexander Lobakin <alexandr.lobakin@intel.com>
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Reviewed-by: Lu Baolu <baolu.lu@linux.intel.com>
+Signed-off-by: Yury Norov <yury.norov@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/iommu/intel/dmar.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
+index 9699ca101c62..64b14ac4c7b0 100644
+--- a/drivers/iommu/intel/dmar.c
++++ b/drivers/iommu/intel/dmar.c
+@@ -494,7 +494,7 @@ static int dmar_parse_one_rhsa(struct acpi_dmar_header *header, void *arg)
+               if (drhd->reg_base_addr == rhsa->base_address) {
+                       int node = pxm_to_node(rhsa->proximity_domain);
+-                      if (!node_online(node))
++                      if (node != NUMA_NO_NODE && !node_online(node))
+                               node = NUMA_NO_NODE;
+                       drhd->iommu->node = node;
+                       return 0;
+-- 
+2.35.1
+
diff --git a/queue-5.19/kexec-clean-up-arch_kexec_kernel_verify_sig.patch b/queue-5.19/kexec-clean-up-arch_kexec_kernel_verify_sig.patch
new file mode 100644 (file)
index 0000000..54384b4
--- /dev/null
@@ -0,0 +1,107 @@
+From 55415b8fddd6f945f1c54224061d8eec2a369f09 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Jul 2022 21:40:24 +0800
+Subject: kexec: clean up arch_kexec_kernel_verify_sig
+
+From: Coiby Xu <coxu@redhat.com>
+
+[ Upstream commit 689a71493bd2f31c024f8c0395f85a1fd4b2138e ]
+
+Before commit 105e10e2cf1c ("kexec_file: drop weak attribute from
+functions"), there was already no arch-specific implementation
+of arch_kexec_kernel_verify_sig. With weak attribute dropped by that
+commit, arch_kexec_kernel_verify_sig is completely useless. So clean it
+up.
+
+Note later patches are dependent on this patch so it should be backported
+to the stable tree as well.
+
+Cc: stable@vger.kernel.org
+Suggested-by: Eric W. Biederman <ebiederm@xmission.com>
+Reviewed-by: Michal Suchanek <msuchanek@suse.de>
+Acked-by: Baoquan He <bhe@redhat.com>
+Signed-off-by: Coiby Xu <coxu@redhat.com>
+[zohar@linux.ibm.com: reworded patch description "Note"]
+Link: https://lore.kernel.org/linux-integrity/20220714134027.394370-1-coxu@redhat.com/
+Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/kexec.h |  5 -----
+ kernel/kexec_file.c   | 33 +++++++++++++--------------------
+ 2 files changed, 13 insertions(+), 25 deletions(-)
+
+diff --git a/include/linux/kexec.h b/include/linux/kexec.h
+index 6958c6b471f4..6e7510f39368 100644
+--- a/include/linux/kexec.h
++++ b/include/linux/kexec.h
+@@ -212,11 +212,6 @@ static inline void *arch_kexec_kernel_image_load(struct kimage *image)
+ }
+ #endif
+-#ifdef CONFIG_KEXEC_SIG
+-int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
+-                               unsigned long buf_len);
+-#endif
+-
+ extern int kexec_add_buffer(struct kexec_buf *kbuf);
+ int kexec_locate_mem_hole(struct kexec_buf *kbuf);
+diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
+index 0c27c81351ee..6dc1294c90fc 100644
+--- a/kernel/kexec_file.c
++++ b/kernel/kexec_file.c
+@@ -81,24 +81,6 @@ int kexec_image_post_load_cleanup_default(struct kimage *image)
+       return image->fops->cleanup(image->image_loader_data);
+ }
+-#ifdef CONFIG_KEXEC_SIG
+-static int kexec_image_verify_sig_default(struct kimage *image, void *buf,
+-                                        unsigned long buf_len)
+-{
+-      if (!image->fops || !image->fops->verify_sig) {
+-              pr_debug("kernel loader does not support signature verification.\n");
+-              return -EKEYREJECTED;
+-      }
+-
+-      return image->fops->verify_sig(buf, buf_len);
+-}
+-
+-int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, unsigned long buf_len)
+-{
+-      return kexec_image_verify_sig_default(image, buf, buf_len);
+-}
+-#endif
+-
+ /*
+  * Free up memory used by kernel, initrd, and command line. This is temporary
+  * memory allocation which is not needed any more after these buffers have
+@@ -141,13 +123,24 @@ void kimage_file_post_load_cleanup(struct kimage *image)
+ }
+ #ifdef CONFIG_KEXEC_SIG
++static int kexec_image_verify_sig(struct kimage *image, void *buf,
++                                unsigned long buf_len)
++{
++      if (!image->fops || !image->fops->verify_sig) {
++              pr_debug("kernel loader does not support signature verification.\n");
++              return -EKEYREJECTED;
++      }
++
++      return image->fops->verify_sig(buf, buf_len);
++}
++
+ static int
+ kimage_validate_signature(struct kimage *image)
+ {
+       int ret;
+-      ret = arch_kexec_kernel_verify_sig(image, image->kernel_buf,
+-                                         image->kernel_buf_len);
++      ret = kexec_image_verify_sig(image, image->kernel_buf,
++                                   image->kernel_buf_len);
+       if (ret) {
+               if (sig_enforce) {
+-- 
+2.35.1
+
diff --git a/queue-5.19/kexec-keys-s390-make-use-of-built-in-and-secondary-k.patch b/queue-5.19/kexec-keys-s390-make-use-of-built-in-and-secondary-k.patch
new file mode 100644 (file)
index 0000000..88c27d5
--- /dev/null
@@ -0,0 +1,72 @@
+From fbcfad4d65668fe2d3175e24950085ef35c39f99 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Jul 2022 21:40:27 +0800
+Subject: kexec, KEYS, s390: Make use of built-in and secondary keyring for
+ signature verification
+
+From: Michal Suchanek <msuchanek@suse.de>
+
+[ Upstream commit 0828c4a39be57768b8788e8cbd0d84683ea757e5 ]
+
+commit e23a8020ce4e ("s390/kexec_file: Signature verification prototype")
+adds support for KEXEC_SIG verification with keys from platform keyring
+but the built-in keys and secondary keyring are not used.
+
+Add support for the built-in keys and secondary keyring as x86 does.
+
+Fixes: e23a8020ce4e ("s390/kexec_file: Signature verification prototype")
+Cc: stable@vger.kernel.org
+Cc: Philipp Rudo <prudo@linux.ibm.com>
+Cc: kexec@lists.infradead.org
+Cc: keyrings@vger.kernel.org
+Cc: linux-security-module@vger.kernel.org
+Signed-off-by: Michal Suchanek <msuchanek@suse.de>
+Reviewed-by: "Lee, Chun-Yi" <jlee@suse.com>
+Acked-by: Baoquan He <bhe@redhat.com>
+Signed-off-by: Coiby Xu <coxu@redhat.com>
+Acked-by: Heiko Carstens <hca@linux.ibm.com>
+Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/kernel/machine_kexec_file.c | 18 +++++++++++++-----
+ 1 file changed, 13 insertions(+), 5 deletions(-)
+
+diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c
+index 8f43575a4dd3..fc6d5f58debe 100644
+--- a/arch/s390/kernel/machine_kexec_file.c
++++ b/arch/s390/kernel/machine_kexec_file.c
+@@ -31,6 +31,7 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len)
+       const unsigned long marker_len = sizeof(MODULE_SIG_STRING) - 1;
+       struct module_signature *ms;
+       unsigned long sig_len;
++      int ret;
+       /* Skip signature verification when not secure IPLed. */
+       if (!ipl_secure_flag)
+@@ -65,11 +66,18 @@ int s390_verify_sig(const char *kernel, unsigned long kernel_len)
+               return -EBADMSG;
+       }
+-      return verify_pkcs7_signature(kernel, kernel_len,
+-                                    kernel + kernel_len, sig_len,
+-                                    VERIFY_USE_PLATFORM_KEYRING,
+-                                    VERIFYING_MODULE_SIGNATURE,
+-                                    NULL, NULL);
++      ret = verify_pkcs7_signature(kernel, kernel_len,
++                                   kernel + kernel_len, sig_len,
++                                   VERIFY_USE_SECONDARY_KEYRING,
++                                   VERIFYING_MODULE_SIGNATURE,
++                                   NULL, NULL);
++      if (ret == -ENOKEY && IS_ENABLED(CONFIG_INTEGRITY_PLATFORM_KEYRING))
++              ret = verify_pkcs7_signature(kernel, kernel_len,
++                                           kernel + kernel_len, sig_len,
++                                           VERIFY_USE_PLATFORM_KEYRING,
++                                           VERIFYING_MODULE_SIGNATURE,
++                                           NULL, NULL);
++      return ret;
+ }
+ #endif /* CONFIG_KEXEC_SIG */
+-- 
+2.35.1
+
diff --git a/queue-5.19/kexec_file-drop-weak-attribute-from-functions.patch b/queue-5.19/kexec_file-drop-weak-attribute-from-functions.patch
new file mode 100644 (file)
index 0000000..ed0f005
--- /dev/null
@@ -0,0 +1,261 @@
+From 622c3c9b81ca49726b95a4da6c54eb1b75e7532f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Jul 2022 13:04:04 +0530
+Subject: kexec_file: drop weak attribute from functions
+
+From: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+
+[ Upstream commit 65d9a9a60fd71be964effb2e94747a6acb6e7015 ]
+
+As requested
+(http://lkml.kernel.org/r/87ee0q7b92.fsf@email.froward.int.ebiederm.org),
+this series converts weak functions in kexec to use the #ifdef approach.
+
+Quoting the 3e35142ef99fe ("kexec_file: drop weak attribute from
+arch_kexec_apply_relocations[_add]") changelog:
+
+: Since commit d1bcae833b32f1 ("ELF: Don't generate unused section symbols")
+: [1], binutils (v2.36+) started dropping section symbols that it thought
+: were unused.  This isn't an issue in general, but with kexec_file.c, gcc
+: is placing kexec_arch_apply_relocations[_add] into a separate
+: .text.unlikely section and the section symbol ".text.unlikely" is being
+: dropped.  Due to this, recordmcount is unable to find a non-weak symbol in
+: .text.unlikely to generate a relocation record against.
+
+This patch (of 2);
+
+Drop __weak attribute from functions in kexec_file.c:
+- arch_kexec_kernel_image_probe()
+- arch_kimage_file_post_load_cleanup()
+- arch_kexec_kernel_image_load()
+- arch_kexec_locate_mem_hole()
+- arch_kexec_kernel_verify_sig()
+
+arch_kexec_kernel_image_load() calls into kexec_image_load_default(), so
+drop the static attribute for the latter.
+
+arch_kexec_kernel_verify_sig() is not overridden by any architecture, so
+drop the __weak attribute.
+
+Link: https://lkml.kernel.org/r/cover.1656659357.git.naveen.n.rao@linux.vnet.ibm.com
+Link: https://lkml.kernel.org/r/2cd7ca1fe4d6bb6ca38e3283c717878388ed6788.1656659357.git.naveen.n.rao@linux.vnet.ibm.com
+Signed-off-by: Naveen N. Rao <naveen.n.rao@linux.vnet.ibm.com>
+Suggested-by: Eric Biederman <ebiederm@xmission.com>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Mimi Zohar <zohar@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/include/asm/kexec.h   |  4 ++-
+ arch/powerpc/include/asm/kexec.h |  9 +++++++
+ arch/s390/include/asm/kexec.h    |  3 +++
+ arch/x86/include/asm/kexec.h     |  6 +++++
+ include/linux/kexec.h            | 44 +++++++++++++++++++++++++++-----
+ kernel/kexec_file.c              | 35 ++-----------------------
+ 6 files changed, 61 insertions(+), 40 deletions(-)
+
+diff --git a/arch/arm64/include/asm/kexec.h b/arch/arm64/include/asm/kexec.h
+index 9839bfc163d7..78d272b26ebd 100644
+--- a/arch/arm64/include/asm/kexec.h
++++ b/arch/arm64/include/asm/kexec.h
+@@ -115,7 +115,9 @@ extern const struct kexec_file_ops kexec_image_ops;
+ struct kimage;
+-extern int arch_kimage_file_post_load_cleanup(struct kimage *image);
++int arch_kimage_file_post_load_cleanup(struct kimage *image);
++#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
++
+ extern int load_other_segments(struct kimage *image,
+               unsigned long kernel_load_addr, unsigned long kernel_size,
+               char *initrd, unsigned long initrd_len,
+diff --git a/arch/powerpc/include/asm/kexec.h b/arch/powerpc/include/asm/kexec.h
+index 2aefe14e1442..1e5e9b6ec78d 100644
+--- a/arch/powerpc/include/asm/kexec.h
++++ b/arch/powerpc/include/asm/kexec.h
+@@ -120,6 +120,15 @@ int setup_purgatory(struct kimage *image, const void *slave_code,
+ #ifdef CONFIG_PPC64
+ struct kexec_buf;
++int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len);
++#define arch_kexec_kernel_image_probe arch_kexec_kernel_image_probe
++
++int arch_kimage_file_post_load_cleanup(struct kimage *image);
++#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
++
++int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf);
++#define arch_kexec_locate_mem_hole arch_kexec_locate_mem_hole
++
+ int load_crashdump_segments_ppc64(struct kimage *image,
+                                 struct kexec_buf *kbuf);
+ int setup_purgatory_ppc64(struct kimage *image, const void *slave_code,
+diff --git a/arch/s390/include/asm/kexec.h b/arch/s390/include/asm/kexec.h
+index 649ecdcc8734..8886aadc11a3 100644
+--- a/arch/s390/include/asm/kexec.h
++++ b/arch/s390/include/asm/kexec.h
+@@ -92,5 +92,8 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+                                    const Elf_Shdr *relsec,
+                                    const Elf_Shdr *symtab);
+ #define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add
++
++int arch_kimage_file_post_load_cleanup(struct kimage *image);
++#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
+ #endif
+ #endif /*_S390_KEXEC_H */
+diff --git a/arch/x86/include/asm/kexec.h b/arch/x86/include/asm/kexec.h
+index 6ad8d946cd3e..5ec359c1b50c 100644
+--- a/arch/x86/include/asm/kexec.h
++++ b/arch/x86/include/asm/kexec.h
+@@ -193,6 +193,12 @@ int arch_kexec_apply_relocations_add(struct purgatory_info *pi,
+                                    const Elf_Shdr *relsec,
+                                    const Elf_Shdr *symtab);
+ #define arch_kexec_apply_relocations_add arch_kexec_apply_relocations_add
++
++void *arch_kexec_kernel_image_load(struct kimage *image);
++#define arch_kexec_kernel_image_load arch_kexec_kernel_image_load
++
++int arch_kimage_file_post_load_cleanup(struct kimage *image);
++#define arch_kimage_file_post_load_cleanup arch_kimage_file_post_load_cleanup
+ #endif
+ #endif
+diff --git a/include/linux/kexec.h b/include/linux/kexec.h
+index 475683cd67f1..6958c6b471f4 100644
+--- a/include/linux/kexec.h
++++ b/include/linux/kexec.h
+@@ -188,21 +188,53 @@ int kexec_purgatory_get_set_symbol(struct kimage *image, const char *name,
+                                  void *buf, unsigned int size,
+                                  bool get_value);
+ void *kexec_purgatory_get_symbol_addr(struct kimage *image, const char *name);
++void *kexec_image_load_default(struct kimage *image);
++
++#ifndef arch_kexec_kernel_image_probe
++static inline int
++arch_kexec_kernel_image_probe(struct kimage *image, void *buf, unsigned long buf_len)
++{
++      return kexec_image_probe_default(image, buf, buf_len);
++}
++#endif
++
++#ifndef arch_kimage_file_post_load_cleanup
++static inline int arch_kimage_file_post_load_cleanup(struct kimage *image)
++{
++      return kexec_image_post_load_cleanup_default(image);
++}
++#endif
++
++#ifndef arch_kexec_kernel_image_load
++static inline void *arch_kexec_kernel_image_load(struct kimage *image)
++{
++      return kexec_image_load_default(image);
++}
++#endif
+-/* Architectures may override the below functions */
+-int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+-                                unsigned long buf_len);
+-void *arch_kexec_kernel_image_load(struct kimage *image);
+-int arch_kimage_file_post_load_cleanup(struct kimage *image);
+ #ifdef CONFIG_KEXEC_SIG
+ int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
+                                unsigned long buf_len);
+ #endif
+-int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf);
+ extern int kexec_add_buffer(struct kexec_buf *kbuf);
+ int kexec_locate_mem_hole(struct kexec_buf *kbuf);
++#ifndef arch_kexec_locate_mem_hole
++/**
++ * arch_kexec_locate_mem_hole - Find free memory to place the segments.
++ * @kbuf:                       Parameters for the memory search.
++ *
++ * On success, kbuf->mem will have the start address of the memory region found.
++ *
++ * Return: 0 on success, negative errno on error.
++ */
++static inline int arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
++{
++      return kexec_locate_mem_hole(kbuf);
++}
++#endif
++
+ /* Alignment required for elf header segment */
+ #define ELF_CORE_HEADER_ALIGN   4096
+diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
+index f9261c07b048..0c27c81351ee 100644
+--- a/kernel/kexec_file.c
++++ b/kernel/kexec_file.c
+@@ -62,14 +62,7 @@ int kexec_image_probe_default(struct kimage *image, void *buf,
+       return ret;
+ }
+-/* Architectures can provide this probe function */
+-int __weak arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+-                                       unsigned long buf_len)
+-{
+-      return kexec_image_probe_default(image, buf, buf_len);
+-}
+-
+-static void *kexec_image_load_default(struct kimage *image)
++void *kexec_image_load_default(struct kimage *image)
+ {
+       if (!image->fops || !image->fops->load)
+               return ERR_PTR(-ENOEXEC);
+@@ -80,11 +73,6 @@ static void *kexec_image_load_default(struct kimage *image)
+                                image->cmdline_buf_len);
+ }
+-void * __weak arch_kexec_kernel_image_load(struct kimage *image)
+-{
+-      return kexec_image_load_default(image);
+-}
+-
+ int kexec_image_post_load_cleanup_default(struct kimage *image)
+ {
+       if (!image->fops || !image->fops->cleanup)
+@@ -93,11 +81,6 @@ int kexec_image_post_load_cleanup_default(struct kimage *image)
+       return image->fops->cleanup(image->image_loader_data);
+ }
+-int __weak arch_kimage_file_post_load_cleanup(struct kimage *image)
+-{
+-      return kexec_image_post_load_cleanup_default(image);
+-}
+-
+ #ifdef CONFIG_KEXEC_SIG
+ static int kexec_image_verify_sig_default(struct kimage *image, void *buf,
+                                         unsigned long buf_len)
+@@ -110,8 +93,7 @@ static int kexec_image_verify_sig_default(struct kimage *image, void *buf,
+       return image->fops->verify_sig(buf, buf_len);
+ }
+-int __weak arch_kexec_kernel_verify_sig(struct kimage *image, void *buf,
+-                                      unsigned long buf_len)
++int arch_kexec_kernel_verify_sig(struct kimage *image, void *buf, unsigned long buf_len)
+ {
+       return kexec_image_verify_sig_default(image, buf, buf_len);
+ }
+@@ -621,19 +603,6 @@ int kexec_locate_mem_hole(struct kexec_buf *kbuf)
+       return ret == 1 ? 0 : -EADDRNOTAVAIL;
+ }
+-/**
+- * arch_kexec_locate_mem_hole - Find free memory to place the segments.
+- * @kbuf:                       Parameters for the memory search.
+- *
+- * On success, kbuf->mem will have the start address of the memory region found.
+- *
+- * Return: 0 on success, negative errno on error.
+- */
+-int __weak arch_kexec_locate_mem_hole(struct kexec_buf *kbuf)
+-{
+-      return kexec_locate_mem_hole(kbuf);
+-}
+-
+ /**
+  * kexec_add_buffer - place a buffer in a kexec segment
+  * @kbuf:     Buffer contents and memory parameters.
+-- 
+2.35.1
+
diff --git a/queue-5.19/keys-asymmetric-enforce-sm2-signature-use-pkey-algo.patch b/queue-5.19/keys-asymmetric-enforce-sm2-signature-use-pkey-algo.patch
new file mode 100644 (file)
index 0000000..2ca979d
--- /dev/null
@@ -0,0 +1,60 @@
+From 548653917e982bf248e05a7954f2b05ec6318982 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Jun 2022 11:37:20 +0800
+Subject: KEYS: asymmetric: enforce SM2 signature use pkey algo
+
+From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+
+[ Upstream commit 0815291a8fd66cdcf7db1445d4d99b0d16065829 ]
+
+The signature verification of SM2 needs to add the Za value and
+recalculate sig->digest, which requires the detection of the pkey_algo
+in public_key_verify_signature(). As Eric Biggers said, the pkey_algo
+field in sig is attacker-controlled and should be use pkey->pkey_algo
+instead of sig->pkey_algo, and secondly, if sig->pkey_algo is NULL, it
+will also cause signature verification failure.
+
+The software_key_determine_akcipher() already forces the algorithms
+are matched, so the SM3 algorithm is enforced in the SM2 signature,
+although this has been checked, we still avoid using any algorithm
+information in the signature as input.
+
+Fixes: 215525639631 ("X.509: support OSCCA SM2-with-SM3 certificate verification")
+Reported-by: Eric Biggers <ebiggers@google.com>
+Cc: stable@vger.kernel.org # v5.10+
+Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ crypto/asymmetric_keys/public_key.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/crypto/asymmetric_keys/public_key.c b/crypto/asymmetric_keys/public_key.c
+index 7c9e6be35c30..2f8352e88860 100644
+--- a/crypto/asymmetric_keys/public_key.c
++++ b/crypto/asymmetric_keys/public_key.c
+@@ -304,6 +304,10 @@ static int cert_sig_digest_update(const struct public_key_signature *sig,
+       BUG_ON(!sig->data);
++      /* SM2 signatures always use the SM3 hash algorithm */
++      if (!sig->hash_algo || strcmp(sig->hash_algo, "sm3") != 0)
++              return -EINVAL;
++
+       ret = sm2_compute_z_digest(tfm_pkey, SM2_DEFAULT_USERID,
+                                       SM2_DEFAULT_USERID_LEN, dgst);
+       if (ret)
+@@ -414,8 +418,7 @@ int public_key_verify_signature(const struct public_key *pkey,
+       if (ret)
+               goto error_free_key;
+-      if (sig->pkey_algo && strcmp(sig->pkey_algo, "sm2") == 0 &&
+-          sig->data_size) {
++      if (strcmp(pkey->pkey_algo, "sm2") == 0 && sig->data_size) {
+               ret = cert_sig_digest_update(sig, tfm);
+               if (ret)
+                       goto error_free_key;
+-- 
+2.35.1
+
diff --git a/queue-5.19/ksmbd-fix-heap-based-overflow-in-set_ntacl_dacl.patch-15594 b/queue-5.19/ksmbd-fix-heap-based-overflow-in-set_ntacl_dacl.patch-15594
new file mode 100644 (file)
index 0000000..4785f00
--- /dev/null
@@ -0,0 +1,441 @@
+From 9bf4a78ad07955391a1bcea5c8192e58c1385198 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Aug 2022 07:28:51 +0900
+Subject: ksmbd: fix heap-based overflow in set_ntacl_dacl()
+
+From: Namjae Jeon <linkinjeon@kernel.org>
+
+[ Upstream commit 8f0541186e9ad1b62accc9519cc2b7a7240272a7 ]
+
+The testcase use SMB2_SET_INFO_HE command to set a malformed file attribute
+under the label `security.NTACL`. SMB2_QUERY_INFO_HE command in testcase
+trigger the following overflow.
+
+[ 4712.003781] ==================================================================
+[ 4712.003790] BUG: KASAN: slab-out-of-bounds in build_sec_desc+0x842/0x1dd0 [ksmbd]
+[ 4712.003807] Write of size 1060 at addr ffff88801e34c068 by task kworker/0:0/4190
+
+[ 4712.003813] CPU: 0 PID: 4190 Comm: kworker/0:0 Not tainted 5.19.0-rc5 #1
+[ 4712.003850] Workqueue: ksmbd-io handle_ksmbd_work [ksmbd]
+[ 4712.003867] Call Trace:
+[ 4712.003870]  <TASK>
+[ 4712.003873]  dump_stack_lvl+0x49/0x5f
+[ 4712.003935]  print_report.cold+0x5e/0x5cf
+[ 4712.003972]  ? ksmbd_vfs_get_sd_xattr+0x16d/0x500 [ksmbd]
+[ 4712.003984]  ? cmp_map_id+0x200/0x200
+[ 4712.003988]  ? build_sec_desc+0x842/0x1dd0 [ksmbd]
+[ 4712.004000]  kasan_report+0xaa/0x120
+[ 4712.004045]  ? build_sec_desc+0x842/0x1dd0 [ksmbd]
+[ 4712.004056]  kasan_check_range+0x100/0x1e0
+[ 4712.004060]  memcpy+0x3c/0x60
+[ 4712.004064]  build_sec_desc+0x842/0x1dd0 [ksmbd]
+[ 4712.004076]  ? parse_sec_desc+0x580/0x580 [ksmbd]
+[ 4712.004088]  ? ksmbd_acls_fattr+0x281/0x410 [ksmbd]
+[ 4712.004099]  smb2_query_info+0xa8f/0x6110 [ksmbd]
+[ 4712.004111]  ? psi_group_change+0x856/0xd70
+[ 4712.004148]  ? update_load_avg+0x1c3/0x1af0
+[ 4712.004152]  ? asym_cpu_capacity_scan+0x5d0/0x5d0
+[ 4712.004157]  ? xas_load+0x23/0x300
+[ 4712.004162]  ? smb2_query_dir+0x1530/0x1530 [ksmbd]
+[ 4712.004173]  ? _raw_spin_lock_bh+0xe0/0xe0
+[ 4712.004179]  handle_ksmbd_work+0x30e/0x1020 [ksmbd]
+[ 4712.004192]  process_one_work+0x778/0x11c0
+[ 4712.004227]  ? _raw_spin_lock_irq+0x8e/0xe0
+[ 4712.004231]  worker_thread+0x544/0x1180
+[ 4712.004234]  ? __cpuidle_text_end+0x4/0x4
+[ 4712.004239]  kthread+0x282/0x320
+[ 4712.004243]  ? process_one_work+0x11c0/0x11c0
+[ 4712.004246]  ? kthread_complete_and_exit+0x30/0x30
+[ 4712.004282]  ret_from_fork+0x1f/0x30
+
+This patch add the buffer validation for security descriptor that is
+stored by malformed SMB2_SET_INFO_HE command. and allocate large
+response buffer about SMB2_O_INFO_SECURITY file info class.
+
+Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3")
+Cc: stable@vger.kernel.org
+Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-17771
+Reviewed-by: Hyunchul Lee <hyc.lee@gmail.com>
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ksmbd/smb2pdu.c |  39 +++++++++-----
+ fs/ksmbd/smbacl.c  | 130 ++++++++++++++++++++++++++++++---------------
+ fs/ksmbd/smbacl.h  |   2 +-
+ fs/ksmbd/vfs.c     |   5 ++
+ 4 files changed, 119 insertions(+), 57 deletions(-)
+
+diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
+index 54aaf9014136..a9c33d15ca1f 100644
+--- a/fs/ksmbd/smb2pdu.c
++++ b/fs/ksmbd/smb2pdu.c
+@@ -535,9 +535,10 @@ int smb2_allocate_rsp_buf(struct ksmbd_work *work)
+               struct smb2_query_info_req *req;
+               req = smb2_get_msg(work->request_buf);
+-              if (req->InfoType == SMB2_O_INFO_FILE &&
+-                  (req->FileInfoClass == FILE_FULL_EA_INFORMATION ||
+-                   req->FileInfoClass == FILE_ALL_INFORMATION))
++              if ((req->InfoType == SMB2_O_INFO_FILE &&
++                   (req->FileInfoClass == FILE_FULL_EA_INFORMATION ||
++                   req->FileInfoClass == FILE_ALL_INFORMATION)) ||
++                  req->InfoType == SMB2_O_INFO_SECURITY)
+                       sz = large_sz;
+       }
+@@ -2974,7 +2975,7 @@ int smb2_open(struct ksmbd_work *work)
+                                               goto err_out;
+                                       rc = build_sec_desc(user_ns,
+-                                                          pntsd, NULL,
++                                                          pntsd, NULL, 0,
+                                                           OWNER_SECINFO |
+                                                           GROUP_SECINFO |
+                                                           DACL_SECINFO,
+@@ -3819,6 +3820,15 @@ static int verify_info_level(int info_level)
+       return 0;
+ }
++static int smb2_resp_buf_len(struct ksmbd_work *work, unsigned short hdr2_len)
++{
++      int free_len;
++
++      free_len = (int)(work->response_sz -
++              (get_rfc1002_len(work->response_buf) + 4)) - hdr2_len;
++      return free_len;
++}
++
+ static int smb2_calc_max_out_buf_len(struct ksmbd_work *work,
+                                    unsigned short hdr2_len,
+                                    unsigned int out_buf_len)
+@@ -3828,9 +3838,7 @@ static int smb2_calc_max_out_buf_len(struct ksmbd_work *work,
+       if (out_buf_len > work->conn->vals->max_trans_size)
+               return -EINVAL;
+-      free_len = (int)(work->response_sz -
+-                       (get_rfc1002_len(work->response_buf) + 4)) -
+-              hdr2_len;
++      free_len = smb2_resp_buf_len(work, hdr2_len);
+       if (free_len < 0)
+               return -EINVAL;
+@@ -5093,10 +5101,10 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
+       struct smb_ntsd *pntsd = (struct smb_ntsd *)rsp->Buffer, *ppntsd = NULL;
+       struct smb_fattr fattr = {{0}};
+       struct inode *inode;
+-      __u32 secdesclen;
++      __u32 secdesclen = 0;
+       unsigned int id = KSMBD_NO_FID, pid = KSMBD_NO_FID;
+       int addition_info = le32_to_cpu(req->AdditionalInformation);
+-      int rc;
++      int rc = 0, ppntsd_size = 0;
+       if (addition_info & ~(OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO |
+                             PROTECTED_DACL_SECINFO |
+@@ -5142,11 +5150,14 @@ static int smb2_get_info_sec(struct ksmbd_work *work,
+       if (test_share_config_flag(work->tcon->share_conf,
+                                  KSMBD_SHARE_FLAG_ACL_XATTR))
+-              ksmbd_vfs_get_sd_xattr(work->conn, user_ns,
+-                                     fp->filp->f_path.dentry, &ppntsd);
+-
+-      rc = build_sec_desc(user_ns, pntsd, ppntsd, addition_info,
+-                          &secdesclen, &fattr);
++              ppntsd_size = ksmbd_vfs_get_sd_xattr(work->conn, user_ns,
++                                                   fp->filp->f_path.dentry,
++                                                   &ppntsd);
++
++      /* Check if sd buffer size exceeds response buffer size */
++      if (smb2_resp_buf_len(work, 8) > ppntsd_size)
++              rc = build_sec_desc(user_ns, pntsd, ppntsd, ppntsd_size,
++                                  addition_info, &secdesclen, &fattr);
+       posix_acl_release(fattr.cf_acls);
+       posix_acl_release(fattr.cf_dacls);
+       kfree(ppntsd);
+diff --git a/fs/ksmbd/smbacl.c b/fs/ksmbd/smbacl.c
+index 38f23bf981ac..3781bca2c8fc 100644
+--- a/fs/ksmbd/smbacl.c
++++ b/fs/ksmbd/smbacl.c
+@@ -690,6 +690,7 @@ static void set_posix_acl_entries_dacl(struct user_namespace *user_ns,
+ static void set_ntacl_dacl(struct user_namespace *user_ns,
+                          struct smb_acl *pndacl,
+                          struct smb_acl *nt_dacl,
++                         unsigned int aces_size,
+                          const struct smb_sid *pownersid,
+                          const struct smb_sid *pgrpsid,
+                          struct smb_fattr *fattr)
+@@ -703,9 +704,19 @@ static void set_ntacl_dacl(struct user_namespace *user_ns,
+       if (nt_num_aces) {
+               ntace = (struct smb_ace *)((char *)nt_dacl + sizeof(struct smb_acl));
+               for (i = 0; i < nt_num_aces; i++) {
+-                      memcpy((char *)pndace + size, ntace, le16_to_cpu(ntace->size));
+-                      size += le16_to_cpu(ntace->size);
+-                      ntace = (struct smb_ace *)((char *)ntace + le16_to_cpu(ntace->size));
++                      unsigned short nt_ace_size;
++
++                      if (offsetof(struct smb_ace, access_req) > aces_size)
++                              break;
++
++                      nt_ace_size = le16_to_cpu(ntace->size);
++                      if (nt_ace_size > aces_size)
++                              break;
++
++                      memcpy((char *)pndace + size, ntace, nt_ace_size);
++                      size += nt_ace_size;
++                      aces_size -= nt_ace_size;
++                      ntace = (struct smb_ace *)((char *)ntace + nt_ace_size);
+                       num_aces++;
+               }
+       }
+@@ -878,7 +889,7 @@ int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
+ /* Convert permission bits from mode to equivalent CIFS ACL */
+ int build_sec_desc(struct user_namespace *user_ns,
+                  struct smb_ntsd *pntsd, struct smb_ntsd *ppntsd,
+-                 int addition_info, __u32 *secdesclen,
++                 int ppntsd_size, int addition_info, __u32 *secdesclen,
+                  struct smb_fattr *fattr)
+ {
+       int rc = 0;
+@@ -938,15 +949,25 @@ int build_sec_desc(struct user_namespace *user_ns,
+               if (!ppntsd) {
+                       set_mode_dacl(user_ns, dacl_ptr, fattr);
+-              } else if (!ppntsd->dacloffset) {
+-                      goto out;
+               } else {
+                       struct smb_acl *ppdacl_ptr;
++                      unsigned int dacl_offset = le32_to_cpu(ppntsd->dacloffset);
++                      int ppdacl_size, ntacl_size = ppntsd_size - dacl_offset;
++
++                      if (!dacl_offset ||
++                          (dacl_offset + sizeof(struct smb_acl) > ppntsd_size))
++                              goto out;
++
++                      ppdacl_ptr = (struct smb_acl *)((char *)ppntsd + dacl_offset);
++                      ppdacl_size = le16_to_cpu(ppdacl_ptr->size);
++                      if (ppdacl_size > ntacl_size ||
++                          ppdacl_size < sizeof(struct smb_acl))
++                              goto out;
+-                      ppdacl_ptr = (struct smb_acl *)((char *)ppntsd +
+-                                              le32_to_cpu(ppntsd->dacloffset));
+                       set_ntacl_dacl(user_ns, dacl_ptr, ppdacl_ptr,
+-                                     nowner_sid_ptr, ngroup_sid_ptr, fattr);
++                                     ntacl_size - sizeof(struct smb_acl),
++                                     nowner_sid_ptr, ngroup_sid_ptr,
++                                     fattr);
+               }
+               pntsd->dacloffset = cpu_to_le32(offset);
+               offset += le16_to_cpu(dacl_ptr->size);
+@@ -980,24 +1001,31 @@ int smb_inherit_dacl(struct ksmbd_conn *conn,
+       struct smb_sid owner_sid, group_sid;
+       struct dentry *parent = path->dentry->d_parent;
+       struct user_namespace *user_ns = mnt_user_ns(path->mnt);
+-      int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0;
+-      int rc = 0, num_aces, dacloffset, pntsd_type, acl_len;
++      int inherited_flags = 0, flags = 0, i, ace_cnt = 0, nt_size = 0, pdacl_size;
++      int rc = 0, num_aces, dacloffset, pntsd_type, pntsd_size, acl_len, aces_size;
+       char *aces_base;
+       bool is_dir = S_ISDIR(d_inode(path->dentry)->i_mode);
+-      acl_len = ksmbd_vfs_get_sd_xattr(conn, user_ns,
+-                                       parent, &parent_pntsd);
+-      if (acl_len <= 0)
++      pntsd_size = ksmbd_vfs_get_sd_xattr(conn, user_ns,
++                                          parent, &parent_pntsd);
++      if (pntsd_size <= 0)
+               return -ENOENT;
+       dacloffset = le32_to_cpu(parent_pntsd->dacloffset);
+-      if (!dacloffset) {
++      if (!dacloffset || (dacloffset + sizeof(struct smb_acl) > pntsd_size)) {
+               rc = -EINVAL;
+               goto free_parent_pntsd;
+       }
+       parent_pdacl = (struct smb_acl *)((char *)parent_pntsd + dacloffset);
++      acl_len = pntsd_size - dacloffset;
+       num_aces = le32_to_cpu(parent_pdacl->num_aces);
+       pntsd_type = le16_to_cpu(parent_pntsd->type);
++      pdacl_size = le16_to_cpu(parent_pdacl->size);
++
++      if (pdacl_size > acl_len || pdacl_size < sizeof(struct smb_acl)) {
++              rc = -EINVAL;
++              goto free_parent_pntsd;
++      }
+       aces_base = kmalloc(sizeof(struct smb_ace) * num_aces * 2, GFP_KERNEL);
+       if (!aces_base) {
+@@ -1008,11 +1036,23 @@ int smb_inherit_dacl(struct ksmbd_conn *conn,
+       aces = (struct smb_ace *)aces_base;
+       parent_aces = (struct smb_ace *)((char *)parent_pdacl +
+                       sizeof(struct smb_acl));
++      aces_size = acl_len - sizeof(struct smb_acl);
+       if (pntsd_type & DACL_AUTO_INHERITED)
+               inherited_flags = INHERITED_ACE;
+       for (i = 0; i < num_aces; i++) {
++              int pace_size;
++
++              if (offsetof(struct smb_ace, access_req) > aces_size)
++                      break;
++
++              pace_size = le16_to_cpu(parent_aces->size);
++              if (pace_size > aces_size)
++                      break;
++
++              aces_size -= pace_size;
++
+               flags = parent_aces->flags;
+               if (!smb_inherit_flags(flags, is_dir))
+                       goto pass;
+@@ -1057,8 +1097,7 @@ int smb_inherit_dacl(struct ksmbd_conn *conn,
+               aces = (struct smb_ace *)((char *)aces + le16_to_cpu(aces->size));
+               ace_cnt++;
+ pass:
+-              parent_aces =
+-                      (struct smb_ace *)((char *)parent_aces + le16_to_cpu(parent_aces->size));
++              parent_aces = (struct smb_ace *)((char *)parent_aces + pace_size);
+       }
+       if (nt_size > 0) {
+@@ -1153,7 +1192,7 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
+       struct smb_ntsd *pntsd = NULL;
+       struct smb_acl *pdacl;
+       struct posix_acl *posix_acls;
+-      int rc = 0, acl_size;
++      int rc = 0, pntsd_size, acl_size, aces_size, pdacl_size, dacl_offset;
+       struct smb_sid sid;
+       int granted = le32_to_cpu(*pdaccess & ~FILE_MAXIMAL_ACCESS_LE);
+       struct smb_ace *ace;
+@@ -1162,37 +1201,33 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
+       struct smb_ace *others_ace = NULL;
+       struct posix_acl_entry *pa_entry;
+       unsigned int sid_type = SIDOWNER;
+-      char *end_of_acl;
++      unsigned short ace_size;
+       ksmbd_debug(SMB, "check permission using windows acl\n");
+-      acl_size = ksmbd_vfs_get_sd_xattr(conn, user_ns,
+-                                        path->dentry, &pntsd);
+-      if (acl_size <= 0 || !pntsd || !pntsd->dacloffset) {
+-              kfree(pntsd);
+-              return 0;
+-      }
++      pntsd_size = ksmbd_vfs_get_sd_xattr(conn, user_ns,
++                                          path->dentry, &pntsd);
++      if (pntsd_size <= 0 || !pntsd)
++              goto err_out;
++
++      dacl_offset = le32_to_cpu(pntsd->dacloffset);
++      if (!dacl_offset ||
++          (dacl_offset + sizeof(struct smb_acl) > pntsd_size))
++              goto err_out;
+       pdacl = (struct smb_acl *)((char *)pntsd + le32_to_cpu(pntsd->dacloffset));
+-      end_of_acl = ((char *)pntsd) + acl_size;
+-      if (end_of_acl <= (char *)pdacl) {
+-              kfree(pntsd);
+-              return 0;
+-      }
++      acl_size = pntsd_size - dacl_offset;
++      pdacl_size = le16_to_cpu(pdacl->size);
+-      if (end_of_acl < (char *)pdacl + le16_to_cpu(pdacl->size) ||
+-          le16_to_cpu(pdacl->size) < sizeof(struct smb_acl)) {
+-              kfree(pntsd);
+-              return 0;
+-      }
++      if (pdacl_size > acl_size || pdacl_size < sizeof(struct smb_acl))
++              goto err_out;
+       if (!pdacl->num_aces) {
+-              if (!(le16_to_cpu(pdacl->size) - sizeof(struct smb_acl)) &&
++              if (!(pdacl_size - sizeof(struct smb_acl)) &&
+                   *pdaccess & ~(FILE_READ_CONTROL_LE | FILE_WRITE_DAC_LE)) {
+                       rc = -EACCES;
+                       goto err_out;
+               }
+-              kfree(pntsd);
+-              return 0;
++              goto err_out;
+       }
+       if (*pdaccess & FILE_MAXIMAL_ACCESS_LE) {
+@@ -1200,11 +1235,16 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
+                       DELETE;
+               ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl));
++              aces_size = acl_size - sizeof(struct smb_acl);
+               for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) {
++                      if (offsetof(struct smb_ace, access_req) > aces_size)
++                              break;
++                      ace_size = le16_to_cpu(ace->size);
++                      if (ace_size > aces_size)
++                              break;
++                      aces_size -= ace_size;
+                       granted |= le32_to_cpu(ace->access_req);
+                       ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size));
+-                      if (end_of_acl < (char *)ace)
+-                              goto err_out;
+               }
+               if (!pdacl->num_aces)
+@@ -1216,7 +1256,15 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
+       id_to_sid(uid, sid_type, &sid);
+       ace = (struct smb_ace *)((char *)pdacl + sizeof(struct smb_acl));
++      aces_size = acl_size - sizeof(struct smb_acl);
+       for (i = 0; i < le32_to_cpu(pdacl->num_aces); i++) {
++              if (offsetof(struct smb_ace, access_req) > aces_size)
++                      break;
++              ace_size = le16_to_cpu(ace->size);
++              if (ace_size > aces_size)
++                      break;
++              aces_size -= ace_size;
++
+               if (!compare_sids(&sid, &ace->sid) ||
+                   !compare_sids(&sid_unix_NFS_mode, &ace->sid)) {
+                       found = 1;
+@@ -1226,8 +1274,6 @@ int smb_check_perm_dacl(struct ksmbd_conn *conn, struct path *path,
+                       others_ace = ace;
+               ace = (struct smb_ace *)((char *)ace + le16_to_cpu(ace->size));
+-              if (end_of_acl < (char *)ace)
+-                      goto err_out;
+       }
+       if (*pdaccess & FILE_MAXIMAL_ACCESS_LE && found) {
+diff --git a/fs/ksmbd/smbacl.h b/fs/ksmbd/smbacl.h
+index 811af3309429..fcb2c83f2992 100644
+--- a/fs/ksmbd/smbacl.h
++++ b/fs/ksmbd/smbacl.h
+@@ -193,7 +193,7 @@ struct posix_acl_state {
+ int parse_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
+                  int acl_len, struct smb_fattr *fattr);
+ int build_sec_desc(struct user_namespace *user_ns, struct smb_ntsd *pntsd,
+-                 struct smb_ntsd *ppntsd, int addition_info,
++                 struct smb_ntsd *ppntsd, int ppntsd_size, int addition_info,
+                  __u32 *secdesclen, struct smb_fattr *fattr);
+ int init_acl_state(struct posix_acl_state *state, int cnt);
+ void free_acl_state(struct posix_acl_state *state);
+diff --git a/fs/ksmbd/vfs.c b/fs/ksmbd/vfs.c
+index 05efcdf7a4a7..201962f03772 100644
+--- a/fs/ksmbd/vfs.c
++++ b/fs/ksmbd/vfs.c
+@@ -1540,6 +1540,11 @@ int ksmbd_vfs_get_sd_xattr(struct ksmbd_conn *conn,
+       }
+       *pntsd = acl.sd_buf;
++      if (acl.sd_size < sizeof(struct smb_ntsd)) {
++              pr_err("sd size is invalid\n");
++              goto out_free;
++      }
++
+       (*pntsd)->osidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->osidoffset) -
+                                          NDR_NTSD_OFFSETOF);
+       (*pntsd)->gsidoffset = cpu_to_le32(le32_to_cpu((*pntsd)->gsidoffset) -
+-- 
+2.35.1
+
diff --git a/queue-5.19/ksmbd-fix-memory-leak-in-smb2_handle_negotiate.patch-5672 b/queue-5.19/ksmbd-fix-memory-leak-in-smb2_handle_negotiate.patch-5672
new file mode 100644 (file)
index 0000000..7d1aa6f
--- /dev/null
@@ -0,0 +1,47 @@
+From 1aee2ce6d6f534f164521c98e4804bd102190706 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Jul 2022 21:56:19 +0900
+Subject: ksmbd: fix memory leak in smb2_handle_negotiate
+
+From: Namjae Jeon <linkinjeon@kernel.org>
+
+[ Upstream commit aa7253c2393f6dcd6a1468b0792f6da76edad917 ]
+
+The allocated memory didn't free under an error
+path in smb2_handle_negotiate().
+
+Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3")
+Cc: stable@vger.kernel.org
+Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-17815
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Reviewed-by: Hyunchul Lee <hyc.lee@gmail.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ksmbd/smb2pdu.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
+index 353f047e783c..a06dad0d1bb7 100644
+--- a/fs/ksmbd/smb2pdu.c
++++ b/fs/ksmbd/smb2pdu.c
+@@ -1139,12 +1139,16 @@ int smb2_handle_negotiate(struct ksmbd_work *work)
+                              status);
+                       rsp->hdr.Status = status;
+                       rc = -EINVAL;
++                      kfree(conn->preauth_info);
++                      conn->preauth_info = NULL;
+                       goto err_out;
+               }
+               rc = init_smb3_11_server(conn);
+               if (rc < 0) {
+                       rsp->hdr.Status = STATUS_INVALID_PARAMETER;
++                      kfree(conn->preauth_info);
++                      conn->preauth_info = NULL;
+                       goto err_out;
+               }
+-- 
+2.35.1
+
diff --git a/queue-5.19/ksmbd-fix-use-after-free-bug-in-smb2_tree_disconect.patch-30412 b/queue-5.19/ksmbd-fix-use-after-free-bug-in-smb2_tree_disconect.patch-30412
new file mode 100644 (file)
index 0000000..2765202
--- /dev/null
@@ -0,0 +1,64 @@
+From 0fd5c7331b0c3cb35bf6512187a54dae4e16631f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Jul 2022 21:57:08 +0900
+Subject: ksmbd: fix use-after-free bug in smb2_tree_disconect
+
+From: Namjae Jeon <linkinjeon@kernel.org>
+
+[ Upstream commit cf6531d98190fa2cf92a6d8bbc8af0a4740a223c ]
+
+smb2_tree_disconnect() freed the struct ksmbd_tree_connect,
+but it left the dangling pointer. It can be accessed
+again under compound requests.
+
+This bug can lead an oops looking something link:
+
+[ 1685.468014 ] BUG: KASAN: use-after-free in ksmbd_tree_conn_disconnect+0x131/0x160 [ksmbd]
+[ 1685.468068 ] Read of size 4 at addr ffff888102172180 by task kworker/1:2/4807
+...
+[ 1685.468130 ] Call Trace:
+[ 1685.468132 ]  <TASK>
+[ 1685.468135 ]  dump_stack_lvl+0x49/0x5f
+[ 1685.468141 ]  print_report.cold+0x5e/0x5cf
+[ 1685.468145 ]  ? ksmbd_tree_conn_disconnect+0x131/0x160 [ksmbd]
+[ 1685.468157 ]  kasan_report+0xaa/0x120
+[ 1685.468194 ]  ? ksmbd_tree_conn_disconnect+0x131/0x160 [ksmbd]
+[ 1685.468206 ]  __asan_report_load4_noabort+0x14/0x20
+[ 1685.468210 ]  ksmbd_tree_conn_disconnect+0x131/0x160 [ksmbd]
+[ 1685.468222 ]  smb2_tree_disconnect+0x175/0x250 [ksmbd]
+[ 1685.468235 ]  handle_ksmbd_work+0x30e/0x1020 [ksmbd]
+[ 1685.468247 ]  process_one_work+0x778/0x11c0
+[ 1685.468251 ]  ? _raw_spin_lock_irq+0x8e/0xe0
+[ 1685.468289 ]  worker_thread+0x544/0x1180
+[ 1685.468293 ]  ? __cpuidle_text_end+0x4/0x4
+[ 1685.468297 ]  kthread+0x282/0x320
+[ 1685.468301 ]  ? process_one_work+0x11c0/0x11c0
+[ 1685.468305 ]  ? kthread_complete_and_exit+0x30/0x30
+[ 1685.468309 ]  ret_from_fork+0x1f/0x30
+
+Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3")
+Cc: stable@vger.kernel.org
+Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-17816
+Signed-off-by: Namjae Jeon <linkinjeon@kernel.org>
+Reviewed-by: Hyunchul Lee <hyc.lee@gmail.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ksmbd/smb2pdu.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
+index a06dad0d1bb7..b5835e78a325 100644
+--- a/fs/ksmbd/smb2pdu.c
++++ b/fs/ksmbd/smb2pdu.c
+@@ -2043,6 +2043,7 @@ int smb2_tree_disconnect(struct ksmbd_work *work)
+       ksmbd_close_tree_conn_fds(work);
+       ksmbd_tree_conn_disconnect(sess, tcon);
++      work->tcon = NULL;
+       return 0;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.19/ksmbd-prevent-out-of-bound-read-for-smb2_tree_connne.patch b/queue-5.19/ksmbd-prevent-out-of-bound-read-for-smb2_tree_connne.patch
new file mode 100644 (file)
index 0000000..2dd061c
--- /dev/null
@@ -0,0 +1,74 @@
+From 03cee3ff6652e9af63d94336023f871553f60b74 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Jul 2022 21:58:53 +0900
+Subject: ksmbd: prevent out of bound read for SMB2_TREE_CONNNECT
+
+From: Hyunchul Lee <hyc.lee@gmail.com>
+
+[ Upstream commit 824d4f64c20093275f72fc8101394d75ff6a249e ]
+
+if Status is not 0 and PathLength is long,
+smb_strndup_from_utf16 could make out of bound
+read in smb2_tree_connnect.
+
+This bug can lead an oops looking something like:
+
+[ 1553.882047] BUG: KASAN: slab-out-of-bounds in smb_strndup_from_utf16+0x469/0x4c0 [ksmbd]
+[ 1553.882064] Read of size 2 at addr ffff88802c4eda04 by task kworker/0:2/42805
+...
+[ 1553.882095] Call Trace:
+[ 1553.882098]  <TASK>
+[ 1553.882101]  dump_stack_lvl+0x49/0x5f
+[ 1553.882107]  print_report.cold+0x5e/0x5cf
+[ 1553.882112]  ? smb_strndup_from_utf16+0x469/0x4c0 [ksmbd]
+[ 1553.882122]  kasan_report+0xaa/0x120
+[ 1553.882128]  ? smb_strndup_from_utf16+0x469/0x4c0 [ksmbd]
+[ 1553.882139]  __asan_report_load_n_noabort+0xf/0x20
+[ 1553.882143]  smb_strndup_from_utf16+0x469/0x4c0 [ksmbd]
+[ 1553.882155]  ? smb_strtoUTF16+0x3b0/0x3b0 [ksmbd]
+[ 1553.882166]  ? __kmalloc_node+0x185/0x430
+[ 1553.882171]  smb2_tree_connect+0x140/0xab0 [ksmbd]
+[ 1553.882185]  handle_ksmbd_work+0x30e/0x1020 [ksmbd]
+[ 1553.882197]  process_one_work+0x778/0x11c0
+[ 1553.882201]  ? _raw_spin_lock_irq+0x8e/0xe0
+[ 1553.882206]  worker_thread+0x544/0x1180
+[ 1553.882209]  ? __cpuidle_text_end+0x4/0x4
+[ 1553.882214]  kthread+0x282/0x320
+[ 1553.882218]  ? process_one_work+0x11c0/0x11c0
+[ 1553.882221]  ? kthread_complete_and_exit+0x30/0x30
+[ 1553.882225]  ret_from_fork+0x1f/0x30
+[ 1553.882231]  </TASK>
+
+There is no need to check error request validation in server.
+This check allow invalid requests not to validate message.
+
+Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3")
+Cc: stable@vger.kernel.org
+Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-17818
+Signed-off-by: Hyunchul Lee <hyc.lee@gmail.com>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ksmbd/smb2misc.c | 5 -----
+ 1 file changed, 5 deletions(-)
+
+diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c
+index aa1e663d9deb..6e25ace36568 100644
+--- a/fs/ksmbd/smb2misc.c
++++ b/fs/ksmbd/smb2misc.c
+@@ -90,11 +90,6 @@ static int smb2_get_data_area_len(unsigned int *off, unsigned int *len,
+       *off = 0;
+       *len = 0;
+-      /* error reqeusts do not have data area */
+-      if (hdr->Status && hdr->Status != STATUS_MORE_PROCESSING_REQUIRED &&
+-          (((struct smb2_err_rsp *)hdr)->StructureSize) == SMB2_ERROR_STRUCTURE_SIZE2_LE)
+-              return ret;
+-
+       /*
+        * Following commands have data areas so we have to get the location
+        * of the data buffer offset and data buffer length for the particular
+-- 
+2.35.1
+
diff --git a/queue-5.19/ksmbd-prevent-out-of-bound-read-for-smb2_write.patch-20867 b/queue-5.19/ksmbd-prevent-out-of-bound-read-for-smb2_write.patch-20867
new file mode 100644 (file)
index 0000000..0b95033
--- /dev/null
@@ -0,0 +1,128 @@
+From 40b114a8b3385152b4e63c017bc73d910d2556dc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Jul 2022 23:41:51 +0900
+Subject: ksmbd: prevent out of bound read for SMB2_WRITE
+
+From: Hyunchul Lee <hyc.lee@gmail.com>
+
+[ Upstream commit ac60778b87e45576d7bfdbd6f53df902654e6f09 ]
+
+OOB read memory can be written to a file,
+if DataOffset is 0 and Length is too large
+in SMB2_WRITE request of compound request.
+
+To prevent this, when checking the length of
+the data area of SMB2_WRITE in smb2_get_data_area_len(),
+let the minimum of DataOffset be the size of
+SMB2 header + the size of SMB2_WRITE header.
+
+This bug can lead an oops looking something like:
+
+[  798.008715] BUG: KASAN: slab-out-of-bounds in copy_page_from_iter_atomic+0xd3d/0x14b0
+[  798.008724] Read of size 252 at addr ffff88800f863e90 by task kworker/0:2/2859
+...
+[  798.008754] Call Trace:
+[  798.008756]  <TASK>
+[  798.008759]  dump_stack_lvl+0x49/0x5f
+[  798.008764]  print_report.cold+0x5e/0x5cf
+[  798.008768]  ? __filemap_get_folio+0x285/0x6d0
+[  798.008774]  ? copy_page_from_iter_atomic+0xd3d/0x14b0
+[  798.008777]  kasan_report+0xaa/0x120
+[  798.008781]  ? copy_page_from_iter_atomic+0xd3d/0x14b0
+[  798.008784]  kasan_check_range+0x100/0x1e0
+[  798.008788]  memcpy+0x24/0x60
+[  798.008792]  copy_page_from_iter_atomic+0xd3d/0x14b0
+[  798.008795]  ? pagecache_get_page+0x53/0x160
+[  798.008799]  ? iov_iter_get_pages_alloc+0x1590/0x1590
+[  798.008803]  ? ext4_write_begin+0xfc0/0xfc0
+[  798.008807]  ? current_time+0x72/0x210
+[  798.008811]  generic_perform_write+0x2c8/0x530
+[  798.008816]  ? filemap_fdatawrite_wbc+0x180/0x180
+[  798.008820]  ? down_write+0xb4/0x120
+[  798.008824]  ? down_write_killable+0x130/0x130
+[  798.008829]  ext4_buffered_write_iter+0x137/0x2c0
+[  798.008833]  ext4_file_write_iter+0x40b/0x1490
+[  798.008837]  ? __fsnotify_parent+0x275/0xb20
+[  798.008842]  ? __fsnotify_update_child_dentry_flags+0x2c0/0x2c0
+[  798.008846]  ? ext4_buffered_write_iter+0x2c0/0x2c0
+[  798.008851]  __kernel_write+0x3a1/0xa70
+[  798.008855]  ? __x64_sys_preadv2+0x160/0x160
+[  798.008860]  ? security_file_permission+0x4a/0xa0
+[  798.008865]  kernel_write+0xbb/0x360
+[  798.008869]  ksmbd_vfs_write+0x27e/0xb90 [ksmbd]
+[  798.008881]  ? ksmbd_vfs_read+0x830/0x830 [ksmbd]
+[  798.008892]  ? _raw_read_unlock+0x2a/0x50
+[  798.008896]  smb2_write+0xb45/0x14e0 [ksmbd]
+[  798.008909]  ? __kasan_check_write+0x14/0x20
+[  798.008912]  ? _raw_spin_lock_bh+0xd0/0xe0
+[  798.008916]  ? smb2_read+0x15e0/0x15e0 [ksmbd]
+[  798.008927]  ? memcpy+0x4e/0x60
+[  798.008931]  ? _raw_spin_unlock+0x19/0x30
+[  798.008934]  ? ksmbd_smb2_check_message+0x16af/0x2350 [ksmbd]
+[  798.008946]  ? _raw_spin_lock_bh+0xe0/0xe0
+[  798.008950]  handle_ksmbd_work+0x30e/0x1020 [ksmbd]
+[  798.008962]  process_one_work+0x778/0x11c0
+[  798.008966]  ? _raw_spin_lock_irq+0x8e/0xe0
+[  798.008970]  worker_thread+0x544/0x1180
+[  798.008973]  ? __cpuidle_text_end+0x4/0x4
+[  798.008977]  kthread+0x282/0x320
+[  798.008982]  ? process_one_work+0x11c0/0x11c0
+[  798.008985]  ? kthread_complete_and_exit+0x30/0x30
+[  798.008989]  ret_from_fork+0x1f/0x30
+[  798.008995]  </TASK>
+
+Fixes: e2f34481b24d ("cifsd: add server-side procedures for SMB3")
+Cc: stable@vger.kernel.org
+Reported-by: zdi-disclosures@trendmicro.com # ZDI-CAN-17817
+Signed-off-by: Hyunchul Lee <hyc.lee@gmail.com>
+Acked-by: Namjae Jeon <linkinjeon@kernel.org>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/ksmbd/smb2misc.c | 7 +++++--
+ fs/ksmbd/smb2pdu.c  | 8 +++-----
+ 2 files changed, 8 insertions(+), 7 deletions(-)
+
+diff --git a/fs/ksmbd/smb2misc.c b/fs/ksmbd/smb2misc.c
+index f8f456377a51..aa1e663d9deb 100644
+--- a/fs/ksmbd/smb2misc.c
++++ b/fs/ksmbd/smb2misc.c
+@@ -136,8 +136,11 @@ static int smb2_get_data_area_len(unsigned int *off, unsigned int *len,
+               *len = le16_to_cpu(((struct smb2_read_req *)hdr)->ReadChannelInfoLength);
+               break;
+       case SMB2_WRITE:
+-              if (((struct smb2_write_req *)hdr)->DataOffset) {
+-                      *off = le16_to_cpu(((struct smb2_write_req *)hdr)->DataOffset);
++              if (((struct smb2_write_req *)hdr)->DataOffset ||
++                  ((struct smb2_write_req *)hdr)->Length) {
++                      *off = max_t(unsigned int,
++                                   le16_to_cpu(((struct smb2_write_req *)hdr)->DataOffset),
++                                   offsetof(struct smb2_write_req, Buffer));
+                       *len = le32_to_cpu(((struct smb2_write_req *)hdr)->Length);
+                       break;
+               }
+diff --git a/fs/ksmbd/smb2pdu.c b/fs/ksmbd/smb2pdu.c
+index b5835e78a325..54aaf9014136 100644
+--- a/fs/ksmbd/smb2pdu.c
++++ b/fs/ksmbd/smb2pdu.c
+@@ -6500,14 +6500,12 @@ int smb2_write(struct ksmbd_work *work)
+               writethrough = true;
+       if (is_rdma_channel == false) {
+-              if ((u64)le16_to_cpu(req->DataOffset) + length >
+-                  get_rfc1002_len(work->request_buf)) {
+-                      pr_err("invalid write data offset %u, smb_len %u\n",
+-                             le16_to_cpu(req->DataOffset),
+-                             get_rfc1002_len(work->request_buf));
++              if (le16_to_cpu(req->DataOffset) <
++                  offsetof(struct smb2_write_req, Buffer)) {
+                       err = -EINVAL;
+                       goto out;
+               }
++
+               data_buf = (char *)(((char *)&req->hdr.ProtocolId) +
+                                   le16_to_cpu(req->DataOffset));
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-do-not-incorporate-page-offset-into-gfn-pfn-cach.patch b/queue-5.19/kvm-do-not-incorporate-page-offset-into-gfn-pfn-cach.patch
new file mode 100644 (file)
index 0000000..872cec8
--- /dev/null
@@ -0,0 +1,43 @@
+From ea59497b46e7a2ab3f493caa24e211a155376ac1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 29 Apr 2022 21:00:22 +0000
+Subject: KVM: Do not incorporate page offset into gfn=>pfn cache user address
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 3ba2c95ea180740b16281fa43a3ee5f47279c0ed ]
+
+Don't adjust the userspace address in the gfn=>pfn cache by the page
+offset from the gpa.  KVM should never use the user address directly, and
+all KVM operations that translate a user address to something else
+require the user address to be page aligned.  Ignoring the offset will
+allow the cache to reuse a gfn=>hva translation in the unlikely event
+that the page offset of the gpa changes, but the gfn does not.  And more
+importantly, not having to (un)adjust the user address will simplify a
+future bug fix.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220429210025.3293691-6-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ virt/kvm/pfncache.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
+index 40cbe90d52e0..05cb0bcbf662 100644
+--- a/virt/kvm/pfncache.c
++++ b/virt/kvm/pfncache.c
+@@ -179,8 +179,6 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+                       ret = -EFAULT;
+                       goto out;
+               }
+-
+-              gpc->uhva += page_offset;
+       }
+       /*
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-drop-unused-gpa-param-from-gfn-pfn-cache-s-__rel.patch b/queue-5.19/kvm-drop-unused-gpa-param-from-gfn-pfn-cache-s-__rel.patch
new file mode 100644 (file)
index 0000000..6b32d3c
--- /dev/null
@@ -0,0 +1,92 @@
+From 42a8593225a8bdc485b1f7203bc49cd5b103376f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 29 Apr 2022 21:00:20 +0000
+Subject: KVM: Drop unused @gpa param from gfn=>pfn cache's __release_gpc()
+ helper
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 345b0fd6fe5f66dfe841bad0b39dd11a5672df68 ]
+
+Drop the @pga param from __release_gpc() and rename the helper to make it
+more obvious that the cache itself is not being released.  The helper
+will be reused by a future commit to release a pfn+khva combination that
+is _never_ associated with the cache, at which point the current name
+would go from slightly misleading to blatantly wrong.
+
+No functional change intended.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220429210025.3293691-4-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ virt/kvm/pfncache.c | 10 +++-------
+ 1 file changed, 3 insertions(+), 7 deletions(-)
+
+diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
+index dd84676615f1..e05a6a1b8eff 100644
+--- a/virt/kvm/pfncache.c
++++ b/virt/kvm/pfncache.c
+@@ -95,7 +95,7 @@ bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+ }
+ EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_check);
+-static void __release_gpc(struct kvm *kvm, kvm_pfn_t pfn, void *khva, gpa_t gpa)
++static void gpc_release_pfn_and_khva(struct kvm *kvm, kvm_pfn_t pfn, void *khva)
+ {
+       /* Unmap the old page if it was mapped before, and release it */
+       if (!is_error_noslot_pfn(pfn)) {
+@@ -146,7 +146,6 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+       unsigned long page_offset = gpa & ~PAGE_MASK;
+       kvm_pfn_t old_pfn, new_pfn;
+       unsigned long old_uhva;
+-      gpa_t old_gpa;
+       void *old_khva;
+       bool old_valid;
+       int ret = 0;
+@@ -160,7 +159,6 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+       write_lock_irq(&gpc->lock);
+-      old_gpa = gpc->gpa;
+       old_pfn = gpc->pfn;
+       old_khva = gpc->khva - offset_in_page(gpc->khva);
+       old_uhva = gpc->uhva;
+@@ -244,7 +242,7 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+  out:
+       write_unlock_irq(&gpc->lock);
+-      __release_gpc(kvm, old_pfn, old_khva, old_gpa);
++      gpc_release_pfn_and_khva(kvm, old_pfn, old_khva);
+       return ret;
+ }
+@@ -254,14 +252,12 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
+ {
+       void *old_khva;
+       kvm_pfn_t old_pfn;
+-      gpa_t old_gpa;
+       write_lock_irq(&gpc->lock);
+       gpc->valid = false;
+       old_khva = gpc->khva - offset_in_page(gpc->khva);
+-      old_gpa = gpc->gpa;
+       old_pfn = gpc->pfn;
+       /*
+@@ -273,7 +269,7 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
+       write_unlock_irq(&gpc->lock);
+-      __release_gpc(kvm, old_pfn, old_khva, old_gpa);
++      gpc_release_pfn_and_khva(kvm, old_pfn, old_khva);
+ }
+ EXPORT_SYMBOL_GPL(kvm_gfn_to_pfn_cache_unmap);
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-fix-multiple-races-in-gfn-pfn-cache-refresh.patch-19149 b/queue-5.19/kvm-fix-multiple-races-in-gfn-pfn-cache-refresh.patch-19149
new file mode 100644 (file)
index 0000000..778dee8
--- /dev/null
@@ -0,0 +1,363 @@
+From 2efc1788a4dacd0abc511650fbbbd867149698b7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 29 Apr 2022 21:00:24 +0000
+Subject: KVM: Fix multiple races in gfn=>pfn cache refresh
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 58cd407ca4c6278cf9f9d09a2e663bf645b0c982 ]
+
+Rework the gfn=>pfn cache (gpc) refresh logic to address multiple races
+between the cache itself, and between the cache and mmu_notifier events.
+
+The existing refresh code attempts to guard against races with the
+mmu_notifier by speculatively marking the cache valid, and then marking
+it invalid if a mmu_notifier invalidation occurs.  That handles the case
+where an invalidation occurs between dropping and re-acquiring gpc->lock,
+but it doesn't handle the scenario where the cache is refreshed after the
+cache was invalidated by the notifier, but before the notifier elevates
+mmu_notifier_count.  The gpc refresh can't use the "retry" helper as its
+invalidation occurs _before_ mmu_notifier_count is elevated and before
+mmu_notifier_range_start is set/updated.
+
+  CPU0                                    CPU1
+  ----                                    ----
+
+  gfn_to_pfn_cache_invalidate_start()
+  |
+  -> gpc->valid = false;
+                                          kvm_gfn_to_pfn_cache_refresh()
+                                          |
+                                          |-> gpc->valid = true;
+
+                                          hva_to_pfn_retry()
+                                          |
+                                          -> acquire kvm->mmu_lock
+                                             kvm->mmu_notifier_count == 0
+                                             mmu_seq == kvm->mmu_notifier_seq
+                                             drop kvm->mmu_lock
+                                             return pfn 'X'
+  acquire kvm->mmu_lock
+  kvm_inc_notifier_count()
+  drop kvm->mmu_lock()
+  kernel frees pfn 'X'
+                                          kvm_gfn_to_pfn_cache_check()
+                                          |
+                                          |-> gpc->valid == true
+
+                                          caller accesses freed pfn 'X'
+
+Key off of mn_active_invalidate_count to detect that a pfncache refresh
+needs to wait for an in-progress mmu_notifier invalidation.  While
+mn_active_invalidate_count is not guaranteed to be stable, it is
+guaranteed to be elevated prior to an invalidation acquiring gpc->lock,
+so either the refresh will see an active invalidation and wait, or the
+invalidation will run after the refresh completes.
+
+Speculatively marking the cache valid is itself flawed, as a concurrent
+kvm_gfn_to_pfn_cache_check() would see a valid cache with stale pfn/khva
+values.  The KVM Xen use case explicitly allows/wants multiple users;
+even though the caches are allocated per vCPU, __kvm_xen_has_interrupt()
+can read a different vCPU (or vCPUs).  Address this race by invalidating
+the cache prior to dropping gpc->lock (this is made possible by fixing
+the above mmu_notifier race).
+
+Complicating all of this is the fact that both the hva=>pfn resolution
+and mapping of the kernel address can sleep, i.e. must be done outside
+of gpc->lock.
+
+Fix the above races in one fell swoop, trying to fix each individual race
+is largely pointless and essentially impossible to test, e.g. closing one
+hole just shifts the focus to the other hole.
+
+Fixes: 982ed0de4753 ("KVM: Reinstate gfn_to_pfn_cache with invalidation support")
+Cc: stable@vger.kernel.org
+Cc: David Woodhouse <dwmw@amazon.co.uk>
+Cc: Mingwei Zhang <mizhang@google.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220429210025.3293691-8-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ virt/kvm/kvm_main.c |   9 +++
+ virt/kvm/pfncache.c | 193 ++++++++++++++++++++++++++++----------------
+ 2 files changed, 131 insertions(+), 71 deletions(-)
+
+diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
+index a49df8988cd6..28126ee221b5 100644
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -724,6 +724,15 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
+       kvm->mn_active_invalidate_count++;
+       spin_unlock(&kvm->mn_invalidate_lock);
++      /*
++       * Invalidate pfn caches _before_ invalidating the secondary MMUs, i.e.
++       * before acquiring mmu_lock, to avoid holding mmu_lock while acquiring
++       * each cache's lock.  There are relatively few caches in existence at
++       * any given time, and the caches themselves can check for hva overlap,
++       * i.e. don't need to rely on memslot overlap checks for performance.
++       * Because this runs without holding mmu_lock, the pfn caches must use
++       * mn_active_invalidate_count (see above) instead of mmu_notifier_count.
++       */
+       gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end,
+                                         hva_range.may_block);
+diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
+index f610d3945b69..b0b678367376 100644
+--- a/virt/kvm/pfncache.c
++++ b/virt/kvm/pfncache.c
+@@ -112,31 +112,122 @@ static void gpc_release_pfn_and_khva(struct kvm *kvm, kvm_pfn_t pfn, void *khva)
+       }
+ }
+-static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, unsigned long uhva)
++static inline bool mmu_notifier_retry_cache(struct kvm *kvm, unsigned long mmu_seq)
+ {
++      /*
++       * mn_active_invalidate_count acts for all intents and purposes
++       * like mmu_notifier_count here; but the latter cannot be used
++       * here because the invalidation of caches in the mmu_notifier
++       * event occurs _before_ mmu_notifier_count is elevated.
++       *
++       * Note, it does not matter that mn_active_invalidate_count
++       * is not protected by gpc->lock.  It is guaranteed to
++       * be elevated before the mmu_notifier acquires gpc->lock, and
++       * isn't dropped until after mmu_notifier_seq is updated.
++       */
++      if (kvm->mn_active_invalidate_count)
++              return true;
++
++      /*
++       * Ensure mn_active_invalidate_count is read before
++       * mmu_notifier_seq.  This pairs with the smp_wmb() in
++       * mmu_notifier_invalidate_range_end() to guarantee either the
++       * old (non-zero) value of mn_active_invalidate_count or the
++       * new (incremented) value of mmu_notifier_seq is observed.
++       */
++      smp_rmb();
++      return kvm->mmu_notifier_seq != mmu_seq;
++}
++
++static kvm_pfn_t hva_to_pfn_retry(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
++{
++      /* Note, the new page offset may be different than the old! */
++      void *old_khva = gpc->khva - offset_in_page(gpc->khva);
++      kvm_pfn_t new_pfn = KVM_PFN_ERR_FAULT;
++      void *new_khva = NULL;
+       unsigned long mmu_seq;
+-      kvm_pfn_t new_pfn;
+-      int retry;
++
++      lockdep_assert_held(&gpc->refresh_lock);
++
++      lockdep_assert_held_write(&gpc->lock);
++
++      /*
++       * Invalidate the cache prior to dropping gpc->lock, the gpa=>uhva
++       * assets have already been updated and so a concurrent check() from a
++       * different task may not fail the gpa/uhva/generation checks.
++       */
++      gpc->valid = false;
+       do {
+               mmu_seq = kvm->mmu_notifier_seq;
+               smp_rmb();
++              write_unlock_irq(&gpc->lock);
++
++              /*
++               * If the previous iteration "failed" due to an mmu_notifier
++               * event, release the pfn and unmap the kernel virtual address
++               * from the previous attempt.  Unmapping might sleep, so this
++               * needs to be done after dropping the lock.  Opportunistically
++               * check for resched while the lock isn't held.
++               */
++              if (new_pfn != KVM_PFN_ERR_FAULT) {
++                      /*
++                       * Keep the mapping if the previous iteration reused
++                       * the existing mapping and didn't create a new one.
++                       */
++                      if (new_khva == old_khva)
++                              new_khva = NULL;
++
++                      gpc_release_pfn_and_khva(kvm, new_pfn, new_khva);
++
++                      cond_resched();
++              }
++
+               /* We always request a writeable mapping */
+-              new_pfn = hva_to_pfn(uhva, false, NULL, true, NULL);
++              new_pfn = hva_to_pfn(gpc->uhva, false, NULL, true, NULL);
+               if (is_error_noslot_pfn(new_pfn))
+-                      break;
++                      goto out_error;
++
++              /*
++               * Obtain a new kernel mapping if KVM itself will access the
++               * pfn.  Note, kmap() and memremap() can both sleep, so this
++               * too must be done outside of gpc->lock!
++               */
++              if (gpc->usage & KVM_HOST_USES_PFN) {
++                      if (new_pfn == gpc->pfn) {
++                              new_khva = old_khva;
++                      } else if (pfn_valid(new_pfn)) {
++                              new_khva = kmap(pfn_to_page(new_pfn));
++#ifdef CONFIG_HAS_IOMEM
++                      } else {
++                              new_khva = memremap(pfn_to_hpa(new_pfn), PAGE_SIZE, MEMREMAP_WB);
++#endif
++                      }
++                      if (!new_khva) {
++                              kvm_release_pfn_clean(new_pfn);
++                              goto out_error;
++                      }
++              }
++
++              write_lock_irq(&gpc->lock);
+-              KVM_MMU_READ_LOCK(kvm);
+-              retry = mmu_notifier_retry_hva(kvm, mmu_seq, uhva);
+-              KVM_MMU_READ_UNLOCK(kvm);
+-              if (!retry)
+-                      break;
++              /*
++               * Other tasks must wait for _this_ refresh to complete before
++               * attempting to refresh.
++               */
++              WARN_ON_ONCE(gpc->valid);
++      } while (mmu_notifier_retry_cache(kvm, mmu_seq));
+-              cond_resched();
+-      } while (1);
++      gpc->valid = true;
++      gpc->pfn = new_pfn;
++      gpc->khva = new_khva + (gpc->gpa & ~PAGE_MASK);
++      return 0;
++
++out_error:
++      write_lock_irq(&gpc->lock);
+-      return new_pfn;
++      return -EFAULT;
+ }
+ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+@@ -147,7 +238,6 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+       kvm_pfn_t old_pfn, new_pfn;
+       unsigned long old_uhva;
+       void *old_khva;
+-      bool old_valid;
+       int ret = 0;
+       /*
+@@ -169,7 +259,6 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+       old_pfn = gpc->pfn;
+       old_khva = gpc->khva - offset_in_page(gpc->khva);
+       old_uhva = gpc->uhva;
+-      old_valid = gpc->valid;
+       /* If the userspace HVA is invalid, refresh that first */
+       if (gpc->gpa != gpa || gpc->generation != slots->generation ||
+@@ -182,7 +271,6 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+               gpc->uhva = gfn_to_hva_memslot(gpc->memslot, gfn);
+               if (kvm_is_error_hva(gpc->uhva)) {
+-                      gpc->pfn = KVM_PFN_ERR_FAULT;
+                       ret = -EFAULT;
+                       goto out;
+               }
+@@ -192,60 +280,8 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+        * If the userspace HVA changed or the PFN was already invalid,
+        * drop the lock and do the HVA to PFN lookup again.
+        */
+-      if (!old_valid || old_uhva != gpc->uhva) {
+-              unsigned long uhva = gpc->uhva;
+-              void *new_khva = NULL;
+-
+-              /* Placeholders for "hva is valid but not yet mapped" */
+-              gpc->pfn = KVM_PFN_ERR_FAULT;
+-              gpc->khva = NULL;
+-              gpc->valid = true;
+-
+-              write_unlock_irq(&gpc->lock);
+-
+-              new_pfn = hva_to_pfn_retry(kvm, uhva);
+-              if (is_error_noslot_pfn(new_pfn)) {
+-                      ret = -EFAULT;
+-                      goto map_done;
+-              }
+-
+-              if (gpc->usage & KVM_HOST_USES_PFN) {
+-                      if (new_pfn == old_pfn) {
+-                              /*
+-                               * Reuse the existing pfn and khva, but put the
+-                               * reference acquired hva_to_pfn_retry(); the
+-                               * cache still holds a reference to the pfn
+-                               * from the previous refresh.
+-                               */
+-                              gpc_release_pfn_and_khva(kvm, new_pfn, NULL);
+-
+-                              new_khva = old_khva;
+-                              old_pfn = KVM_PFN_ERR_FAULT;
+-                              old_khva = NULL;
+-                      } else if (pfn_valid(new_pfn)) {
+-                              new_khva = kmap(pfn_to_page(new_pfn));
+-#ifdef CONFIG_HAS_IOMEM
+-                      } else {
+-                              new_khva = memremap(pfn_to_hpa(new_pfn), PAGE_SIZE, MEMREMAP_WB);
+-#endif
+-                      }
+-                      if (new_khva)
+-                              new_khva += page_offset;
+-                      else
+-                              ret = -EFAULT;
+-              }
+-
+-      map_done:
+-              write_lock_irq(&gpc->lock);
+-              if (ret) {
+-                      gpc->valid = false;
+-                      gpc->pfn = KVM_PFN_ERR_FAULT;
+-                      gpc->khva = NULL;
+-              } else {
+-                      /* At this point, gpc->valid may already have been cleared */
+-                      gpc->pfn = new_pfn;
+-                      gpc->khva = new_khva;
+-              }
++      if (!gpc->valid || old_uhva != gpc->uhva) {
++              ret = hva_to_pfn_retry(kvm, gpc);
+       } else {
+               /* If the HVA→PFN mapping was already valid, don't unmap it. */
+               old_pfn = KVM_PFN_ERR_FAULT;
+@@ -253,11 +289,26 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+       }
+  out:
++      /*
++       * Invalidate the cache and purge the pfn/khva if the refresh failed.
++       * Some/all of the uhva, gpa, and memslot generation info may still be
++       * valid, leave it as is.
++       */
++      if (ret) {
++              gpc->valid = false;
++              gpc->pfn = KVM_PFN_ERR_FAULT;
++              gpc->khva = NULL;
++      }
++
++      /* Snapshot the new pfn before dropping the lock! */
++      new_pfn = gpc->pfn;
++
+       write_unlock_irq(&gpc->lock);
+       mutex_unlock(&gpc->refresh_lock);
+-      gpc_release_pfn_and_khva(kvm, old_pfn, old_khva);
++      if (old_pfn != new_pfn)
++              gpc_release_pfn_and_khva(kvm, old_pfn, old_khva);
+       return ret;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-fully-serialize-gfn-pfn-cache-refresh-via-mutex.patch-7350 b/queue-5.19/kvm-fully-serialize-gfn-pfn-cache-refresh-via-mutex.patch-7350
new file mode 100644 (file)
index 0000000..854a666
--- /dev/null
@@ -0,0 +1,112 @@
+From 52ddf24c3e8dd515588fd249a9c0e59735e9281e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 29 Apr 2022 21:00:23 +0000
+Subject: KVM: Fully serialize gfn=>pfn cache refresh via mutex
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 93984f19e7bce4c18084a6ef3dacafb155b806ed ]
+
+Protect gfn=>pfn cache refresh with a mutex to fully serialize refreshes.
+The refresh logic doesn't protect against
+
+- concurrent unmaps, or refreshes with different GPAs (which may or may not
+  happen in practice, for example if a cache is only used under vcpu->mutex;
+  but it's allowed in the code)
+
+- a false negative on the memslot generation.  If the first refresh sees
+  a stale memslot generation, it will refresh the hva and generation before
+  moving on to the hva=>pfn translation.  If it then drops gpc->lock, a
+  different user of the cache can come along, acquire gpc->lock, see that
+  the memslot generation is fresh, and skip the hva=>pfn update due to the
+  userspace address also matching (because it too was updated).
+
+The refresh path can already sleep during hva=>pfn resolution, so wrap
+the refresh with a mutex to ensure that any given refresh runs to
+completion before other callers can start their refresh.
+
+Cc: stable@vger.kernel.org
+Cc: Lai Jiangshan <jiangshanlai@gmail.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220429210025.3293691-7-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/kvm_types.h |  2 ++
+ virt/kvm/pfncache.c       | 12 ++++++++++++
+ 2 files changed, 14 insertions(+)
+
+diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h
+index ac1ebb37a0ff..f328a01db4fe 100644
+--- a/include/linux/kvm_types.h
++++ b/include/linux/kvm_types.h
+@@ -19,6 +19,7 @@ struct kvm_memslots;
+ enum kvm_mr_change;
+ #include <linux/bits.h>
++#include <linux/mutex.h>
+ #include <linux/types.h>
+ #include <linux/spinlock_types.h>
+@@ -69,6 +70,7 @@ struct gfn_to_pfn_cache {
+       struct kvm_vcpu *vcpu;
+       struct list_head list;
+       rwlock_t lock;
++      struct mutex refresh_lock;
+       void *khva;
+       kvm_pfn_t pfn;
+       enum pfn_cache_usage usage;
+diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
+index 05cb0bcbf662..f610d3945b69 100644
+--- a/virt/kvm/pfncache.c
++++ b/virt/kvm/pfncache.c
+@@ -157,6 +157,13 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+       if (page_offset + len > PAGE_SIZE)
+               return -EINVAL;
++      /*
++       * If another task is refreshing the cache, wait for it to complete.
++       * There is no guarantee that concurrent refreshes will see the same
++       * gpa, memslots generation, etc..., so they must be fully serialized.
++       */
++      mutex_lock(&gpc->refresh_lock);
++
+       write_lock_irq(&gpc->lock);
+       old_pfn = gpc->pfn;
+@@ -248,6 +255,8 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+  out:
+       write_unlock_irq(&gpc->lock);
++      mutex_unlock(&gpc->refresh_lock);
++
+       gpc_release_pfn_and_khva(kvm, old_pfn, old_khva);
+       return ret;
+@@ -259,6 +268,7 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
+       void *old_khva;
+       kvm_pfn_t old_pfn;
++      mutex_lock(&gpc->refresh_lock);
+       write_lock_irq(&gpc->lock);
+       gpc->valid = false;
+@@ -274,6 +284,7 @@ void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc)
+       gpc->pfn = KVM_PFN_ERR_FAULT;
+       write_unlock_irq(&gpc->lock);
++      mutex_unlock(&gpc->refresh_lock);
+       gpc_release_pfn_and_khva(kvm, old_pfn, old_khva);
+ }
+@@ -288,6 +299,7 @@ int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+       if (!gpc->active) {
+               rwlock_init(&gpc->lock);
++              mutex_init(&gpc->refresh_lock);
+               gpc->khva = NULL;
+               gpc->pfn = KVM_PFN_ERR_FAULT;
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-nvmx-account-for-kvm-reserved-cr4-bits-in-consis.patch b/queue-5.19/kvm-nvmx-account-for-kvm-reserved-cr4-bits-in-consis.patch
new file mode 100644 (file)
index 0000000..10dad5d
--- /dev/null
@@ -0,0 +1,50 @@
+From d767abce1471976905ba9734b7e1e3756377d9e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Jun 2022 21:35:51 +0000
+Subject: KVM: nVMX: Account for KVM reserved CR4 bits in consistency checks
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit ca58f3aa53d165afe4ab74c755bc2f6d168617ac ]
+
+Check that the guest (L2) and host (L1) CR4 values that would be loaded
+by nested VM-Enter and VM-Exit respectively are valid with respect to
+KVM's (L0 host) allowed CR4 bits.  Failure to check KVM reserved bits
+would allow L1 to load an illegal CR4 (or trigger hardware VM-Fail or
+failed VM-Entry) by massaging guest CPUID to allow features that are not
+supported by KVM.  Amusingly, KVM itself is an accomplice in its doom, as
+KVM adjusts L1's MSR_IA32_VMX_CR4_FIXED1 to allow L1 to enable bits for
+L2 based on L1's CPUID model.
+
+Note, although nested_{guest,host}_cr4_valid() are _currently_ used if
+and only if the vCPU is post-VMXON (nested.vmxon == true), that may not
+be true in the future, e.g. emulating VMXON has a bug where it doesn't
+check the allowed/required CR0/CR4 bits.
+
+Cc: stable@vger.kernel.org
+Fixes: 3899152ccbf4 ("KVM: nVMX: fix checks on CR{0,4} during virtual VMX operation")
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220607213604.3346000-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/nested.h | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/vmx/nested.h b/arch/x86/kvm/vmx/nested.h
+index c92cea0b8ccc..129ae4e01f7c 100644
+--- a/arch/x86/kvm/vmx/nested.h
++++ b/arch/x86/kvm/vmx/nested.h
+@@ -281,7 +281,8 @@ static inline bool nested_cr4_valid(struct kvm_vcpu *vcpu, unsigned long val)
+       u64 fixed0 = to_vmx(vcpu)->nested.msrs.cr4_fixed0;
+       u64 fixed1 = to_vmx(vcpu)->nested.msrs.cr4_fixed1;
+-      return fixed_bits_valid(val, fixed0, fixed1);
++      return fixed_bits_valid(val, fixed0, fixed1) &&
++             __kvm_is_valid_cr4(vcpu, val);
+ }
+ /* No difference in the restrictions on guest and host CR4 in VMX operation. */
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-nvmx-attempt-to-load-perf_global_ctrl-on-nvmx-xf.patch b/queue-5.19/kvm-nvmx-attempt-to-load-perf_global_ctrl-on-nvmx-xf.patch
new file mode 100644 (file)
index 0000000..9cf8170
--- /dev/null
@@ -0,0 +1,78 @@
+From 191e139fd662eccd2665dbcc0a0e9ed14d0614a8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 22:44:08 +0000
+Subject: KVM: nVMX: Attempt to load PERF_GLOBAL_CTRL on nVMX xfer iff it
+ exists
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 4496a6f9b45e8cd83343ad86a3984d614e22cf54 ]
+
+Attempt to load PERF_GLOBAL_CTRL during nested VM-Enter/VM-Exit if and
+only if the MSR exists (according to the guest vCPU model).  KVM has very
+misguided handling of VM_{ENTRY,EXIT}_LOAD_IA32_PERF_GLOBAL_CTRL and
+attempts to force the nVMX MSR settings to match the vPMU model, i.e. to
+hide/expose the control based on whether or not the MSR exists from the
+guest's perspective.
+
+KVM's modifications fail to handle the scenario where the vPMU is hidden
+from the guest _after_ being exposed to the guest, e.g. by userspace
+doing multiple KVM_SET_CPUID2 calls, which is allowed if done before any
+KVM_RUN.  nested_vmx_pmu_refresh() is called if and only if there's a
+recognized vPMU, i.e. KVM will leave the bits in the allow state and then
+ultimately reject the MSR load and WARN.
+
+KVM should not force the VMX MSRs in the first place.  KVM taking control
+of the MSRs was a misguided attempt at mimicking what commit 5f76f6f5ff96
+("KVM: nVMX: Do not expose MPX VMX controls when guest MPX disabled",
+2018-10-01) did for MPX.  However, the MPX commit was a workaround for
+another KVM bug and not something that should be imitated (and it should
+never been done in the first place).
+
+In other words, KVM's ABI _should_ be that userspace has full control
+over the MSRs, at which point triggering the WARN that loading the MSR
+must not fail is trivial.
+
+The intent of the WARN is still valid; KVM has consistency checks to
+ensure that vmcs12->{guest,host}_ia32_perf_global_ctrl is valid.  The
+problem is that '0' must be considered a valid value at all times, and so
+the simple/obvious solution is to just not actually load the MSR when it
+does not exist.  It is userspace's responsibility to provide a sane vCPU
+model, i.e. KVM is well within its ABI and Intel's VMX architecture to
+skip the loads if the MSR does not exist.
+
+Fixes: 03a8871add95 ("KVM: nVMX: Expose load IA32_PERF_GLOBAL_CTRL VM-{Entry,Exit} control")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220722224409.1336532-5-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/nested.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index 66735fbb791d..ef21c5fe172e 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -2617,6 +2617,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
+       }
+       if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
++          intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) &&
+           WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+                                    vmcs12->guest_ia32_perf_global_ctrl))) {
+               *entry_failure_code = ENTRY_FAIL_DEFAULT;
+@@ -4342,7 +4343,8 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
+               vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
+               vcpu->arch.pat = vmcs12->host_ia32_pat;
+       }
+-      if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL)
++      if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
++          intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)))
+               WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
+                                        vmcs12->host_ia32_perf_global_ctrl));
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-nvmx-inject-ud-if-vmxon-is-attempted-with-incomp.patch b/queue-5.19/kvm-nvmx-inject-ud-if-vmxon-is-attempted-with-incomp.patch
new file mode 100644 (file)
index 0000000..41d0446
--- /dev/null
@@ -0,0 +1,80 @@
+From 452c457abb5411372c478172ce17b997bce37923 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Jun 2022 21:35:52 +0000
+Subject: KVM: nVMX: Inject #UD if VMXON is attempted with incompatible CR0/CR4
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit c7d855c2aff2d511fd60ee2e356134c4fb394799 ]
+
+Inject a #UD if L1 attempts VMXON with a CR0 or CR4 that is disallowed
+per the associated nested VMX MSRs' fixed0/1 settings.  KVM cannot rely
+on hardware to perform the checks, even for the few checks that have
+higher priority than VM-Exit, as (a) KVM may have forced CR0/CR4 bits in
+hardware while running the guest, (b) there may incompatible CR0/CR4 bits
+that have lower priority than VM-Exit, e.g. CR0.NE, and (c) userspace may
+have further restricted the allowed CR0/CR4 values by manipulating the
+guest's nested VMX MSRs.
+
+Note, despite a very strong desire to throw shade at Jim, commit
+70f3aac964ae ("kvm: nVMX: Remove superfluous VMX instruction fault checks")
+is not to blame for the buggy behavior (though the comment...).  That
+commit only removed the CR0.PE, EFLAGS.VM, and COMPATIBILITY mode checks
+(though it did erroneously drop the CPL check, but that has already been
+remedied).  KVM may force CR0.PE=1, but will do so only when also
+forcing EFLAGS.VM=1 to emulate Real Mode, i.e. hardware will still #UD.
+
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=216033
+Fixes: ec378aeef9df ("KVM: nVMX: Implement VMXON and VMXOFF")
+Reported-by: Eric Li <ercli@ucdavis.edu>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220607213604.3346000-4-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/nested.c | 23 ++++++++++++++---------
+ 1 file changed, 14 insertions(+), 9 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index 30babb471ae3..f3b500b8475f 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -4964,20 +4964,25 @@ static int handle_vmon(struct kvm_vcpu *vcpu)
+               | FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
+       /*
+-       * The Intel VMX Instruction Reference lists a bunch of bits that are
+-       * prerequisite to running VMXON, most notably cr4.VMXE must be set to
+-       * 1 (see vmx_is_valid_cr4() for when we allow the guest to set this).
+-       * Otherwise, we should fail with #UD.  But most faulting conditions
+-       * have already been checked by hardware, prior to the VM-exit for
+-       * VMXON.  We do test guest cr4.VMXE because processor CR4 always has
+-       * that bit set to 1 in non-root mode.
++       * Note, KVM cannot rely on hardware to perform the CR0/CR4 #UD checks
++       * that have higher priority than VM-Exit (see Intel SDM's pseudocode
++       * for VMXON), as KVM must load valid CR0/CR4 values into hardware while
++       * running the guest, i.e. KVM needs to check the _guest_ values.
++       *
++       * Rely on hardware for the other two pre-VM-Exit checks, !VM86 and
++       * !COMPATIBILITY modes.  KVM may run the guest in VM86 to emulate Real
++       * Mode, but KVM will never take the guest out of those modes.
+        */
+-      if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
++      if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) ||
++          !nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) {
+               kvm_queue_exception(vcpu, UD_VECTOR);
+               return 1;
+       }
+-      /* CPL=0 must be checked manually. */
++      /*
++       * CPL=0 and all other checks that are lower priority than VM-Exit must
++       * be checked manually.
++       */
+       if (vmx_get_cpl(vcpu)) {
+               kvm_inject_gp(vcpu, 0);
+               return 1;
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-su.patch b/queue-5.19/kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-su.patch
new file mode 100644 (file)
index 0000000..a3c4c72
--- /dev/null
@@ -0,0 +1,180 @@
+From 386e4970d04eb231b4e6fb4a700344e6164b67e8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Jun 2022 21:35:54 +0000
+Subject: KVM: nVMX: Let userspace set nVMX MSR to any _host_ supported value
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit f8ae08f9789ad59d318ea75b570caa454aceda81 ]
+
+Restrict the nVMX MSRs based on KVM's config, not based on the guest's
+current config.  Using the guest's config to audit the new config
+prevents userspace from restoring the original config (KVM's config) if
+at any point in the past the guest's config was restricted in any way.
+
+Fixes: 62cc6b9dc61e ("KVM: nVMX: support restore of VMX capability MSRs")
+Cc: stable@vger.kernel.org
+Cc: David Matlack <dmatlack@google.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220607213604.3346000-6-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/nested.c | 70 +++++++++++++++++++++------------------
+ 1 file changed, 37 insertions(+), 33 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index f3b500b8475f..66735fbb791d 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -1223,7 +1223,7 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
+               BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55) |
+               /* reserved */
+               BIT_ULL(31) | GENMASK_ULL(47, 45) | GENMASK_ULL(63, 56);
+-      u64 vmx_basic = vmx->nested.msrs.basic;
++      u64 vmx_basic = vmcs_config.nested.basic;
+       if (!is_bitwise_subset(vmx_basic, data, feature_and_reserved))
+               return -EINVAL;
+@@ -1246,36 +1246,42 @@ static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
+       return 0;
+ }
+-static int
+-vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
++static void vmx_get_control_msr(struct nested_vmx_msrs *msrs, u32 msr_index,
++                              u32 **low, u32 **high)
+ {
+-      u64 supported;
+-      u32 *lowp, *highp;
+-
+       switch (msr_index) {
+       case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
+-              lowp = &vmx->nested.msrs.pinbased_ctls_low;
+-              highp = &vmx->nested.msrs.pinbased_ctls_high;
++              *low = &msrs->pinbased_ctls_low;
++              *high = &msrs->pinbased_ctls_high;
+               break;
+       case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
+-              lowp = &vmx->nested.msrs.procbased_ctls_low;
+-              highp = &vmx->nested.msrs.procbased_ctls_high;
++              *low = &msrs->procbased_ctls_low;
++              *high = &msrs->procbased_ctls_high;
+               break;
+       case MSR_IA32_VMX_TRUE_EXIT_CTLS:
+-              lowp = &vmx->nested.msrs.exit_ctls_low;
+-              highp = &vmx->nested.msrs.exit_ctls_high;
++              *low = &msrs->exit_ctls_low;
++              *high = &msrs->exit_ctls_high;
+               break;
+       case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
+-              lowp = &vmx->nested.msrs.entry_ctls_low;
+-              highp = &vmx->nested.msrs.entry_ctls_high;
++              *low = &msrs->entry_ctls_low;
++              *high = &msrs->entry_ctls_high;
+               break;
+       case MSR_IA32_VMX_PROCBASED_CTLS2:
+-              lowp = &vmx->nested.msrs.secondary_ctls_low;
+-              highp = &vmx->nested.msrs.secondary_ctls_high;
++              *low = &msrs->secondary_ctls_low;
++              *high = &msrs->secondary_ctls_high;
+               break;
+       default:
+               BUG();
+       }
++}
++
++static int
++vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
++{
++      u32 *lowp, *highp;
++      u64 supported;
++
++      vmx_get_control_msr(&vmcs_config.nested, msr_index, &lowp, &highp);
+       supported = vmx_control_msr(*lowp, *highp);
+@@ -1287,6 +1293,7 @@ vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
+       if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
+               return -EINVAL;
++      vmx_get_control_msr(&vmx->nested.msrs, msr_index, &lowp, &highp);
+       *lowp = data;
+       *highp = data >> 32;
+       return 0;
+@@ -1300,10 +1307,8 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
+               BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30) |
+               /* reserved */
+               GENMASK_ULL(13, 9) | BIT_ULL(31);
+-      u64 vmx_misc;
+-
+-      vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
+-                                 vmx->nested.msrs.misc_high);
++      u64 vmx_misc = vmx_control_msr(vmcs_config.nested.misc_low,
++                                     vmcs_config.nested.misc_high);
+       if (!is_bitwise_subset(vmx_misc, data, feature_and_reserved_bits))
+               return -EINVAL;
+@@ -1331,10 +1336,8 @@ static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
+ static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
+ {
+-      u64 vmx_ept_vpid_cap;
+-
+-      vmx_ept_vpid_cap = vmx_control_msr(vmx->nested.msrs.ept_caps,
+-                                         vmx->nested.msrs.vpid_caps);
++      u64 vmx_ept_vpid_cap = vmx_control_msr(vmcs_config.nested.ept_caps,
++                                             vmcs_config.nested.vpid_caps);
+       /* Every bit is either reserved or a feature bit. */
+       if (!is_bitwise_subset(vmx_ept_vpid_cap, data, -1ULL))
+@@ -1345,20 +1348,21 @@ static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
+       return 0;
+ }
+-static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
++static u64 *vmx_get_fixed0_msr(struct nested_vmx_msrs *msrs, u32 msr_index)
+ {
+-      u64 *msr;
+-
+       switch (msr_index) {
+       case MSR_IA32_VMX_CR0_FIXED0:
+-              msr = &vmx->nested.msrs.cr0_fixed0;
+-              break;
++              return &msrs->cr0_fixed0;
+       case MSR_IA32_VMX_CR4_FIXED0:
+-              msr = &vmx->nested.msrs.cr4_fixed0;
+-              break;
++              return &msrs->cr4_fixed0;
+       default:
+               BUG();
+       }
++}
++
++static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
++{
++      const u64 *msr = vmx_get_fixed0_msr(&vmcs_config.nested, msr_index);
+       /*
+        * 1 bits (which indicates bits which "must-be-1" during VMX operation)
+@@ -1367,7 +1371,7 @@ static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
+       if (!is_bitwise_subset(data, *msr, -1ULL))
+               return -EINVAL;
+-      *msr = data;
++      *vmx_get_fixed0_msr(&vmx->nested.msrs, msr_index) = data;
+       return 0;
+ }
+@@ -1428,7 +1432,7 @@ int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+               vmx->nested.msrs.vmcs_enum = data;
+               return 0;
+       case MSR_IA32_VMX_VMFUNC:
+-              if (data & ~vmx->nested.msrs.vmfunc_controls)
++              if (data & ~vmcs_config.nested.vmfunc_controls)
+                       return -EINVAL;
+               vmx->nested.msrs.vmfunc_controls = data;
+               return 0;
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-nvmx-snapshot-pre-vm-enter-bndcfgs-for-nested_ru.patch b/queue-5.19/kvm-nvmx-snapshot-pre-vm-enter-bndcfgs-for-nested_ru.patch
new file mode 100644 (file)
index 0000000..dee3062
--- /dev/null
@@ -0,0 +1,58 @@
+From b52bbbb1b583491cdb74bb02fc84bec3ec4dbe2d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Jun 2022 21:58:27 +0000
+Subject: KVM: nVMX: Snapshot pre-VM-Enter BNDCFGS for !nested_run_pending case
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit fa578398a0ba2c079fa1170da21fa5baae0cedb2 ]
+
+If a nested run isn't pending, snapshot vmcs01.GUEST_BNDCFGS irrespective
+of whether or not VM_ENTRY_LOAD_BNDCFGS is set in vmcs12.  When restoring
+nested state, e.g. after migration, without a nested run pending,
+prepare_vmcs02() will propagate nested.vmcs01_guest_bndcfgs to vmcs02,
+i.e. will load garbage/zeros into vmcs02.GUEST_BNDCFGS.
+
+If userspace restores nested state before MSRs, then loading garbage is a
+non-issue as loading BNDCFGS will also update vmcs02.  But if userspace
+restores MSRs first, then KVM is responsible for propagating L2's value,
+which is actually thrown into vmcs01, into vmcs02.
+
+Restoring L2 MSRs into vmcs01, i.e. loading all MSRs before nested state
+is all kinds of bizarre and ideally would not be supported.  Sadly, some
+VMMs do exactly that and rely on KVM to make things work.
+
+Note, there's still a lurking SMM bug, as propagating vmcs01.GUEST_BNDFGS
+to vmcs02 across RSM may corrupt L2's BNDCFGS.  But KVM's entire VMX+SMM
+emulation is flawed as SMI+RSM should not touch _any_ VMCS when using the
+"default treatment of SMIs", i.e. when not using an SMI Transfer Monitor.
+
+Link: https://lore.kernel.org/all/Yobt1XwOfb5M6Dfa@google.com
+Fixes: 62cf9bd8118c ("KVM: nVMX: Fix emulation of VM_ENTRY_LOAD_BNDCFGS")
+Cc: stable@vger.kernel.org
+Cc: Lei Wang <lei4.wang@intel.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220614215831.3762138-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/nested.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index ab135f9ef52f..d6cb040966f9 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -3376,7 +3376,8 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
+       if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
+               vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+       if (kvm_mpx_supported() &&
+-              !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
++          (!vmx->nested.nested_run_pending ||
++           !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
+               vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
+       /*
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-nvmx-snapshot-pre-vm-enter-debugctl-for-nested_r.patch b/queue-5.19/kvm-nvmx-snapshot-pre-vm-enter-debugctl-for-nested_r.patch
new file mode 100644 (file)
index 0000000..576e2c6
--- /dev/null
@@ -0,0 +1,59 @@
+From b840ad0bc32e8dc61c68c80d5a88dd6f19b2c01e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Jun 2022 21:58:28 +0000
+Subject: KVM: nVMX: Snapshot pre-VM-Enter DEBUGCTL for !nested_run_pending
+ case
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 764643a6be07445308e492a528197044c801b3ba ]
+
+If a nested run isn't pending, snapshot vmcs01.GUEST_IA32_DEBUGCTL
+irrespective of whether or not VM_ENTRY_LOAD_DEBUG_CONTROLS is set in
+vmcs12.  When restoring nested state, e.g. after migration, without a
+nested run pending, prepare_vmcs02() will propagate
+nested.vmcs01_debugctl to vmcs02, i.e. will load garbage/zeros into
+vmcs02.GUEST_IA32_DEBUGCTL.
+
+If userspace restores nested state before MSRs, then loading garbage is a
+non-issue as loading DEBUGCTL will also update vmcs02.  But if userspace
+restores MSRs first, then KVM is responsible for propagating L2's value,
+which is actually thrown into vmcs01, into vmcs02.
+
+Restoring L2 MSRs into vmcs01, i.e. loading all MSRs before nested state
+is all kinds of bizarre and ideally would not be supported.  Sadly, some
+VMMs do exactly that and rely on KVM to make things work.
+
+Note, there's still a lurking SMM bug, as propagating vmcs01's DEBUGCTL
+to vmcs02 across RSM may corrupt L2's DEBUGCTL.  But KVM's entire VMX+SMM
+emulation is flawed as SMI+RSM should not touch _any_ VMCS when using the
+"default treatment of SMIs", i.e. when not using an SMI Transfer Monitor.
+
+Link: https://lore.kernel.org/all/Yobt1XwOfb5M6Dfa@google.com
+Fixes: 8fcc4b5923af ("kvm: nVMX: Introduce KVM_CAP_NESTED_STATE")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220614215831.3762138-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/nested.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c
+index d6cb040966f9..30babb471ae3 100644
+--- a/arch/x86/kvm/vmx/nested.c
++++ b/arch/x86/kvm/vmx/nested.c
+@@ -3373,7 +3373,8 @@ enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
+       if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
+               evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
+-      if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
++      if (!vmx->nested.nested_run_pending ||
++          !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
+               vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
+       if (kvm_mpx_supported() &&
+           (!vmx->nested.nested_run_pending ||
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-put-the-extra-pfn-reference-when-reusing-a-pfn-i.patch b/queue-5.19/kvm-put-the-extra-pfn-reference-when-reusing-a-pfn-i.patch
new file mode 100644 (file)
index 0000000..4efe100
--- /dev/null
@@ -0,0 +1,46 @@
+From 4094f6c440d7010664567898b9271e38ef241895 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 29 Apr 2022 21:00:21 +0000
+Subject: KVM: Put the extra pfn reference when reusing a pfn in the gpc cache
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 3dddf65b4f4c451c345d34ae85bdf1791a746e49 ]
+
+Put the struct page reference to pfn acquired by hva_to_pfn() when the
+old and new pfns for a gfn=>pfn cache match.  The cache already has a
+reference via the old/current pfn, and will only put one reference when
+the cache is done with the pfn.
+
+Fixes: 982ed0de4753 ("KVM: Reinstate gfn_to_pfn_cache with invalidation support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220429210025.3293691-5-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ virt/kvm/pfncache.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/virt/kvm/pfncache.c b/virt/kvm/pfncache.c
+index e05a6a1b8eff..40cbe90d52e0 100644
+--- a/virt/kvm/pfncache.c
++++ b/virt/kvm/pfncache.c
+@@ -206,6 +206,14 @@ int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
+               if (gpc->usage & KVM_HOST_USES_PFN) {
+                       if (new_pfn == old_pfn) {
++                              /*
++                               * Reuse the existing pfn and khva, but put the
++                               * reference acquired hva_to_pfn_retry(); the
++                               * cache still holds a reference to the pfn
++                               * from the previous refresh.
++                               */
++                              gpc_release_pfn_and_khva(kvm, new_pfn, NULL);
++
+                               new_khva = old_khva;
+                               old_pfn = KVM_PFN_ERR_FAULT;
+                               old_khva = NULL;
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch-16826 b/queue-5.19/kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch-16826
new file mode 100644 (file)
index 0000000..6c91603
--- /dev/null
@@ -0,0 +1,107 @@
+From 312aee67f8e93778405734e49ade001a9fad4211 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Jul 2022 15:04:34 +0200
+Subject: KVM: s390: pv: don't present the ecall interrupt twice
+
+From: Nico Boehr <nrb@linux.ibm.com>
+
+[ Upstream commit c3f0e5fd2d33d80c5a5a8b5e5d2bab2841709cc8 ]
+
+When the SIGP interpretation facility is present and a VCPU sends an
+ecall to another VCPU in enabled wait, the sending VCPU receives a 56
+intercept (partial execution), so KVM can wake up the receiving CPU.
+Note that the SIGP interpretation facility will take care of the
+interrupt delivery and KVM's only job is to wake the receiving VCPU.
+
+For PV, the sending VCPU will receive a 108 intercept (pv notify) and
+should continue like in the non-PV case, i.e. wake the receiving VCPU.
+
+For PV and non-PV guests the interrupt delivery will occur through the
+SIGP interpretation facility on SIE entry when SIE finds the X bit in
+the status field set.
+
+However, in handle_pv_notification(), there was no special handling for
+SIGP, which leads to interrupt injection being requested by KVM for the
+next SIE entry. This results in the interrupt being delivered twice:
+once by the SIGP interpretation facility and once by KVM through the
+IICTL.
+
+Add the necessary special handling in handle_pv_notification(), similar
+to handle_partial_execution(), which simply wakes the receiving VCPU and
+leave interrupt delivery to the SIGP interpretation facility.
+
+In contrast to external calls, emergency calls are not interpreted but
+also cause a 108 intercept, which is why we still need to call
+handle_instruction() for SIGP orders other than ecall.
+
+Since kvm_s390_handle_sigp_pei() is now called for all SIGP orders which
+cause a 108 intercept - even if they are actually handled by
+handle_instruction() - move the tracepoint in kvm_s390_handle_sigp_pei()
+to avoid possibly confusing trace messages.
+
+Signed-off-by: Nico Boehr <nrb@linux.ibm.com>
+Cc: <stable@vger.kernel.org> # 5.7
+Fixes: da24a0cc58ed ("KVM: s390: protvirt: Instruction emulation")
+Reviewed-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Reviewed-by: Janosch Frank <frankja@linux.ibm.com>
+Reviewed-by: Christian Borntraeger <borntraeger@linux.ibm.com>
+Link: https://lore.kernel.org/r/20220718130434.73302-1-nrb@linux.ibm.com
+Message-Id: <20220718130434.73302-1-nrb@linux.ibm.com>
+Signed-off-by: Claudio Imbrenda <imbrenda@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/kvm/intercept.c | 15 +++++++++++++++
+ arch/s390/kvm/sigp.c      |  4 ++--
+ 2 files changed, 17 insertions(+), 2 deletions(-)
+
+diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
+index 8bd42a20d924..88112065d941 100644
+--- a/arch/s390/kvm/intercept.c
++++ b/arch/s390/kvm/intercept.c
+@@ -528,12 +528,27 @@ static int handle_pv_uvc(struct kvm_vcpu *vcpu)
+ static int handle_pv_notification(struct kvm_vcpu *vcpu)
+ {
++      int ret;
++
+       if (vcpu->arch.sie_block->ipa == 0xb210)
+               return handle_pv_spx(vcpu);
+       if (vcpu->arch.sie_block->ipa == 0xb220)
+               return handle_pv_sclp(vcpu);
+       if (vcpu->arch.sie_block->ipa == 0xb9a4)
+               return handle_pv_uvc(vcpu);
++      if (vcpu->arch.sie_block->ipa >> 8 == 0xae) {
++              /*
++               * Besides external call, other SIGP orders also cause a
++               * 108 (pv notify) intercept. In contrast to external call,
++               * these orders need to be emulated and hence the appropriate
++               * place to handle them is in handle_instruction().
++               * So first try kvm_s390_handle_sigp_pei() and if that isn't
++               * successful, go on with handle_instruction().
++               */
++              ret = kvm_s390_handle_sigp_pei(vcpu);
++              if (!ret)
++                      return ret;
++      }
+       return handle_instruction(vcpu);
+ }
+diff --git a/arch/s390/kvm/sigp.c b/arch/s390/kvm/sigp.c
+index 8aaee2892ec3..cb747bf6c798 100644
+--- a/arch/s390/kvm/sigp.c
++++ b/arch/s390/kvm/sigp.c
+@@ -480,9 +480,9 @@ int kvm_s390_handle_sigp_pei(struct kvm_vcpu *vcpu)
+       struct kvm_vcpu *dest_vcpu;
+       u8 order_code = kvm_s390_get_base_disp_rs(vcpu, NULL);
+-      trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
+-
+       if (order_code == SIGP_EXTERNAL_CALL) {
++              trace_kvm_s390_handle_sigp_pei(vcpu, order_code, cpu_addr);
++
+               dest_vcpu = kvm_get_vcpu_by_id(vcpu->kvm, cpu_addr);
+               BUG_ON(dest_vcpu == NULL);
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-set_msr_mce-permit-guests-to-ignore-single-bit-e.patch b/queue-5.19/kvm-set_msr_mce-permit-guests-to-ignore-single-bit-e.patch
new file mode 100644 (file)
index 0000000..99a744c
--- /dev/null
@@ -0,0 +1,71 @@
+From 21737e4624e8a75e7b3b842ce918e7a24306d92c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 21 May 2022 08:15:11 +0000
+Subject: KVM: set_msr_mce: Permit guests to ignore single-bit ECC errors
+
+From: Lev Kujawski <lkujaw@member.fsf.org>
+
+[ Upstream commit 0471a7bd1bca2a47a5f378f2222c5cf39ce94152 ]
+
+Certain guest operating systems (e.g., UNIXWARE) clear bit 0 of
+MC1_CTL to ignore single-bit ECC data errors.  Single-bit ECC data
+errors are always correctable and thus are safe to ignore because they
+are informational in nature rather than signaling a loss of data
+integrity.
+
+Prior to this patch, these guests would crash upon writing MC1_CTL,
+with resultant error messages like the following:
+
+error: kvm run failed Operation not permitted
+EAX=fffffffe EBX=fffffffe ECX=00000404 EDX=ffffffff
+ESI=ffffffff EDI=00000001 EBP=fffdaba4 ESP=fffdab20
+EIP=c01333a5 EFL=00000246 [---Z-P-] CPL=0 II=0 A20=1 SMM=0 HLT=0
+ES =0108 00000000 ffffffff 00c09300 DPL=0 DS   [-WA]
+CS =0100 00000000 ffffffff 00c09b00 DPL=0 CS32 [-RA]
+SS =0108 00000000 ffffffff 00c09300 DPL=0 DS   [-WA]
+DS =0108 00000000 ffffffff 00c09300 DPL=0 DS   [-WA]
+FS =0000 00000000 ffffffff 00c00000
+GS =0000 00000000 ffffffff 00c00000
+LDT=0118 c1026390 00000047 00008200 DPL=0 LDT
+TR =0110 ffff5af0 00000067 00008b00 DPL=0 TSS32-busy
+GDT=     ffff5020 000002cf
+IDT=     ffff52f0 000007ff
+CR0=8001003b CR2=00000000 CR3=0100a000 CR4=00000230
+DR0=00000000 DR1=00000000 DR2=00000000 DR3=00000000
+DR6=ffff0ff0 DR7=00000400
+EFER=0000000000000000
+Code=08 89 01 89 51 04 c3 8b 4c 24 08 8b 01 8b 51 04 8b 4c 24 04 <0f>
+30 c3 f7 05 a4 6d ff ff 10 00 00 00 74 03 0f 31 c3 33 c0 33 d2 c3 8d
+74 26 00 0f 31 c3
+
+Signed-off-by: Lev Kujawski <lkujaw@member.fsf.org>
+Message-Id: <20220521081511.187388-1-lkujaw@member.fsf.org>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/x86.c | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index e5fa335a4ea7..b2949f653564 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3239,10 +3239,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+                       /* only 0 or all 1s can be written to IA32_MCi_CTL
+                        * some Linux kernels though clear bit 10 in bank 4 to
+                        * workaround a BIOS/GART TBL issue on AMD K8s, ignore
+-                       * this to avoid an uncatched #GP in the guest
++                       * this to avoid an uncatched #GP in the guest.
++                       *
++                       * UNIXWARE clears bit 0 of MC1_CTL to ignore
++                       * correctable, single-bit ECC data errors.
+                        */
+                       if ((offset & 0x3) == 0 &&
+-                          data != 0 && (data | (1 << 10)) != ~(u64)0)
++                          data != 0 && (data | (1 << 10) | 1) != ~(u64)0)
+                               return -1;
+                       /* MCi_STATUS */
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-svm-don-t-bug-if-userspace-injects-an-interrupt-.patch b/queue-5.19/kvm-svm-don-t-bug-if-userspace-injects-an-interrupt-.patch
new file mode 100644 (file)
index 0000000..509ec33
--- /dev/null
@@ -0,0 +1,67 @@
+From 3f99761427448c41f59799a25942ebf1c92b772e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 2 May 2022 00:07:26 +0200
+Subject: KVM: SVM: Don't BUG if userspace injects an interrupt with GIF=0
+
+From: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
+
+[ Upstream commit f17c31c48e5cde9895a491d91c424eeeada3e134 ]
+
+Don't BUG/WARN on interrupt injection due to GIF being cleared,
+since it's trivial for userspace to force the situation via
+KVM_SET_VCPU_EVENTS (even if having at least a WARN there would be correct
+for KVM internally generated injections).
+
+  kernel BUG at arch/x86/kvm/svm/svm.c:3386!
+  invalid opcode: 0000 [#1] SMP
+  CPU: 15 PID: 926 Comm: smm_test Not tainted 5.17.0-rc3+ #264
+  Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
+  RIP: 0010:svm_inject_irq+0xab/0xb0 [kvm_amd]
+  Code: <0f> 0b 0f 1f 00 0f 1f 44 00 00 80 3d ac b3 01 00 00 55 48 89 f5 53
+  RSP: 0018:ffffc90000b37d88 EFLAGS: 00010246
+  RAX: 0000000000000000 RBX: ffff88810a234ac0 RCX: 0000000000000006
+  RDX: 0000000000000000 RSI: ffffc90000b37df7 RDI: ffff88810a234ac0
+  RBP: ffffc90000b37df7 R08: ffff88810a1fa410 R09: 0000000000000000
+  R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
+  R13: ffff888109571000 R14: ffff88810a234ac0 R15: 0000000000000000
+  FS:  0000000001821380(0000) GS:ffff88846fdc0000(0000) knlGS:0000000000000000
+  CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+  CR2: 00007f74fc550008 CR3: 000000010a6fe000 CR4: 0000000000350ea0
+  Call Trace:
+   <TASK>
+   inject_pending_event+0x2f7/0x4c0 [kvm]
+   kvm_arch_vcpu_ioctl_run+0x791/0x17a0 [kvm]
+   kvm_vcpu_ioctl+0x26d/0x650 [kvm]
+   __x64_sys_ioctl+0x82/0xb0
+   do_syscall_64+0x3b/0xc0
+   entry_SYSCALL_64_after_hwframe+0x44/0xae
+   </TASK>
+
+Fixes: 219b65dcf6c0 ("KVM: SVM: Improve nested interrupt injection")
+Cc: stable@vger.kernel.org
+Co-developed-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Maciej S. Szmigiero <maciej.szmigiero@oracle.com>
+Message-Id: <35426af6e123cbe91ec7ce5132ce72521f02b1b5.1651440202.git.maciej.szmigiero@oracle.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/svm/svm.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
+index 44bbf25dfeb9..e9f479acf941 100644
+--- a/arch/x86/kvm/svm/svm.c
++++ b/arch/x86/kvm/svm/svm.c
+@@ -3385,8 +3385,6 @@ static void svm_inject_irq(struct kvm_vcpu *vcpu)
+ {
+       struct vcpu_svm *svm = to_svm(vcpu);
+-      BUG_ON(!(gif_set(svm)));
+-
+       trace_kvm_inj_virq(vcpu->arch.interrupt.nr);
+       ++vcpu->stat.irq_injections;
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-vmx-add-helper-to-check-if-the-guest-pmu-has-per.patch b/queue-5.19/kvm-vmx-add-helper-to-check-if-the-guest-pmu-has-per.patch
new file mode 100644 (file)
index 0000000..46baa92
--- /dev/null
@@ -0,0 +1,73 @@
+From 473d50e69be0ad6c8c3cf0408426287c89e1a146 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 22:44:07 +0000
+Subject: KVM: VMX: Add helper to check if the guest PMU has PERF_GLOBAL_CTRL
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit b663f0b5f3d665c261256d1f76e98f077c6e56af ]
+
+Add a helper to check if the guest PMU has PERF_GLOBAL_CTRL, which is
+unintuitive _and_ diverges from Intel's architecturally defined behavior.
+Even worse, KVM currently implements the check using two different (but
+equivalent) checks, _and_ there has been at least one attempt to add a
+_third_ flavor.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220722224409.1336532-4-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/pmu_intel.c |  4 ++--
+ arch/x86/kvm/vmx/vmx.h       | 12 ++++++++++++
+ 2 files changed, 14 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
+index 8faac421171f..a9280ebf78f5 100644
+--- a/arch/x86/kvm/vmx/pmu_intel.c
++++ b/arch/x86/kvm/vmx/pmu_intel.c
+@@ -98,7 +98,7 @@ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
+ {
+       struct kvm_pmu *pmu = pmc_to_pmu(pmc);
+-      if (pmu->version < 2)
++      if (!intel_pmu_has_perf_global_ctrl(pmu))
+               return true;
+       return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
+@@ -215,7 +215,7 @@ static bool intel_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
+       case MSR_CORE_PERF_GLOBAL_STATUS:
+       case MSR_CORE_PERF_GLOBAL_CTRL:
+       case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+-              ret = pmu->version > 1;
++              return intel_pmu_has_perf_global_ctrl(pmu);
+               break;
+       default:
+               ret = get_gp_pmc(pmu, msr, MSR_IA32_PERFCTR0) ||
+diff --git a/arch/x86/kvm/vmx/vmx.h b/arch/x86/kvm/vmx/vmx.h
+index 1e7f9453894b..93aa1f3ea01e 100644
+--- a/arch/x86/kvm/vmx/vmx.h
++++ b/arch/x86/kvm/vmx/vmx.h
+@@ -92,6 +92,18 @@ union vmx_exit_reason {
+       u32 full;
+ };
++static inline bool intel_pmu_has_perf_global_ctrl(struct kvm_pmu *pmu)
++{
++      /*
++       * Architecturally, Intel's SDM states that IA32_PERF_GLOBAL_CTRL is
++       * supported if "CPUID.0AH: EAX[7:0] > 0", i.e. if the PMU version is
++       * greater than zero.  However, KVM only exposes and emulates the MSR
++       * to/for the guest if the guest PMU supports at least "Architectural
++       * Performance Monitoring Version 2".
++       */
++      return pmu->version > 1;
++}
++
+ #define vcpu_to_lbr_desc(vcpu) (&to_vmx(vcpu)->lbr_desc)
+ #define vcpu_to_lbr_records(vcpu) (&to_vmx(vcpu)->lbr_desc.records)
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-vmx-mark-all-perf_global_-ovf-_ctrl-bits-reserve.patch b/queue-5.19/kvm-vmx-mark-all-perf_global_-ovf-_ctrl-bits-reserve.patch
new file mode 100644 (file)
index 0000000..25157ca
--- /dev/null
@@ -0,0 +1,43 @@
+From bb87d16c2e9b6f1e6bc41b54591e9e9797f6e416 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 22:44:06 +0000
+Subject: KVM: VMX: Mark all PERF_GLOBAL_(OVF)_CTRL bits reserved if there's no
+ vPMU
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 93255bf92939d948bc86d81c6bb70bb0fecc5db1 ]
+
+Mark all MSR_CORE_PERF_GLOBAL_CTRL and MSR_CORE_PERF_GLOBAL_OVF_CTRL bits
+as reserved if there is no guest vPMU.  The nVMX VM-Entry consistency
+checks do not check for a valid vPMU prior to consuming the masks via
+kvm_valid_perf_global_ctrl(), i.e. may incorrectly allow a non-zero mask
+to be loaded via VM-Enter or VM-Exit (well, attempted to be loaded, the
+actual MSR load will be rejected by intel_is_valid_msr()).
+
+Fixes: f5132b01386b ("KVM: Expose a version 2 architectural PMU to a guests")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220722224409.1336532-3-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/pmu_intel.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
+index 7e72022a00aa..2696b16f9283 100644
+--- a/arch/x86/kvm/vmx/pmu_intel.c
++++ b/arch/x86/kvm/vmx/pmu_intel.c
+@@ -488,6 +488,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
+       pmu->version = 0;
+       pmu->reserved_bits = 0xffffffff00200000ull;
+       pmu->raw_event_mask = X86_RAW_EVENT_MASK;
++      pmu->global_ctrl_mask = ~0ull;
++      pmu->global_ovf_ctrl_mask = ~0ull;
+       pmu->fixed_ctr_ctrl_mask = ~0ull;
+       entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-a.patch b/queue-5.19/kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-a.patch
new file mode 100644 (file)
index 0000000..5c91284
--- /dev/null
@@ -0,0 +1,71 @@
+From 5ef7132f5c1b5e760c2d86b0c56383dc22c9f3fc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Jul 2022 23:27:48 +0000
+Subject: KVM: x86: Mark TSS busy during LTR emulation _after_ all fault checks
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit ec6e4d863258d4bfb36d48d5e3ef68140234d688 ]
+
+Wait to mark the TSS as busy during LTR emulation until after all fault
+checks for the LTR have passed.  Specifically, don't mark the TSS busy if
+the new TSS base is non-canonical.
+
+Opportunistically drop the one-off !seg_desc.PRESENT check for TR as the
+only reason for the early check was to avoid marking a !PRESENT TSS as
+busy, i.e. the common !PRESENT is now done before setting the busy bit.
+
+Fixes: e37a75a13cda ("KVM: x86: Emulator ignores LDTR/TR extended base on LLDT/LTR")
+Reported-by: syzbot+760a73552f47a8cd0fd9@syzkaller.appspotmail.com
+Cc: stable@vger.kernel.org
+Cc: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
+Cc: Hou Wenlong <houwenlong.hwl@antgroup.com>
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Link: https://lore.kernel.org/r/20220711232750.1092012-2-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/emulate.c | 19 +++++++++----------
+ 1 file changed, 9 insertions(+), 10 deletions(-)
+
+diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
+index f8382abe22ff..93a969066d5c 100644
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -1687,16 +1687,6 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
+       case VCPU_SREG_TR:
+               if (seg_desc.s || (seg_desc.type != 1 && seg_desc.type != 9))
+                       goto exception;
+-              if (!seg_desc.p) {
+-                      err_vec = NP_VECTOR;
+-                      goto exception;
+-              }
+-              old_desc = seg_desc;
+-              seg_desc.type |= 2; /* busy */
+-              ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
+-                                                sizeof(seg_desc), &ctxt->exception);
+-              if (ret != X86EMUL_CONTINUE)
+-                      return ret;
+               break;
+       case VCPU_SREG_LDTR:
+               if (seg_desc.s || seg_desc.type != 2)
+@@ -1737,6 +1727,15 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
+                               ((u64)base3 << 32), ctxt))
+                       return emulate_gp(ctxt, 0);
+       }
++
++      if (seg == VCPU_SREG_TR) {
++              old_desc = seg_desc;
++              seg_desc.type |= 2; /* busy */
++              ret = ctxt->ops->cmpxchg_emulated(ctxt, desc_addr, &old_desc, &seg_desc,
++                                                sizeof(seg_desc), &ctxt->exception);
++              if (ret != X86EMUL_CONTINUE)
++                      return ret;
++      }
+ load:
+       ctxt->ops->set_segment(ctxt, selector, &seg_desc, base3, seg);
+       if (desc)
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-x86-mmu-treat-nx-as-a-valid-spte-bit-for-npt.patch-3797 b/queue-5.19/kvm-x86-mmu-treat-nx-as-a-valid-spte-bit-for-npt.patch-3797
new file mode 100644 (file)
index 0000000..c10c878
--- /dev/null
@@ -0,0 +1,70 @@
+From 1d4354d553be3fc86438d397274ddf854202ea95 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 23 Jul 2022 01:30:29 +0000
+Subject: KVM: x86/mmu: Treat NX as a valid SPTE bit for NPT
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 6c6ab524cfae0799e55c82b2c1d61f1af0156f8d ]
+
+Treat the NX bit as valid when using NPT, as KVM will set the NX bit when
+the NX huge page mitigation is enabled (mindblowing) and trigger the WARN
+that fires on reserved SPTE bits being set.
+
+KVM has required NX support for SVM since commit b26a71a1a5b9 ("KVM: SVM:
+Refuse to load kvm_amd if NX support is not available") for exactly this
+reason, but apparently it never occurred to anyone to actually test NPT
+with the mitigation enabled.
+
+  ------------[ cut here ]------------
+  spte = 0x800000018a600ee7, level = 2, rsvd bits = 0x800f0000001fe000
+  WARNING: CPU: 152 PID: 15966 at arch/x86/kvm/mmu/spte.c:215 make_spte+0x327/0x340 [kvm]
+  Hardware name: Google, Inc. Arcadia_IT_80/Arcadia_IT_80, BIOS 10.48.0 01/27/2022
+  RIP: 0010:make_spte+0x327/0x340 [kvm]
+  Call Trace:
+   <TASK>
+   tdp_mmu_map_handle_target_level+0xc3/0x230 [kvm]
+   kvm_tdp_mmu_map+0x343/0x3b0 [kvm]
+   direct_page_fault+0x1ae/0x2a0 [kvm]
+   kvm_tdp_page_fault+0x7d/0x90 [kvm]
+   kvm_mmu_page_fault+0xfb/0x2e0 [kvm]
+   npf_interception+0x55/0x90 [kvm_amd]
+   svm_invoke_exit_handler+0x31/0xf0 [kvm_amd]
+   svm_handle_exit+0xf6/0x1d0 [kvm_amd]
+   vcpu_enter_guest+0xb6d/0xee0 [kvm]
+   ? kvm_pmu_trigger_event+0x6d/0x230 [kvm]
+   vcpu_run+0x65/0x2c0 [kvm]
+   kvm_arch_vcpu_ioctl_run+0x355/0x610 [kvm]
+   kvm_vcpu_ioctl+0x551/0x610 [kvm]
+   __se_sys_ioctl+0x77/0xc0
+   __x64_sys_ioctl+0x1d/0x20
+   do_syscall_64+0x44/0xa0
+   entry_SYSCALL_64_after_hwframe+0x46/0xb0
+   </TASK>
+  ---[ end trace 0000000000000000 ]---
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220723013029.1753623-1-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/mmu/mmu.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
+index 17252f39bd7c..a1d17a826807 100644
+--- a/arch/x86/kvm/mmu/mmu.c
++++ b/arch/x86/kvm/mmu/mmu.c
+@@ -4567,7 +4567,7 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_mmu *context)
+       if (boot_cpu_is_amd())
+               __reset_rsvds_bits_mask(shadow_zero_check, reserved_hpa_bits(),
+-                                      context->root_role.level, false,
++                                      context->root_role.level, true,
+                                       boot_cpu_has(X86_FEATURE_GBPAGES),
+                                       false, true);
+       else
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-x86-pmu-accept-0-for-absent-pmu-msrs-when-host-i.patch b/queue-5.19/kvm-x86-pmu-accept-0-for-absent-pmu-msrs-when-host-i.patch
new file mode 100644 (file)
index 0000000..854f7e8
--- /dev/null
@@ -0,0 +1,74 @@
+From a056ce6a166845c496ed36a12d7d5933069164a3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 1 Jun 2022 11:19:23 +0800
+Subject: KVM: x86/pmu: Accept 0 for absent PMU MSRs when host-initiated if
+ !enable_pmu
+
+From: Like Xu <likexu@tencent.com>
+
+[ Upstream commit 8e6a58e28b34e8d247e772159b8fa8f6bae39192 ]
+
+Whenever an MSR is part of KVM_GET_MSR_INDEX_LIST, as is the case for
+MSR_K7_EVNTSEL0 or MSR_F15H_PERF_CTL0, it has to be always retrievable
+and settable with KVM_GET_MSR and KVM_SET_MSR.
+
+Accept a zero value for these MSRs to obey the contract.
+
+Signed-off-by: Like Xu <likexu@tencent.com>
+Message-Id: <20220601031925.59693-1-likexu@tencent.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/pmu.c     |  8 ++++++++
+ arch/x86/kvm/svm/pmu.c | 11 ++++++++++-
+ 2 files changed, 18 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
+index 3f868fed9114..2334ddfbbab2 100644
+--- a/arch/x86/kvm/pmu.c
++++ b/arch/x86/kvm/pmu.c
+@@ -433,11 +433,19 @@ static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
+ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ {
++      if (msr_info->host_initiated && !vcpu->kvm->arch.enable_pmu) {
++              msr_info->data = 0;
++              return 0;
++      }
++
+       return static_call(kvm_x86_pmu_get_msr)(vcpu, msr_info);
+ }
+ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ {
++      if (msr_info->host_initiated && !vcpu->kvm->arch.enable_pmu)
++              return !!msr_info->data;
++
+       kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
+       return static_call(kvm_x86_pmu_set_msr)(vcpu, msr_info);
+ }
+diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
+index 136039fc6d01..d93ecb25fe17 100644
+--- a/arch/x86/kvm/svm/pmu.c
++++ b/arch/x86/kvm/svm/pmu.c
+@@ -232,7 +232,16 @@ static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
+ static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
+ {
+       /* All MSRs refer to exactly one PMC, so msr_idx_to_pmc is enough.  */
+-      return false;
++      if (!host_initiated)
++              return false;
++
++      switch (msr) {
++      case MSR_K7_EVNTSEL0 ... MSR_K7_PERFCTR3:
++      case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
++              return true;
++      default:
++              return false;
++      }
+ }
+ static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-x86-pmu-ignore-pmu-global_ctrl-check-if-vpmu-doe.patch b/queue-5.19/kvm-x86-pmu-ignore-pmu-global_ctrl-check-if-vpmu-doe.patch
new file mode 100644 (file)
index 0000000..9af4d17
--- /dev/null
@@ -0,0 +1,40 @@
+From 3ef552d63cfb13094e4272e9105ff444b5077f6f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 May 2022 18:22:02 +0800
+Subject: KVM: x86/pmu: Ignore pmu->global_ctrl check if vPMU doesn't support
+ global_ctrl
+
+From: Like Xu <likexu@tencent.com>
+
+[ Upstream commit 98defd2e17803263f49548fea930cfc974d505aa ]
+
+MSR_CORE_PERF_GLOBAL_CTRL is introduced as part of Architecture PMU V2,
+as indicated by Intel SDM 19.2.2 and the intel_is_valid_msr() function.
+
+So in the absence of global_ctrl support, all PMCs are enabled as AMD does.
+
+Signed-off-by: Like Xu <likexu@tencent.com>
+Message-Id: <20220509102204.62389-1-likexu@tencent.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/vmx/pmu_intel.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
+index 2696b16f9283..8faac421171f 100644
+--- a/arch/x86/kvm/vmx/pmu_intel.c
++++ b/arch/x86/kvm/vmx/pmu_intel.c
+@@ -98,6 +98,9 @@ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
+ {
+       struct kvm_pmu *pmu = pmc_to_pmu(pmc);
++      if (pmu->version < 2)
++              return true;
++
+       return test_bit(pmc->idx, (unsigned long *)&pmu->global_ctrl);
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-x86-pmu-introduce-the-ctrl_mask-value-for-fixed-.patch b/queue-5.19/kvm-x86-pmu-introduce-the-ctrl_mask-value-for-fixed-.patch
new file mode 100644 (file)
index 0000000..0b25790
--- /dev/null
@@ -0,0 +1,79 @@
+From 5cc1c845cb6e026219f9677543c3f41c704ab187 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Apr 2022 18:19:34 +0800
+Subject: KVM: x86/pmu: Introduce the ctrl_mask value for fixed counter
+
+From: Like Xu <like.xu@linux.intel.com>
+
+[ Upstream commit 2c985527dd8d283e786ad7a67e532ef7f6f00fac ]
+
+The mask value of fixed counter control register should be dynamic
+adjusted with the number of fixed counters. This patch introduces a
+variable that includes the reserved bits of fixed counter control
+registers. This is a generic code refactoring.
+
+Co-developed-by: Luwei Kang <luwei.kang@intel.com>
+Signed-off-by: Luwei Kang <luwei.kang@intel.com>
+Signed-off-by: Like Xu <like.xu@linux.intel.com>
+Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Message-Id: <20220411101946.20262-6-likexu@tencent.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/include/asm/kvm_host.h | 1 +
+ arch/x86/kvm/vmx/pmu_intel.c    | 6 +++++-
+ 2 files changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
+index 9217bd6cf0d1..4b41ab003a1c 100644
+--- a/arch/x86/include/asm/kvm_host.h
++++ b/arch/x86/include/asm/kvm_host.h
+@@ -505,6 +505,7 @@ struct kvm_pmu {
+       unsigned nr_arch_fixed_counters;
+       unsigned available_event_types;
+       u64 fixed_ctr_ctrl;
++      u64 fixed_ctr_ctrl_mask;
+       u64 global_ctrl;
+       u64 global_status;
+       u64 counter_bitmask[2];
+diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c
+index 37e9eb32e3d9..7e72022a00aa 100644
+--- a/arch/x86/kvm/vmx/pmu_intel.c
++++ b/arch/x86/kvm/vmx/pmu_intel.c
+@@ -395,7 +395,7 @@ static int intel_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+       case MSR_CORE_PERF_FIXED_CTR_CTRL:
+               if (pmu->fixed_ctr_ctrl == data)
+                       return 0;
+-              if (!(data & 0xfffffffffffff444ull)) {
++              if (!(data & pmu->fixed_ctr_ctrl_mask)) {
+                       reprogram_fixed_counters(pmu, data);
+                       return 0;
+               }
+@@ -479,6 +479,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
+       struct kvm_cpuid_entry2 *entry;
+       union cpuid10_eax eax;
+       union cpuid10_edx edx;
++      int i;
+       pmu->nr_arch_gp_counters = 0;
+       pmu->nr_arch_fixed_counters = 0;
+@@ -487,6 +488,7 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
+       pmu->version = 0;
+       pmu->reserved_bits = 0xffffffff00200000ull;
+       pmu->raw_event_mask = X86_RAW_EVENT_MASK;
++      pmu->fixed_ctr_ctrl_mask = ~0ull;
+       entry = kvm_find_cpuid_entry(vcpu, 0xa, 0);
+       if (!entry || !vcpu->kvm->arch.enable_pmu)
+@@ -522,6 +524,8 @@ static void intel_pmu_refresh(struct kvm_vcpu *vcpu)
+               setup_fixed_pmc_eventsel(pmu);
+       }
++      for (i = 0; i < pmu->nr_arch_fixed_counters; i++)
++              pmu->fixed_ctr_ctrl_mask &= ~(0xbull << (i * 4));
+       pmu->global_ctrl = ((1ull << pmu->nr_arch_gp_counters) - 1) |
+               (((1ull << pmu->nr_arch_fixed_counters) - 1) << INTEL_PMC_IDX_FIXED);
+       pmu->global_ctrl_mask = ~pmu->global_ctrl;
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-x86-set-error-code-to-segment-selector-on-lldt-l.patch b/queue-5.19/kvm-x86-set-error-code-to-segment-selector-on-lldt-l.patch
new file mode 100644 (file)
index 0000000..93fb9e9
--- /dev/null
@@ -0,0 +1,47 @@
+From a1cc01410606cb8a22afc4b77d94cb1df15fb3ca Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Jul 2022 23:27:49 +0000
+Subject: KVM: x86: Set error code to segment selector on LLDT/LTR
+ non-canonical #GP
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 2626206963ace9e8bf92b6eea5ff78dd674c555c ]
+
+When injecting a #GP on LLDT/LTR due to a non-canonical LDT/TSS base, set
+the error code to the selector.  Intel SDM's says nothing about the #GP,
+but AMD's APM explicitly states that both LLDT and LTR set the error code
+to the selector, not zero.
+
+Note, a non-canonical memory operand on LLDT/LTR does generate a #GP(0),
+but the KVM code in question is specific to the base from the descriptor.
+
+Fixes: e37a75a13cda ("KVM: x86: Emulator ignores LDTR/TR extended base on LLDT/LTR")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Maxim Levitsky <mlevitsk@redhat.com>
+Link: https://lore.kernel.org/r/20220711232750.1092012-3-seanjc@google.com
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/emulate.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
+index 93a969066d5c..aa907cec0918 100644
+--- a/arch/x86/kvm/emulate.c
++++ b/arch/x86/kvm/emulate.c
+@@ -1724,8 +1724,8 @@ static int __load_segment_descriptor(struct x86_emulate_ctxt *ctxt,
+               if (ret != X86EMUL_CONTINUE)
+                       return ret;
+               if (emul_is_noncanonical_address(get_desc_base(&seg_desc) |
+-                              ((u64)base3 << 32), ctxt))
+-                      return emulate_gp(ctxt, 0);
++                                               ((u64)base3 << 32), ctxt))
++                      return emulate_gp(ctxt, err_code);
+       }
+       if (seg == VCPU_SREG_TR) {
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-x86-signal-gp-not-eperm-on-bad-wrmsr-mci_ctl-sta.patch b/queue-5.19/kvm-x86-signal-gp-not-eperm-on-bad-wrmsr-mci_ctl-sta.patch
new file mode 100644 (file)
index 0000000..7e85a48
--- /dev/null
@@ -0,0 +1,50 @@
+From 6ad9a4e3b739c76f6f8fff24f0644087aeefea77 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 May 2022 22:27:14 +0000
+Subject: KVM: x86: Signal #GP, not -EPERM, on bad WRMSR(MCi_CTL/STATUS)
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 2368048bf5c2ec4b604ac3431564071e89a0bc71 ]
+
+Return '1', not '-1', when handling an illegal WRMSR to a MCi_CTL or
+MCi_STATUS MSR.  The behavior of "all zeros' or "all ones" for CTL MSRs
+is architectural, as is the "only zeros" behavior for STATUS MSRs.  I.e.
+the intent is to inject a #GP, not exit to userspace due to an unhandled
+emulation case.  Returning '-1' gets interpreted as -EPERM up the stack
+and effecitvely kills the guest.
+
+Fixes: 890ca9aefa78 ("KVM: Add MCE support")
+Fixes: 9ffd986c6e4e ("KVM: X86: #GP when guest attempts to write MCi_STATUS register w/o 0")
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Reviewed-by: Jim Mattson <jmattson@google.com>
+Link: https://lore.kernel.org/r/20220512222716.4112548-2-seanjc@google.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/x86.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index b2949f653564..68d40cb5709d 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -3246,13 +3246,13 @@ static int set_msr_mce(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+                        */
+                       if ((offset & 0x3) == 0 &&
+                           data != 0 && (data | (1 << 10) | 1) != ~(u64)0)
+-                              return -1;
++                              return 1;
+                       /* MCi_STATUS */
+                       if (!msr_info->host_initiated &&
+                           (offset & 0x3) == 1 && data != 0) {
+                               if (!can_set_mci_status(vcpu))
+-                                      return -1;
++                                      return 1;
+                       }
+                       vcpu->arch.mce_banks[offset] = data;
+-- 
+2.35.1
+
diff --git a/queue-5.19/kvm-x86-split-kvm_is_valid_cr4-and-export-only-the-n.patch b/queue-5.19/kvm-x86-split-kvm_is_valid_cr4-and-export-only-the-n.patch
new file mode 100644 (file)
index 0000000..e9e0dd6
--- /dev/null
@@ -0,0 +1,116 @@
+From 38fd254b03ece4442185f4ea788de4316699491a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Jun 2022 21:35:50 +0000
+Subject: KVM: x86: Split kvm_is_valid_cr4() and export only the non-vendor
+ bits
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit c33f6f2228fe8517e38941a508e9f905f99ecba9 ]
+
+Split the common x86 parts of kvm_is_valid_cr4(), i.e. the reserved bits
+checks, into a separate helper, __kvm_is_valid_cr4(), and export only the
+inner helper to vendor code in order to prevent nested VMX from calling
+back into vmx_is_valid_cr4() via kvm_is_valid_cr4().
+
+On SVM, this is a nop as SVM doesn't place any additional restrictions on
+CR4.
+
+On VMX, this is also currently a nop, but only because nested VMX is
+missing checks on reserved CR4 bits for nested VM-Enter.  That bug will
+be fixed in a future patch, and could simply use kvm_is_valid_cr4() as-is,
+but nVMX has _another_ bug where VMXON emulation doesn't enforce VMX's
+restrictions on CR0/CR4.  The cleanest and most intuitive way to fix the
+VMXON bug is to use nested_host_cr{0,4}_valid().  If the CR4 variant
+routes through kvm_is_valid_cr4(), using nested_host_cr4_valid() won't do
+the right thing for the VMXON case as vmx_is_valid_cr4() enforces VMX's
+restrictions if and only if the vCPU is post-VMXON.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220607213604.3346000-2-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/svm/nested.c |  3 ++-
+ arch/x86/kvm/vmx/vmx.c    |  4 ++--
+ arch/x86/kvm/x86.c        | 12 +++++++++---
+ arch/x86/kvm/x86.h        |  2 +-
+ 4 files changed, 14 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
+index ba7cd26f438f..1773080976ca 100644
+--- a/arch/x86/kvm/svm/nested.c
++++ b/arch/x86/kvm/svm/nested.c
+@@ -320,7 +320,8 @@ static bool __nested_vmcb_check_save(struct kvm_vcpu *vcpu,
+                       return false;
+       }
+-      if (CC(!kvm_is_valid_cr4(vcpu, save->cr4)))
++      /* Note, SVM doesn't have any additional restrictions on CR4. */
++      if (CC(!__kvm_is_valid_cr4(vcpu, save->cr4)))
+               return false;
+       if (CC(!kvm_valid_efer(vcpu, save->efer)))
+diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c
+index be7c19374fdd..0aaea87a1459 100644
+--- a/arch/x86/kvm/vmx/vmx.c
++++ b/arch/x86/kvm/vmx/vmx.c
+@@ -3230,8 +3230,8 @@ static bool vmx_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+ {
+       /*
+        * We operate under the default treatment of SMM, so VMX cannot be
+-       * enabled under SMM.  Note, whether or not VMXE is allowed at all is
+-       * handled by kvm_is_valid_cr4().
++       * enabled under SMM.  Note, whether or not VMXE is allowed at all,
++       * i.e. is a reserved bit, is handled by common x86 code.
+        */
+       if ((cr4 & X86_CR4_VMXE) && is_smm(vcpu))
+               return false;
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 68d40cb5709d..9eac0528d584 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -1094,7 +1094,7 @@ int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
+ }
+ EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv);
+-bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
++bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+ {
+       if (cr4 & cr4_reserved_bits)
+               return false;
+@@ -1102,9 +1102,15 @@ bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+       if (cr4 & vcpu->arch.cr4_guest_rsvd_bits)
+               return false;
+-      return static_call(kvm_x86_is_valid_cr4)(vcpu, cr4);
++      return true;
++}
++EXPORT_SYMBOL_GPL(__kvm_is_valid_cr4);
++
++static bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
++{
++      return __kvm_is_valid_cr4(vcpu, cr4) &&
++             static_call(kvm_x86_is_valid_cr4)(vcpu, cr4);
+ }
+-EXPORT_SYMBOL_GPL(kvm_is_valid_cr4);
+ void kvm_post_set_cr4(struct kvm_vcpu *vcpu, unsigned long old_cr4, unsigned long cr4)
+ {
+diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h
+index 588792f00334..80417761fe4a 100644
+--- a/arch/x86/kvm/x86.h
++++ b/arch/x86/kvm/x86.h
+@@ -407,7 +407,7 @@ static inline void kvm_machine_check(void)
+ void kvm_load_guest_xsave_state(struct kvm_vcpu *vcpu);
+ void kvm_load_host_xsave_state(struct kvm_vcpu *vcpu);
+ int kvm_spec_ctrl_test_value(u64 value);
+-bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
++bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
+ int kvm_handle_memory_failure(struct kvm_vcpu *vcpu, int r,
+                             struct x86_exception *e);
+ int kvm_handle_invpcid(struct kvm_vcpu *vcpu, unsigned long type, gva_t gva);
+-- 
+2.35.1
+
diff --git a/queue-5.19/locking-csd_lock-change-csdlock_debug-from-early_par.patch b/queue-5.19/locking-csd_lock-change-csdlock_debug-from-early_par.patch
new file mode 100644 (file)
index 0000000..2072862
--- /dev/null
@@ -0,0 +1,56 @@
+From bc7f1823d1f898178cd40490a1dd9aa1a4ddce82 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 10 May 2022 17:46:39 +0800
+Subject: locking/csd_lock: Change csdlock_debug from early_param to __setup
+
+From: Chen Zhongjin <chenzhongjin@huawei.com>
+
+[ Upstream commit 9c9b26b0df270d4f9246e483a44686fca951a29c ]
+
+The csdlock_debug kernel-boot parameter is parsed by the
+early_param() function csdlock_debug().  If set, csdlock_debug()
+invokes static_branch_enable() to enable csd_lock_wait feature, which
+triggers a panic on arm64 for kernels built with CONFIG_SPARSEMEM=y and
+CONFIG_SPARSEMEM_VMEMMAP=n.
+
+With CONFIG_SPARSEMEM_VMEMMAP=n, __nr_to_section is called in
+static_key_enable() and returns NULL, resulting in a NULL dereference
+because mem_section is initialized only later in sparse_init().
+
+This is also a problem for powerpc because early_param() functions
+are invoked earlier than jump_label_init(), also resulting in
+static_key_enable() failures.  These failures cause the warning "static
+key 'xxx' used before call to jump_label_init()".
+
+Thus, early_param is too early for csd_lock_wait to run
+static_branch_enable(), so changes it to __setup to fix these.
+
+Fixes: 8d0968cc6b8f ("locking/csd_lock: Add boot parameter for controlling CSD lock debugging")
+Cc: stable@vger.kernel.org
+Reported-by: Chen jingwen <chenjingwen6@huawei.com>
+Signed-off-by: Chen Zhongjin <chenzhongjin@huawei.com>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/smp.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/smp.c b/kernel/smp.c
+index dd215f439426..650810a6f29b 100644
+--- a/kernel/smp.c
++++ b/kernel/smp.c
+@@ -174,9 +174,9 @@ static int __init csdlock_debug(char *str)
+       if (val)
+               static_branch_enable(&csdlock_debug_enabled);
+-      return 0;
++      return 1;
+ }
+-early_param("csdlock_debug", csdlock_debug);
++__setup("csdlock_debug=", csdlock_debug);
+ static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
+ static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
+-- 
+2.35.1
+
diff --git a/queue-5.19/mbcache-add-functions-to-delete-entry-if-unused.patch-21045 b/queue-5.19/mbcache-add-functions-to-delete-entry-if-unused.patch-21045
new file mode 100644 (file)
index 0000000..9205bdc
--- /dev/null
@@ -0,0 +1,155 @@
+From 5e473ef1c39d9c7a4982900926db3650f9bb71cc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:21 +0200
+Subject: mbcache: add functions to delete entry if unused
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 3dc96bba65f53daa217f0a8f43edad145286a8f5 ]
+
+Add function mb_cache_entry_delete_or_get() to delete mbcache entry if
+it is unused and also add a function to wait for entry to become unused
+- mb_cache_entry_wait_unused(). We do not share code between the two
+deleting function as one of them will go away soon.
+
+CC: stable@vger.kernel.org
+Fixes: 82939d7999df ("ext4: convert to mbcache2")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-2-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/mbcache.c            | 66 +++++++++++++++++++++++++++++++++++++++--
+ include/linux/mbcache.h | 10 ++++++-
+ 2 files changed, 73 insertions(+), 3 deletions(-)
+
+diff --git a/fs/mbcache.c b/fs/mbcache.c
+index cfc28129fb6f..2010bc80a3f2 100644
+--- a/fs/mbcache.c
++++ b/fs/mbcache.c
+@@ -11,7 +11,7 @@
+ /*
+  * Mbcache is a simple key-value store. Keys need not be unique, however
+  * key-value pairs are expected to be unique (we use this fact in
+- * mb_cache_entry_delete()).
++ * mb_cache_entry_delete_or_get()).
+  *
+  * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
+  * Ext4 also uses it for deduplication of xattr values stored in inodes.
+@@ -125,6 +125,19 @@ void __mb_cache_entry_free(struct mb_cache_entry *entry)
+ }
+ EXPORT_SYMBOL(__mb_cache_entry_free);
++/*
++ * mb_cache_entry_wait_unused - wait to be the last user of the entry
++ *
++ * @entry - entry to work on
++ *
++ * Wait to be the last user of the entry.
++ */
++void mb_cache_entry_wait_unused(struct mb_cache_entry *entry)
++{
++      wait_var_event(&entry->e_refcnt, atomic_read(&entry->e_refcnt) <= 3);
++}
++EXPORT_SYMBOL(mb_cache_entry_wait_unused);
++
+ static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
+                                          struct mb_cache_entry *entry,
+                                          u32 key)
+@@ -217,7 +230,7 @@ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
+ }
+ EXPORT_SYMBOL(mb_cache_entry_get);
+-/* mb_cache_entry_delete - remove a cache entry
++/* mb_cache_entry_delete - try to remove a cache entry
+  * @cache - cache we work with
+  * @key - key
+  * @value - value
+@@ -254,6 +267,55 @@ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value)
+ }
+ EXPORT_SYMBOL(mb_cache_entry_delete);
++/* mb_cache_entry_delete_or_get - remove a cache entry if it has no users
++ * @cache - cache we work with
++ * @key - key
++ * @value - value
++ *
++ * Remove entry from cache @cache with key @key and value @value. The removal
++ * happens only if the entry is unused. The function returns NULL in case the
++ * entry was successfully removed or there's no entry in cache. Otherwise the
++ * function grabs reference of the entry that we failed to delete because it
++ * still has users and return it.
++ */
++struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
++                                                  u32 key, u64 value)
++{
++      struct hlist_bl_node *node;
++      struct hlist_bl_head *head;
++      struct mb_cache_entry *entry;
++
++      head = mb_cache_entry_head(cache, key);
++      hlist_bl_lock(head);
++      hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
++              if (entry->e_key == key && entry->e_value == value) {
++                      if (atomic_read(&entry->e_refcnt) > 2) {
++                              atomic_inc(&entry->e_refcnt);
++                              hlist_bl_unlock(head);
++                              return entry;
++                      }
++                      /* We keep hash list reference to keep entry alive */
++                      hlist_bl_del_init(&entry->e_hash_list);
++                      hlist_bl_unlock(head);
++                      spin_lock(&cache->c_list_lock);
++                      if (!list_empty(&entry->e_list)) {
++                              list_del_init(&entry->e_list);
++                              if (!WARN_ONCE(cache->c_entry_count == 0,
++              "mbcache: attempt to decrement c_entry_count past zero"))
++                                      cache->c_entry_count--;
++                              atomic_dec(&entry->e_refcnt);
++                      }
++                      spin_unlock(&cache->c_list_lock);
++                      mb_cache_entry_put(cache, entry);
++                      return NULL;
++              }
++      }
++      hlist_bl_unlock(head);
++
++      return NULL;
++}
++EXPORT_SYMBOL(mb_cache_entry_delete_or_get);
++
+ /* mb_cache_entry_touch - cache entry got used
+  * @cache - cache the entry belongs to
+  * @entry - entry that got used
+diff --git a/include/linux/mbcache.h b/include/linux/mbcache.h
+index 20f1e3ff6013..8eca7f25c432 100644
+--- a/include/linux/mbcache.h
++++ b/include/linux/mbcache.h
+@@ -30,15 +30,23 @@ void mb_cache_destroy(struct mb_cache *cache);
+ int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
+                         u64 value, bool reusable);
+ void __mb_cache_entry_free(struct mb_cache_entry *entry);
++void mb_cache_entry_wait_unused(struct mb_cache_entry *entry);
+ static inline int mb_cache_entry_put(struct mb_cache *cache,
+                                    struct mb_cache_entry *entry)
+ {
+-      if (!atomic_dec_and_test(&entry->e_refcnt))
++      unsigned int cnt = atomic_dec_return(&entry->e_refcnt);
++
++      if (cnt > 0) {
++              if (cnt <= 3)
++                      wake_up_var(&entry->e_refcnt);
+               return 0;
++      }
+       __mb_cache_entry_free(entry);
+       return 1;
+ }
++struct mb_cache_entry *mb_cache_entry_delete_or_get(struct mb_cache *cache,
++                                                  u32 key, u64 value);
+ void mb_cache_entry_delete(struct mb_cache *cache, u32 key, u64 value);
+ struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
+                                         u64 value);
+-- 
+2.35.1
+
diff --git a/queue-5.19/mbcache-don-t-reclaim-used-entries.patch-21676 b/queue-5.19/mbcache-don-t-reclaim-used-entries.patch-21676
new file mode 100644 (file)
index 0000000..55cb781
--- /dev/null
@@ -0,0 +1,55 @@
+From 7bd38da9abe3f76a78a3a3a79043c0c887d99af3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 12:54:20 +0200
+Subject: mbcache: don't reclaim used entries
+
+From: Jan Kara <jack@suse.cz>
+
+[ Upstream commit 58318914186c157477b978b1739dfe2f1b9dc0fe ]
+
+Do not reclaim entries that are currently used by somebody from a
+shrinker. Firstly, these entries are likely useful. Secondly, we will
+need to keep such entries to protect pending increment of xattr block
+refcount.
+
+CC: stable@vger.kernel.org
+Fixes: 82939d7999df ("ext4: convert to mbcache2")
+Signed-off-by: Jan Kara <jack@suse.cz>
+Link: https://lore.kernel.org/r/20220712105436.32204-1-jack@suse.cz
+Signed-off-by: Theodore Ts'o <tytso@mit.edu>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/mbcache.c | 10 +++++++++-
+ 1 file changed, 9 insertions(+), 1 deletion(-)
+
+diff --git a/fs/mbcache.c b/fs/mbcache.c
+index 97c54d3a2227..cfc28129fb6f 100644
+--- a/fs/mbcache.c
++++ b/fs/mbcache.c
+@@ -288,7 +288,7 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
+       while (nr_to_scan-- && !list_empty(&cache->c_list)) {
+               entry = list_first_entry(&cache->c_list,
+                                        struct mb_cache_entry, e_list);
+-              if (entry->e_referenced) {
++              if (entry->e_referenced || atomic_read(&entry->e_refcnt) > 2) {
+                       entry->e_referenced = 0;
+                       list_move_tail(&entry->e_list, &cache->c_list);
+                       continue;
+@@ -302,6 +302,14 @@ static unsigned long mb_cache_shrink(struct mb_cache *cache,
+               spin_unlock(&cache->c_list_lock);
+               head = mb_cache_entry_head(cache, entry->e_key);
+               hlist_bl_lock(head);
++              /* Now a reliable check if the entry didn't get used... */
++              if (atomic_read(&entry->e_refcnt) > 2) {
++                      hlist_bl_unlock(head);
++                      spin_lock(&cache->c_list_lock);
++                      list_add_tail(&entry->e_list, &cache->c_list);
++                      cache->c_entry_count++;
++                      continue;
++              }
+               if (!hlist_bl_unhashed(&entry->e_hash_list)) {
+                       hlist_bl_del_init(&entry->e_hash_list);
+                       atomic_dec(&entry->e_refcnt);
+-- 
+2.35.1
+
diff --git a/queue-5.19/md-raid-destroy-the-bitmap-after-destroying-the-thre.patch b/queue-5.19/md-raid-destroy-the-bitmap-after-destroying-the-thre.patch
new file mode 100644 (file)
index 0000000..928143a
--- /dev/null
@@ -0,0 +1,134 @@
+From bbbaa00ff25f7981f951b2a3b98b902494102db6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 24 Jul 2022 14:26:12 -0400
+Subject: md-raid: destroy the bitmap after destroying the thread
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+[ Upstream commit e151db8ecfb019b7da31d076130a794574c89f6f ]
+
+When we ran the lvm test "shell/integrity-blocksize-3.sh" on a kernel with
+kasan, we got failure in write_page.
+
+The reason for the failure is that md_bitmap_destroy is called before
+destroying the thread and the thread may be waiting in the function
+write_page for the bio to complete. When the thread finishes waiting, it
+executes "if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags))", which
+triggers the kasan warning.
+
+Note that the commit 48df498daf62 that caused this bug claims that it is
+neede for md-cluster, you should check md-cluster and possibly find
+another bugfix for it.
+
+BUG: KASAN: use-after-free in write_page+0x18d/0x680 [md_mod]
+Read of size 8 at addr ffff889162030c78 by task mdX_raid1/5539
+
+CPU: 10 PID: 5539 Comm: mdX_raid1 Not tainted 5.19.0-rc2 #1
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x34/0x44
+ print_report.cold+0x45/0x57a
+ ? __lock_text_start+0x18/0x18
+ ? write_page+0x18d/0x680 [md_mod]
+ kasan_report+0xa8/0xe0
+ ? write_page+0x18d/0x680 [md_mod]
+ kasan_check_range+0x13f/0x180
+ write_page+0x18d/0x680 [md_mod]
+ ? super_sync+0x4d5/0x560 [dm_raid]
+ ? md_bitmap_file_kick+0xa0/0xa0 [md_mod]
+ ? rs_set_dev_and_array_sectors+0x2e0/0x2e0 [dm_raid]
+ ? mutex_trylock+0x120/0x120
+ ? preempt_count_add+0x6b/0xc0
+ ? preempt_count_sub+0xf/0xc0
+ md_update_sb+0x707/0xe40 [md_mod]
+ md_reap_sync_thread+0x1b2/0x4a0 [md_mod]
+ md_check_recovery+0x533/0x960 [md_mod]
+ raid1d+0xc8/0x2a20 [raid1]
+ ? var_wake_function+0xe0/0xe0
+ ? psi_group_change+0x411/0x500
+ ? preempt_count_sub+0xf/0xc0
+ ? _raw_spin_lock_irqsave+0x78/0xc0
+ ? __lock_text_start+0x18/0x18
+ ? raid1_end_read_request+0x2a0/0x2a0 [raid1]
+ ? preempt_count_sub+0xf/0xc0
+ ? _raw_spin_unlock_irqrestore+0x19/0x40
+ ? del_timer_sync+0xa9/0x100
+ ? try_to_del_timer_sync+0xc0/0xc0
+ ? _raw_spin_lock_irqsave+0x78/0xc0
+ ? __lock_text_start+0x18/0x18
+ ? __list_del_entry_valid+0x68/0xa0
+ ? finish_wait+0xa3/0x100
+ md_thread+0x161/0x260 [md_mod]
+ ? unregister_md_personality+0xa0/0xa0 [md_mod]
+ ? _raw_spin_lock_irqsave+0x78/0xc0
+ ? prepare_to_wait_event+0x2c0/0x2c0
+ ? unregister_md_personality+0xa0/0xa0 [md_mod]
+ kthread+0x148/0x180
+ ? kthread_complete_and_exit+0x20/0x20
+ ret_from_fork+0x1f/0x30
+ </TASK>
+
+Allocated by task 5522:
+ kasan_save_stack+0x1e/0x40
+ __kasan_kmalloc+0x80/0xa0
+ md_bitmap_create+0xa8/0xe80 [md_mod]
+ md_run+0x777/0x1300 [md_mod]
+ raid_ctr+0x249c/0x4a30 [dm_raid]
+ dm_table_add_target+0x2b0/0x620 [dm_mod]
+ table_load+0x1c8/0x400 [dm_mod]
+ ctl_ioctl+0x29e/0x560 [dm_mod]
+ dm_compat_ctl_ioctl+0x7/0x20 [dm_mod]
+ __do_compat_sys_ioctl+0xfa/0x160
+ do_syscall_64+0x90/0xc0
+ entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+Freed by task 5680:
+ kasan_save_stack+0x1e/0x40
+ kasan_set_track+0x21/0x40
+ kasan_set_free_info+0x20/0x40
+ __kasan_slab_free+0xf7/0x140
+ kfree+0x80/0x240
+ md_bitmap_free+0x1c3/0x280 [md_mod]
+ __md_stop+0x21/0x120 [md_mod]
+ md_stop+0x9/0x40 [md_mod]
+ raid_dtr+0x1b/0x40 [dm_raid]
+ dm_table_destroy+0x98/0x1e0 [dm_mod]
+ __dm_destroy+0x199/0x360 [dm_mod]
+ dev_remove+0x10c/0x160 [dm_mod]
+ ctl_ioctl+0x29e/0x560 [dm_mod]
+ dm_compat_ctl_ioctl+0x7/0x20 [dm_mod]
+ __do_compat_sys_ioctl+0xfa/0x160
+ do_syscall_64+0x90/0xc0
+ entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Cc: stable@vger.kernel.org
+Fixes: 48df498daf62 ("md: move bitmap_destroy to the beginning of __md_stop")
+Signed-off-by: Song Liu <song@kernel.org>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/md.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/md/md.c b/drivers/md/md.c
+index c7ecb0bffda0..660c52d48256 100644
+--- a/drivers/md/md.c
++++ b/drivers/md/md.c
+@@ -6244,11 +6244,11 @@ static void mddev_detach(struct mddev *mddev)
+ static void __md_stop(struct mddev *mddev)
+ {
+       struct md_personality *pers = mddev->pers;
+-      md_bitmap_destroy(mddev);
+       mddev_detach(mddev);
+       /* Ensure ->event_work is done */
+       if (mddev->event_work.func)
+               flush_workqueue(md_misc_wq);
++      md_bitmap_destroy(mddev);
+       spin_lock(&mddev->lock);
+       mddev->pers = NULL;
+       spin_unlock(&mddev->lock);
+-- 
+2.35.1
+
diff --git a/queue-5.19/md-raid10-fix-kasan-warning.patch-1758 b/queue-5.19/md-raid10-fix-kasan-warning.patch-1758
new file mode 100644 (file)
index 0000000..0cd6af5
--- /dev/null
@@ -0,0 +1,153 @@
+From 24f0e8306bf150abac23c3c24598c1bedb55fe12 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 04:33:12 -0400
+Subject: md-raid10: fix KASAN warning
+
+From: Mikulas Patocka <mpatocka@redhat.com>
+
+[ Upstream commit d17f744e883b2f8d13cca252d71cfe8ace346f7d ]
+
+There's a KASAN warning in raid10_remove_disk when running the lvm
+test lvconvert-raid-reshape.sh. We fix this warning by verifying that the
+value "number" is valid.
+
+BUG: KASAN: slab-out-of-bounds in raid10_remove_disk+0x61/0x2a0 [raid10]
+Read of size 8 at addr ffff889108f3d300 by task mdX_raid10/124682
+
+CPU: 3 PID: 124682 Comm: mdX_raid10 Not tainted 5.19.0-rc6 #1
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.14.0-2 04/01/2014
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x34/0x44
+ print_report.cold+0x45/0x57a
+ ? __lock_text_start+0x18/0x18
+ ? raid10_remove_disk+0x61/0x2a0 [raid10]
+ kasan_report+0xa8/0xe0
+ ? raid10_remove_disk+0x61/0x2a0 [raid10]
+ raid10_remove_disk+0x61/0x2a0 [raid10]
+Buffer I/O error on dev dm-76, logical block 15344, async page read
+ ? __mutex_unlock_slowpath.constprop.0+0x1e0/0x1e0
+ remove_and_add_spares+0x367/0x8a0 [md_mod]
+ ? super_written+0x1c0/0x1c0 [md_mod]
+ ? mutex_trylock+0xac/0x120
+ ? _raw_spin_lock+0x72/0xc0
+ ? _raw_spin_lock_bh+0xc0/0xc0
+ md_check_recovery+0x848/0x960 [md_mod]
+ raid10d+0xcf/0x3360 [raid10]
+ ? sched_clock_cpu+0x185/0x1a0
+ ? rb_erase+0x4d4/0x620
+ ? var_wake_function+0xe0/0xe0
+ ? psi_group_change+0x411/0x500
+ ? preempt_count_sub+0xf/0xc0
+ ? _raw_spin_lock_irqsave+0x78/0xc0
+ ? __lock_text_start+0x18/0x18
+ ? raid10_sync_request+0x36c0/0x36c0 [raid10]
+ ? preempt_count_sub+0xf/0xc0
+ ? _raw_spin_unlock_irqrestore+0x19/0x40
+ ? del_timer_sync+0xa9/0x100
+ ? try_to_del_timer_sync+0xc0/0xc0
+ ? _raw_spin_lock_irqsave+0x78/0xc0
+ ? __lock_text_start+0x18/0x18
+ ? _raw_spin_unlock_irq+0x11/0x24
+ ? __list_del_entry_valid+0x68/0xa0
+ ? finish_wait+0xa3/0x100
+ md_thread+0x161/0x260 [md_mod]
+ ? unregister_md_personality+0xa0/0xa0 [md_mod]
+ ? _raw_spin_lock_irqsave+0x78/0xc0
+ ? prepare_to_wait_event+0x2c0/0x2c0
+ ? unregister_md_personality+0xa0/0xa0 [md_mod]
+ kthread+0x148/0x180
+ ? kthread_complete_and_exit+0x20/0x20
+ ret_from_fork+0x1f/0x30
+ </TASK>
+
+Allocated by task 124495:
+ kasan_save_stack+0x1e/0x40
+ __kasan_kmalloc+0x80/0xa0
+ setup_conf+0x140/0x5c0 [raid10]
+ raid10_run+0x4cd/0x740 [raid10]
+ md_run+0x6f9/0x1300 [md_mod]
+ raid_ctr+0x2531/0x4ac0 [dm_raid]
+ dm_table_add_target+0x2b0/0x620 [dm_mod]
+ table_load+0x1c8/0x400 [dm_mod]
+ ctl_ioctl+0x29e/0x560 [dm_mod]
+ dm_compat_ctl_ioctl+0x7/0x20 [dm_mod]
+ __do_compat_sys_ioctl+0xfa/0x160
+ do_syscall_64+0x90/0xc0
+ entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+Last potentially related work creation:
+ kasan_save_stack+0x1e/0x40
+ __kasan_record_aux_stack+0x9e/0xc0
+ kvfree_call_rcu+0x84/0x480
+ timerfd_release+0x82/0x140
+ __fput+0xfa/0x400
+ task_work_run+0x80/0xc0
+ exit_to_user_mode_prepare+0x155/0x160
+ syscall_exit_to_user_mode+0x12/0x40
+ do_syscall_64+0x42/0xc0
+ entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+Second to last potentially related work creation:
+ kasan_save_stack+0x1e/0x40
+ __kasan_record_aux_stack+0x9e/0xc0
+ kvfree_call_rcu+0x84/0x480
+ timerfd_release+0x82/0x140
+ __fput+0xfa/0x400
+ task_work_run+0x80/0xc0
+ exit_to_user_mode_prepare+0x155/0x160
+ syscall_exit_to_user_mode+0x12/0x40
+ do_syscall_64+0x42/0xc0
+ entry_SYSCALL_64_after_hwframe+0x46/0xb0
+
+The buggy address belongs to the object at ffff889108f3d200
+ which belongs to the cache kmalloc-256 of size 256
+The buggy address is located 0 bytes to the right of
+ 256-byte region [ffff889108f3d200, ffff889108f3d300)
+
+The buggy address belongs to the physical page:
+page:000000007ef2a34c refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x1108f3c
+head:000000007ef2a34c order:2 compound_mapcount:0 compound_pincount:0
+flags: 0x4000000000010200(slab|head|zone=2)
+raw: 4000000000010200 0000000000000000 dead000000000001 ffff889100042b40
+raw: 0000000000000000 0000000080200020 00000001ffffffff 0000000000000000
+page dumped because: kasan: bad access detected
+
+Memory state around the buggy address:
+ ffff889108f3d200: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ ffff889108f3d280: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+>ffff889108f3d300: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+                   ^
+ ffff889108f3d380: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
+ ffff889108f3d400: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+
+Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Song Liu <song@kernel.org>
+Signed-off-by: Jens Axboe <axboe@kernel.dk>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/md/raid10.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
+index d589f823feb1..f1908fe61677 100644
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -2167,9 +2167,12 @@ static int raid10_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
+       int err = 0;
+       int number = rdev->raid_disk;
+       struct md_rdev **rdevp;
+-      struct raid10_info *p = conf->mirrors + number;
++      struct raid10_info *p;
+       print_conf(conf);
++      if (unlikely(number >= mddev->raid_disks))
++              return 0;
++      p = conf->mirrors + number;
+       if (rdev == p->rdev)
+               rdevp = &p->rdev;
+       else if (rdev == p->replacement)
+-- 
+2.35.1
+
diff --git a/queue-5.19/media-isl7998x-select-v4l2_fwnode-to-fix-build-error.patch-24025 b/queue-5.19/media-isl7998x-select-v4l2_fwnode-to-fix-build-error.patch-24025
new file mode 100644 (file)
index 0000000..cf4e5eb
--- /dev/null
@@ -0,0 +1,44 @@
+From f2eeacd8cefb521e349a7d6c49d07c065dc84beb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 30 Mar 2022 02:56:52 +0100
+Subject: media: isl7998x: select V4L2_FWNODE to fix build error
+
+From: Randy Dunlap <rdunlap@infradead.org>
+
+[ Upstream commit 81e005842d0b8167c059553a1c29c36d8a7a9329 ]
+
+Fix build error when VIDEO_ISL7998X=y and V4L2_FWNODE=m
+by selecting V4L2_FWNODE.
+
+microblaze-linux-ld: drivers/media/i2c/isl7998x.o: in function `isl7998x_probe':
+(.text+0x8f4): undefined reference to `v4l2_fwnode_endpoint_parse'
+
+Cc: stable@vger.kernel.org # 5.18 and above
+Fixes: 51ef2be546e2 ("media: i2c: isl7998x: Add driver for Intersil ISL7998x")
+Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
+Reported-by: kernel test robot <lkp@intel.com>
+Cc: Marek Vasut <marex@denx.de>
+Cc: Pengutronix Kernel Team <kernel@pengutronix.de>
+Reviewed-by: Michael Tretter <m.tretter@pengutronix.de>
+Signed-off-by: Sakari Ailus <sakari.ailus@linux.intel.com>
+Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/media/i2c/Kconfig | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/media/i2c/Kconfig b/drivers/media/i2c/Kconfig
+index 2b20aa6c37b1..c926e5d43820 100644
+--- a/drivers/media/i2c/Kconfig
++++ b/drivers/media/i2c/Kconfig
+@@ -1178,6 +1178,7 @@ config VIDEO_ISL7998X
+       depends on OF_GPIO
+       select MEDIA_CONTROLLER
+       select VIDEO_V4L2_SUBDEV_API
++      select V4L2_FWNODE
+       help
+         Support for Intersil ISL7998x analog to MIPI-CSI2 or
+         BT.656 decoder.
+-- 
+2.35.1
+
diff --git a/queue-5.19/media-patch-pci-atomisp_cmd-fix-three-missing-checks.patch b/queue-5.19/media-patch-pci-atomisp_cmd-fix-three-missing-checks.patch
new file mode 100644 (file)
index 0000000..f329fc3
--- /dev/null
@@ -0,0 +1,148 @@
+From ad019b312a26390838e88bff7ec653e493796d75 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Apr 2022 05:14:15 +0100
+Subject: media: [PATCH] pci: atomisp_cmd: fix three missing checks on list
+ iterator
+
+From: Xiaomeng Tong <xiam0nd.tong@gmail.com>
+
+[ Upstream commit 09b204eb9de9fdf07d028c41c4331b5cfeb70dd7 ]
+
+The three bugs are here:
+       __func__, s3a_buf->s3a_data->exp_id);
+       __func__, md_buf->metadata->exp_id);
+       __func__, dis_buf->dis_data->exp_id);
+
+The list iterator 's3a_buf/md_buf/dis_buf' will point to a bogus
+position containing HEAD if the list is empty or no element is found.
+This case must be checked before any use of the iterator, otherwise
+it will lead to a invalid memory access.
+
+To fix this bug, add an check. Use a new variable '*_iter' as the
+list iterator, while use the old variable '*_buf' as a dedicated
+pointer to point to the found element.
+
+Link: https://lore.kernel.org/linux-media/20220414041415.3342-1-xiam0nd.tong@gmail.com
+Cc: stable@vger.kernel.org
+Fixes: ad85094b293e4 ("Revert "media: staging: atomisp: Remove driver"")
+Signed-off-by: Xiaomeng Tong <xiam0nd.tong@gmail.com>
+Signed-off-by: Mauro Carvalho Chehab <mchehab@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../staging/media/atomisp/pci/atomisp_cmd.c   | 57 ++++++++++++-------
+ 1 file changed, 36 insertions(+), 21 deletions(-)
+
+diff --git a/drivers/staging/media/atomisp/pci/atomisp_cmd.c b/drivers/staging/media/atomisp/pci/atomisp_cmd.c
+index 97d5a528969b..0da0b69a4637 100644
+--- a/drivers/staging/media/atomisp/pci/atomisp_cmd.c
++++ b/drivers/staging/media/atomisp/pci/atomisp_cmd.c
+@@ -901,9 +901,9 @@ void atomisp_buf_done(struct atomisp_sub_device *asd, int error,
+       int err;
+       unsigned long irqflags;
+       struct ia_css_frame *frame = NULL;
+-      struct atomisp_s3a_buf *s3a_buf = NULL, *_s3a_buf_tmp;
+-      struct atomisp_dis_buf *dis_buf = NULL, *_dis_buf_tmp;
+-      struct atomisp_metadata_buf *md_buf = NULL, *_md_buf_tmp;
++      struct atomisp_s3a_buf *s3a_buf = NULL, *_s3a_buf_tmp, *s3a_iter;
++      struct atomisp_dis_buf *dis_buf = NULL, *_dis_buf_tmp, *dis_iter;
++      struct atomisp_metadata_buf *md_buf = NULL, *_md_buf_tmp, *md_iter;
+       enum atomisp_metadata_type md_type;
+       struct atomisp_device *isp = asd->isp;
+       struct v4l2_control ctrl;
+@@ -942,60 +942,75 @@ void atomisp_buf_done(struct atomisp_sub_device *asd, int error,
+       switch (buf_type) {
+       case IA_CSS_BUFFER_TYPE_3A_STATISTICS:
+-              list_for_each_entry_safe(s3a_buf, _s3a_buf_tmp,
++              list_for_each_entry_safe(s3a_iter, _s3a_buf_tmp,
+                                        &asd->s3a_stats_in_css, list) {
+-                      if (s3a_buf->s3a_data ==
++                      if (s3a_iter->s3a_data ==
+                           buffer.css_buffer.data.stats_3a) {
+-                              list_del_init(&s3a_buf->list);
+-                              list_add_tail(&s3a_buf->list,
++                              list_del_init(&s3a_iter->list);
++                              list_add_tail(&s3a_iter->list,
+                                             &asd->s3a_stats_ready);
++                              s3a_buf = s3a_iter;
+                               break;
+                       }
+               }
+               asd->s3a_bufs_in_css[css_pipe_id]--;
+               atomisp_3a_stats_ready_event(asd, buffer.css_buffer.exp_id);
+-              dev_dbg(isp->dev, "%s: s3a stat with exp_id %d is ready\n",
+-                      __func__, s3a_buf->s3a_data->exp_id);
++              if (s3a_buf)
++                      dev_dbg(isp->dev, "%s: s3a stat with exp_id %d is ready\n",
++                              __func__, s3a_buf->s3a_data->exp_id);
++              else
++                      dev_dbg(isp->dev, "%s: s3a stat is ready with no exp_id found\n",
++                              __func__);
+               break;
+       case IA_CSS_BUFFER_TYPE_METADATA:
+               if (error)
+                       break;
+               md_type = atomisp_get_metadata_type(asd, css_pipe_id);
+-              list_for_each_entry_safe(md_buf, _md_buf_tmp,
++              list_for_each_entry_safe(md_iter, _md_buf_tmp,
+                                        &asd->metadata_in_css[md_type], list) {
+-                      if (md_buf->metadata ==
++                      if (md_iter->metadata ==
+                           buffer.css_buffer.data.metadata) {
+-                              list_del_init(&md_buf->list);
+-                              list_add_tail(&md_buf->list,
++                              list_del_init(&md_iter->list);
++                              list_add_tail(&md_iter->list,
+                                             &asd->metadata_ready[md_type]);
++                              md_buf = md_iter;
+                               break;
+                       }
+               }
+               asd->metadata_bufs_in_css[stream_id][css_pipe_id]--;
+               atomisp_metadata_ready_event(asd, md_type);
+-              dev_dbg(isp->dev, "%s: metadata with exp_id %d is ready\n",
+-                      __func__, md_buf->metadata->exp_id);
++              if (md_buf)
++                      dev_dbg(isp->dev, "%s: metadata with exp_id %d is ready\n",
++                              __func__, md_buf->metadata->exp_id);
++              else
++                      dev_dbg(isp->dev, "%s: metadata is ready with no exp_id found\n",
++                              __func__);
+               break;
+       case IA_CSS_BUFFER_TYPE_DIS_STATISTICS:
+-              list_for_each_entry_safe(dis_buf, _dis_buf_tmp,
++              list_for_each_entry_safe(dis_iter, _dis_buf_tmp,
+                                        &asd->dis_stats_in_css, list) {
+-                      if (dis_buf->dis_data ==
++                      if (dis_iter->dis_data ==
+                           buffer.css_buffer.data.stats_dvs) {
+                               spin_lock_irqsave(&asd->dis_stats_lock,
+                                                 irqflags);
+-                              list_del_init(&dis_buf->list);
+-                              list_add(&dis_buf->list, &asd->dis_stats);
++                              list_del_init(&dis_iter->list);
++                              list_add(&dis_iter->list, &asd->dis_stats);
+                               asd->params.dis_proj_data_valid = true;
+                               spin_unlock_irqrestore(&asd->dis_stats_lock,
+                                                      irqflags);
++                              dis_buf = dis_iter;
+                               break;
+                       }
+               }
+               asd->dis_bufs_in_css--;
+-              dev_dbg(isp->dev, "%s: dis stat with exp_id %d is ready\n",
+-                      __func__, dis_buf->dis_data->exp_id);
++              if (dis_buf)
++                      dev_dbg(isp->dev, "%s: dis stat with exp_id %d is ready\n",
++                              __func__, dis_buf->dis_data->exp_id);
++              else
++                      dev_dbg(isp->dev, "%s: dis stat is ready with no exp_id found\n",
++                              __func__);
+               break;
+       case IA_CSS_BUFFER_TYPE_VF_OUTPUT_FRAME:
+       case IA_CSS_BUFFER_TYPE_SEC_VF_OUTPUT_FRAME:
+-- 
+2.35.1
+
diff --git a/queue-5.19/mips-cpuinfo-fix-a-warning-for-config_cpumask_offsta.patch b/queue-5.19/mips-cpuinfo-fix-a-warning-for-config_cpumask_offsta.patch
new file mode 100644 (file)
index 0000000..1da20ec
--- /dev/null
@@ -0,0 +1,68 @@
+From 4ec7e9e909329887c2c397938d5deccbe1f97c21 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Jul 2022 16:41:34 +0800
+Subject: MIPS: cpuinfo: Fix a warning for CONFIG_CPUMASK_OFFSTACK
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+[ Upstream commit e1a534f5d074db45ae5cbac41d8912b98e96a006 ]
+
+When CONFIG_CPUMASK_OFFSTACK and CONFIG_DEBUG_PER_CPU_MAPS is selected,
+cpu_max_bits_warn() generates a runtime warning similar as below while
+we show /proc/cpuinfo. Fix this by using nr_cpu_ids (the runtime limit)
+instead of NR_CPUS to iterate CPUs.
+
+[    3.052463] ------------[ cut here ]------------
+[    3.059679] WARNING: CPU: 3 PID: 1 at include/linux/cpumask.h:108 show_cpuinfo+0x5e8/0x5f0
+[    3.070072] Modules linked in: efivarfs autofs4
+[    3.076257] CPU: 0 PID: 1 Comm: systemd Not tainted 5.19-rc5+ #1052
+[    3.084034] Hardware name: Loongson Loongson-3A4000-7A1000-1w-V0.1-CRB/Loongson-LS3A4000-7A1000-1w-EVB-V1.21, BIOS Loongson-UDK2018-V2.0.04082-beta7 04/27
+[    3.099465] Stack : 9000000100157b08 9000000000f18530 9000000000cf846c 9000000100154000
+[    3.109127]         9000000100157a50 0000000000000000 9000000100157a58 9000000000ef7430
+[    3.118774]         90000001001578e8 0000000000000040 0000000000000020 ffffffffffffffff
+[    3.128412]         0000000000aaaaaa 1ab25f00eec96a37 900000010021de80 900000000101c890
+[    3.138056]         0000000000000000 0000000000000000 0000000000000000 0000000000aaaaaa
+[    3.147711]         ffff8000339dc220 0000000000000001 0000000006ab4000 0000000000000000
+[    3.157364]         900000000101c998 0000000000000004 9000000000ef7430 0000000000000000
+[    3.167012]         0000000000000009 000000000000006c 0000000000000000 0000000000000000
+[    3.176641]         9000000000d3de08 9000000001639390 90000000002086d8 00007ffff0080286
+[    3.186260]         00000000000000b0 0000000000000004 0000000000000000 0000000000071c1c
+[    3.195868]         ...
+[    3.199917] Call Trace:
+[    3.203941] [<98000000002086d8>] show_stack+0x38/0x14c
+[    3.210666] [<9800000000cf846c>] dump_stack_lvl+0x60/0x88
+[    3.217625] [<980000000023d268>] __warn+0xd0/0x100
+[    3.223958] [<9800000000cf3c90>] warn_slowpath_fmt+0x7c/0xcc
+[    3.231150] [<9800000000210220>] show_cpuinfo+0x5e8/0x5f0
+[    3.238080] [<98000000004f578c>] seq_read_iter+0x354/0x4b4
+[    3.245098] [<98000000004c2e90>] new_sync_read+0x17c/0x1c4
+[    3.252114] [<98000000004c5174>] vfs_read+0x138/0x1d0
+[    3.258694] [<98000000004c55f8>] ksys_read+0x70/0x100
+[    3.265265] [<9800000000cfde9c>] do_syscall+0x7c/0x94
+[    3.271820] [<9800000000202fe4>] handle_syscall+0xc4/0x160
+[    3.281824] ---[ end trace 8b484262b4b8c24c ]---
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/mips/kernel/proc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/mips/kernel/proc.c b/arch/mips/kernel/proc.c
+index bb43bf850314..8eba5a1ed664 100644
+--- a/arch/mips/kernel/proc.c
++++ b/arch/mips/kernel/proc.c
+@@ -311,7 +311,7 @@ static void *c_start(struct seq_file *m, loff_t *pos)
+ {
+       unsigned long i = *pos;
+-      return i < NR_CPUS ? (void *) (i + 1) : NULL;
++      return i < nr_cpu_ids ? (void *) (i + 1) : NULL;
+ }
+ static void *c_next(struct seq_file *m, void *v, loff_t *pos)
+-- 
+2.35.1
+
diff --git a/queue-5.19/mm-damon-reclaim-fix-potential-memory-leak-in-damon_.patch b/queue-5.19/mm-damon-reclaim-fix-potential-memory-leak-in-damon_.patch
new file mode 100644 (file)
index 0000000..8fe0a04
--- /dev/null
@@ -0,0 +1,46 @@
+From 3177c1c1aa05462312591eb6c49e86a75a617887 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 14 Jul 2022 14:37:46 +0800
+Subject: mm/damon/reclaim: fix potential memory leak in damon_reclaim_init()
+
+From: Jianglei Nie <niejianglei2021@163.com>
+
+[ Upstream commit 188043c7f4f2bd662f2a55957d684fffa543e600 ]
+
+damon_reclaim_init() allocates a memory chunk for ctx with
+damon_new_ctx().  When damon_select_ops() fails, ctx is not released,
+which will lead to a memory leak.
+
+We should release the ctx with damon_destroy_ctx() when damon_select_ops()
+fails to fix the memory leak.
+
+Link: https://lkml.kernel.org/r/20220714063746.2343549-1-niejianglei2021@163.com
+Fixes: 4d69c3457821 ("mm/damon/reclaim: use damon_select_ops() instead of damon_{v,p}a_set_operations()")
+Signed-off-by: Jianglei Nie <niejianglei2021@163.com>
+Reviewed-by: SeongJae Park <sj@kernel.org>
+Cc: <stable@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ mm/damon/reclaim.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/mm/damon/reclaim.c b/mm/damon/reclaim.c
+index 4b07c29effe9..0b3c7396cb90 100644
+--- a/mm/damon/reclaim.c
++++ b/mm/damon/reclaim.c
+@@ -441,8 +441,10 @@ static int __init damon_reclaim_init(void)
+       if (!ctx)
+               return -ENOMEM;
+-      if (damon_select_ops(ctx, DAMON_OPS_PADDR))
++      if (damon_select_ops(ctx, DAMON_OPS_PADDR)) {
++              damon_destroy_ctx(ctx);
+               return -EINVAL;
++      }
+       ctx->callback.after_wmarks_check = damon_reclaim_after_wmarks_check;
+       ctx->callback.after_aggregation = damon_reclaim_after_aggregation;
+-- 
+2.35.1
+
diff --git a/queue-5.19/mtd-rawnand-arasan-fix-clock-rate-in-nv-ddr.patch-18581 b/queue-5.19/mtd-rawnand-arasan-fix-clock-rate-in-nv-ddr.patch-18581
new file mode 100644 (file)
index 0000000..cbe7548
--- /dev/null
@@ -0,0 +1,51 @@
+From bbb048ec8a35951f2c60fed8519533208a158421 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Jun 2022 21:18:24 +0530
+Subject: mtd: rawnand: arasan: Fix clock rate in NV-DDR
+
+From: Olga Kitaina <okitain@gmail.com>
+
+[ Upstream commit e16eceea863b417fd328588b1be1a79de0bc937f ]
+
+According to the Arasan NAND controller spec, the flash clock rate for SDR
+must be <= 100 MHz, while for NV-DDR it must be the same as the rate of the
+CLK line for the mode. The driver previously always set 100 MHz for NV-DDR,
+which would result in incorrect behavior for NV-DDR modes 0-4.
+
+The appropriate clock rate can be calculated from the NV-DDR timing
+parameters as 1/tCK, or for rates measured in picoseconds,
+10^12 / nand_nvddr_timings->tCK_min.
+
+Fixes: 197b88fecc50 ("mtd: rawnand: arasan: Add new Arasan NAND controller")
+CC: stable@vger.kernel.org # 5.8+
+Signed-off-by: Olga Kitaina <okitain@gmail.com>
+Signed-off-by: Amit Kumar Mahapatra <amit.kumar-mahapatra@xilinx.com>
+Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
+Link: https://lore.kernel.org/linux-mtd/20220628154824.12222-3-amit.kumar-mahapatra@xilinx.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/mtd/nand/raw/arasan-nand-controller.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/mtd/nand/raw/arasan-nand-controller.c b/drivers/mtd/nand/raw/arasan-nand-controller.c
+index c5264fa223c4..296fb16c8dc3 100644
+--- a/drivers/mtd/nand/raw/arasan-nand-controller.c
++++ b/drivers/mtd/nand/raw/arasan-nand-controller.c
+@@ -1043,7 +1043,13 @@ static int anfc_setup_interface(struct nand_chip *chip, int target,
+                                DQS_BUFF_SEL_OUT(dqs_mode);
+       }
+-      anand->clk = ANFC_XLNX_SDR_DFLT_CORE_CLK;
++      if (nand_interface_is_sdr(conf)) {
++              anand->clk = ANFC_XLNX_SDR_DFLT_CORE_CLK;
++      } else {
++              /* ONFI timings are defined in picoseconds */
++              anand->clk = div_u64((u64)NSEC_PER_SEC * 1000,
++                                   conf->timings.nvddr.tCK_min);
++      }
+       /*
+        * Due to a hardware bug in the ZynqMP SoC, SDR timing modes 0-1 work
+-- 
+2.35.1
+
diff --git a/queue-5.19/mtd-rawnand-arasan-update-nand-bus-clock-instead-of-.patch b/queue-5.19/mtd-rawnand-arasan-update-nand-bus-clock-instead-of-.patch
new file mode 100644 (file)
index 0000000..ee9e0c0
--- /dev/null
@@ -0,0 +1,55 @@
+From 50be5f20b7d115ecf9abf210cb452dccfbc8cbd1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Jun 2022 21:18:23 +0530
+Subject: mtd: rawnand: arasan: Update NAND bus clock instead of system clock
+
+From: Amit Kumar Mahapatra <amit.kumar-mahapatra@xilinx.com>
+
+[ Upstream commit 7499bfeedb47efc1ee4dc793b92c610d46e6d6a6 ]
+
+In current implementation the Arasan NAND driver is updating the
+system clock(i.e., anand->clk) in accordance to the timing modes
+(i.e., SDR or NVDDR). But as per the Arasan NAND controller spec the
+flash clock or the NAND bus clock(i.e., nfc->bus_clk), need to be
+updated instead. This patch keeps the system clock unchanged and updates
+the NAND bus clock as per the timing modes.
+
+Fixes: 197b88fecc50 ("mtd: rawnand: arasan: Add new Arasan NAND controller")
+CC: stable@vger.kernel.org # 5.8+
+Signed-off-by: Amit Kumar Mahapatra <amit.kumar-mahapatra@xilinx.com>
+Signed-off-by: Miquel Raynal <miquel.raynal@bootlin.com>
+Link: https://lore.kernel.org/linux-mtd/20220628154824.12222-2-amit.kumar-mahapatra@xilinx.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/mtd/nand/raw/arasan-nand-controller.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/mtd/nand/raw/arasan-nand-controller.c b/drivers/mtd/nand/raw/arasan-nand-controller.c
+index 53bd10738418..c5264fa223c4 100644
+--- a/drivers/mtd/nand/raw/arasan-nand-controller.c
++++ b/drivers/mtd/nand/raw/arasan-nand-controller.c
+@@ -347,17 +347,17 @@ static int anfc_select_target(struct nand_chip *chip, int target)
+       /* Update clock frequency */
+       if (nfc->cur_clk != anand->clk) {
+-              clk_disable_unprepare(nfc->controller_clk);
+-              ret = clk_set_rate(nfc->controller_clk, anand->clk);
++              clk_disable_unprepare(nfc->bus_clk);
++              ret = clk_set_rate(nfc->bus_clk, anand->clk);
+               if (ret) {
+                       dev_err(nfc->dev, "Failed to change clock rate\n");
+                       return ret;
+               }
+-              ret = clk_prepare_enable(nfc->controller_clk);
++              ret = clk_prepare_enable(nfc->bus_clk);
+               if (ret) {
+                       dev_err(nfc->dev,
+-                              "Failed to re-enable the controller clock\n");
++                              "Failed to re-enable the bus clock\n");
+                       return ret;
+               }
+-- 
+2.35.1
+
diff --git a/queue-5.19/net-9p-initialize-the-iounit-field-during-fid-creati.patch b/queue-5.19/net-9p-initialize-the-iounit-field-during-fid-creati.patch
new file mode 100644 (file)
index 0000000..49f84fb
--- /dev/null
@@ -0,0 +1,68 @@
+From f701e0dc941da8f665f5e01a21dd8984add1e04f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 10 Jul 2022 09:14:02 -0500
+Subject: net/9p: Initialize the iounit field during fid creation
+
+From: Tyler Hicks <tyhicks@linux.microsoft.com>
+
+[ Upstream commit aa7aeee169480e98cf41d83c01290a37e569be6d ]
+
+Ensure that the fid's iounit field is set to zero when a new fid is
+created. Certain 9P operations, such as OPEN and CREATE, allow the
+server to reply with an iounit size which the client code assigns to the
+p9_fid struct shortly after the fid is created by p9_fid_create(). On
+the other hand, an XATTRWALK operation doesn't allow for the server to
+specify an iounit value. The iounit field of the newly allocated p9_fid
+struct remained uninitialized in that case. Depending on allocation
+patterns, the iounit value could have been something reasonable that was
+carried over from previously freed fids or, in the worst case, could
+have been arbitrary values from non-fid related usages of the memory
+location.
+
+The bug was detected in the Windows Subsystem for Linux 2 (WSL2) kernel
+after the uninitialized iounit field resulted in the typical sequence of
+two getxattr(2) syscalls, one to get the size of an xattr and another
+after allocating a sufficiently sized buffer to fit the xattr value, to
+hit an unexpected ERANGE error in the second call to getxattr(2). An
+uninitialized iounit field would sometimes force rsize to be smaller
+than the xattr value size in p9_client_read_once() and the 9P server in
+WSL refused to chunk up the READ on the attr_fid and, instead, returned
+ERANGE to the client. The virtfs server in QEMU seems happy to chunk up
+the READ and this problem goes undetected there.
+
+Link: https://lkml.kernel.org/r/20220710141402.803295-1-tyhicks@linux.microsoft.com
+Fixes: ebf46264a004 ("fs/9p: Add support user. xattr")
+Cc: stable@vger.kernel.org
+Signed-off-by: Tyler Hicks <tyhicks@linux.microsoft.com>
+Reviewed-by: Christian Schoenebeck <linux_oss@crudebyte.com>
+Signed-off-by: Dominique Martinet <asmadeus@codewreck.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/9p/client.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/net/9p/client.c b/net/9p/client.c
+index 8bba0d9cf975..371519e7b885 100644
+--- a/net/9p/client.c
++++ b/net/9p/client.c
+@@ -889,16 +889,13 @@ static struct p9_fid *p9_fid_create(struct p9_client *clnt)
+       struct p9_fid *fid;
+       p9_debug(P9_DEBUG_FID, "clnt %p\n", clnt);
+-      fid = kmalloc(sizeof(*fid), GFP_KERNEL);
++      fid = kzalloc(sizeof(*fid), GFP_KERNEL);
+       if (!fid)
+               return NULL;
+-      memset(&fid->qid, 0, sizeof(fid->qid));
+       fid->mode = -1;
+       fid->uid = current_fsuid();
+       fid->clnt = clnt;
+-      fid->rdir = NULL;
+-      fid->fid = 0;
+       refcount_set(&fid->count, 1);
+       idr_preload(GFP_KERNEL);
+-- 
+2.35.1
+
diff --git a/queue-5.19/ovl-drop-warn_on-dentry-is-null-in-ovl_encode_fh.patch-29266 b/queue-5.19/ovl-drop-warn_on-dentry-is-null-in-ovl_encode_fh.patch-29266
new file mode 100644 (file)
index 0000000..576a78d
--- /dev/null
@@ -0,0 +1,62 @@
+From ba2c0597b0c1aaf5277e7c02c68e8535863025bc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Jul 2022 19:49:15 +0800
+Subject: ovl: drop WARN_ON() dentry is NULL in ovl_encode_fh()
+
+From: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
+
+[ Upstream commit dd524b7f317de8d31d638cbfdc7be4cf9b770e42 ]
+
+Some code paths cannot guarantee the inode have any dentry alias. So
+WARN_ON() all !dentry may flood the kernel logs.
+
+For example, when an overlayfs inode is watched by inotifywait (1), and
+someone is trying to read the /proc/$(pidof inotifywait)/fdinfo/INOTIFY_FD,
+at that time if the dentry has been reclaimed by kernel (such as
+echo 2 > /proc/sys/vm/drop_caches), there will be a WARN_ON(). The
+printed call stack would be like:
+
+    ? show_mark_fhandle+0xf0/0xf0
+    show_mark_fhandle+0x4a/0xf0
+    ? show_mark_fhandle+0xf0/0xf0
+    ? seq_vprintf+0x30/0x50
+    ? seq_printf+0x53/0x70
+    ? show_mark_fhandle+0xf0/0xf0
+    inotify_fdinfo+0x70/0x90
+    show_fdinfo.isra.4+0x53/0x70
+    seq_show+0x130/0x170
+    seq_read+0x153/0x440
+    vfs_read+0x94/0x150
+    ksys_read+0x5f/0xe0
+    do_syscall_64+0x59/0x1e0
+    entry_SYSCALL_64_after_hwframe+0x44/0xa9
+
+So let's drop WARN_ON() to avoid kernel log flooding.
+
+Reported-by: Hongbo Yin <yinhongbo@bytedance.com>
+Signed-off-by: Jiachen Zhang <zhangjiachen.jaycee@bytedance.com>
+Signed-off-by: Tianci Zhang <zhangtianci.1997@bytedance.com>
+Fixes: 8ed5eec9d6c4 ("ovl: encode pure upper file handles")
+Cc: <stable@vger.kernel.org> # v4.16
+Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/overlayfs/export.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
+index 2eada97bbd23..e065a5b9a442 100644
+--- a/fs/overlayfs/export.c
++++ b/fs/overlayfs/export.c
+@@ -259,7 +259,7 @@ static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
+               return FILEID_INVALID;
+       dentry = d_find_any_alias(inode);
+-      if (WARN_ON(!dentry))
++      if (!dentry)
+               return FILEID_INVALID;
+       bytes = ovl_dentry_to_fid(ofs, dentry, fid, buflen);
+-- 
+2.35.1
+
diff --git a/queue-5.19/parisc-check-the-return-value-of-ioremap-in-lba_driv.patch b/queue-5.19/parisc-check-the-return-value-of-ioremap-in-lba_driv.patch
new file mode 100644 (file)
index 0000000..e8732d6
--- /dev/null
@@ -0,0 +1,44 @@
+From 9a81465effab09f20d63b68e363001be45ebe2c4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 22 Jul 2022 10:57:09 +0800
+Subject: parisc: Check the return value of ioremap() in lba_driver_probe()
+
+From: William Dean <williamsukatube@gmail.com>
+
+[ Upstream commit cf59f34d7f978d14d6520fd80a78a5ad5cb8abf8 ]
+
+The function ioremap() in lba_driver_probe() can fail, so
+its return value should be checked.
+
+Fixes: 4bdc0d676a643 ("remove ioremap_nocache and devm_ioremap_nocache")
+Reported-by: Hacash Robot <hacashRobot@santino.com>
+Signed-off-by: William Dean <williamsukatube@gmail.com>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Cc: <stable@vger.kernel.org> # v5.6+
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/parisc/lba_pci.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/parisc/lba_pci.c b/drivers/parisc/lba_pci.c
+index 732b516c7bf8..afc6e66ddc31 100644
+--- a/drivers/parisc/lba_pci.c
++++ b/drivers/parisc/lba_pci.c
+@@ -1476,9 +1476,13 @@ lba_driver_probe(struct parisc_device *dev)
+       u32 func_class;
+       void *tmp_obj;
+       char *version;
+-      void __iomem *addr = ioremap(dev->hpa.start, 4096);
++      void __iomem *addr;
+       int max;
++      addr = ioremap(dev->hpa.start, 4096);
++      if (addr == NULL)
++              return -ENOMEM;
++
+       /* Read HW Rev First */
+       func_class = READ_REG32(addr + LBA_FCLASS);
+-- 
+2.35.1
+
diff --git a/queue-5.19/parisc-drop-pa_swapper_pg_lock-spinlock.patch-26906 b/queue-5.19/parisc-drop-pa_swapper_pg_lock-spinlock.patch-26906
new file mode 100644 (file)
index 0000000..af6a8fc
--- /dev/null
@@ -0,0 +1,39 @@
+From c22343f0a3caf17518da7e6bc969dd3cc55726a1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Jul 2022 06:19:41 +0200
+Subject: parisc: Drop pa_swapper_pg_lock spinlock
+
+From: Helge Deller <deller@gmx.de>
+
+[ Upstream commit 3fbc9a7de0564c55d8a9584c9cd2c9dfe6bd6d43 ]
+
+This spinlock was dropped with commit b7795074a046 ("parisc: Optimize
+per-pagetable spinlocks") in kernel v5.12.
+
+Remove it to silence a sparse warning.
+
+Signed-off-by: Helge Deller <deller@gmx.de>
+Reported-by: kernel test robot <lkp@intel.com>
+Cc: <stable@vger.kernel.org> # v5.12+
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/parisc/kernel/cache.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
+index a9bc578e4c52..af3d7cdc1541 100644
+--- a/arch/parisc/kernel/cache.c
++++ b/arch/parisc/kernel/cache.c
+@@ -50,9 +50,6 @@ void flush_instruction_cache_local(void); /* flushes local code-cache only */
+  */
+ DEFINE_SPINLOCK(pa_tlb_flush_lock);
+-/* Swapper page setup lock. */
+-DEFINE_SPINLOCK(pa_swapper_pg_lock);
+-
+ #if defined(CONFIG_64BIT) && defined(CONFIG_SMP)
+ int pa_serialize_tlb_flushes __ro_after_init;
+ #endif
+-- 
+2.35.1
+
diff --git a/queue-5.19/parisc-fix-device-names-in-proc-iomem.patch-18836 b/queue-5.19/parisc-fix-device-names-in-proc-iomem.patch-18836
new file mode 100644 (file)
index 0000000..15956da
--- /dev/null
@@ -0,0 +1,50 @@
+From 156a90cb6307af38a8e9e3293999b846a0a84fa0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 18 Jul 2022 17:06:47 +0200
+Subject: parisc: Fix device names in /proc/iomem
+
+From: Helge Deller <deller@gmx.de>
+
+[ Upstream commit cab56b51ec0e69128909cef4650e1907248d821b ]
+
+Fix the output of /proc/iomem to show the real hardware device name
+including the pa_pathname, e.g. "Merlin 160 Core Centronics [8:16:0]".
+Up to now only the pa_pathname ("[8:16.0]") was shown.
+
+Signed-off-by: Helge Deller <deller@gmx.de>
+Cc: <stable@vger.kernel.org> # v4.9+
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/parisc/kernel/drivers.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c
+index 776d624a7207..d126e78e101a 100644
+--- a/arch/parisc/kernel/drivers.c
++++ b/arch/parisc/kernel/drivers.c
+@@ -520,7 +520,6 @@ alloc_pa_dev(unsigned long hpa, struct hardware_path *mod_path)
+       dev->id.hversion_rev = iodc_data[1] & 0x0f;
+       dev->id.sversion = ((iodc_data[4] & 0x0f) << 16) |
+                       (iodc_data[5] << 8) | iodc_data[6];
+-      dev->hpa.name = parisc_pathname(dev);
+       dev->hpa.start = hpa;
+       /* This is awkward.  The STI spec says that gfx devices may occupy
+        * 32MB or 64MB.  Unfortunately, we don't know how to tell whether
+@@ -534,10 +533,10 @@ alloc_pa_dev(unsigned long hpa, struct hardware_path *mod_path)
+               dev->hpa.end = hpa + 0xfff;
+       }
+       dev->hpa.flags = IORESOURCE_MEM;
+-      name = parisc_hardware_description(&dev->id);
+-      if (name) {
+-              strlcpy(dev->name, name, sizeof(dev->name));
+-      }
++      dev->hpa.name = dev->name;
++      name = parisc_hardware_description(&dev->id) ? : "unknown";
++      snprintf(dev->name, sizeof(dev->name), "%s [%s]",
++              name, parisc_pathname(dev));
+       /* Silently fail things like mouse ports which are subsumed within
+        * the keyboard controller
+-- 
+2.35.1
+
diff --git a/queue-5.19/parisc-io_pgetevents_time64-needs-compat-syscall-in-.patch b/queue-5.19/parisc-io_pgetevents_time64-needs-compat-syscall-in-.patch
new file mode 100644 (file)
index 0000000..189d46a
--- /dev/null
@@ -0,0 +1,42 @@
+From a66c3e5c25c595fb101a1744bda69271cebe8ed3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Aug 2022 17:36:15 +0200
+Subject: parisc: io_pgetevents_time64() needs compat syscall in 32-bit compat
+ mode
+
+From: Helge Deller <deller@gmx.de>
+
+[ Upstream commit 6431e92fc827bdd2d28f79150d90415ba9ce0d21 ]
+
+For all syscalls in 32-bit compat mode on 64-bit kernels the upper
+32-bits of the 64-bit registers are zeroed out, so a negative 32-bit
+signed value will show up as positive 64-bit signed value.
+
+This behaviour breaks the io_pgetevents_time64() syscall which expects
+signed 64-bit values for the "min_nr" and "nr" parameters.
+Fix this by switching to the compat_sys_io_pgetevents_time64() syscall,
+which uses "compat_long_t" types for those parameters.
+
+Cc: <stable@vger.kernel.org> # v5.1+
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/parisc/kernel/syscalls/syscall.tbl | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl
+index 68b46fe2f17c..8a99c998da9b 100644
+--- a/arch/parisc/kernel/syscalls/syscall.tbl
++++ b/arch/parisc/kernel/syscalls/syscall.tbl
+@@ -413,7 +413,7 @@
+ 412   32      utimensat_time64                sys_utimensat                   sys_utimensat
+ 413   32      pselect6_time64                 sys_pselect6                    compat_sys_pselect6_time64
+ 414   32      ppoll_time64                    sys_ppoll                       compat_sys_ppoll_time64
+-416   32      io_pgetevents_time64            sys_io_pgetevents               sys_io_pgetevents
++416   32      io_pgetevents_time64            sys_io_pgetevents               compat_sys_io_pgetevents_time64
+ 417   32      recvmmsg_time64                 sys_recvmmsg                    compat_sys_recvmmsg_time64
+ 418   32      mq_timedsend_time64             sys_mq_timedsend                sys_mq_timedsend
+ 419   32      mq_timedreceive_time64          sys_mq_timedreceive             sys_mq_timedreceive
+-- 
+2.35.1
+
diff --git a/queue-5.19/pci-aer-iterate-over-error-counters-instead-of-error.patch b/queue-5.19/pci-aer-iterate-over-error-counters-instead-of-error.patch
new file mode 100644 (file)
index 0000000..5f09274
--- /dev/null
@@ -0,0 +1,61 @@
+From c92eaff5283892c1b13a4e308429f5b0ecfd9d06 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 May 2022 18:14:41 +0000
+Subject: PCI/AER: Iterate over error counters instead of error strings
+
+From: Mohamed Khalfella <mkhalfella@purestorage.com>
+
+[ Upstream commit 5e6ae050955b566484f3cc6a66e3925eae87a0ed ]
+
+Previously we iterated over AER stat *names*, e.g.,
+aer_correctable_error_string[32], but the actual stat *counters* may not be
+that large, e.g., pdev->aer_stats->dev_cor_errs[16], which means that we
+printed junk in the sysfs stats files.
+
+Iterate over the stat counter arrays instead of the names to avoid this
+junk.
+
+Also, added a build time check to make sure all
+counters have entries in strings array.
+
+Fixes: 0678e3109a3c ("PCI/AER: Simplify __aer_print_error()")
+Link: https://lore.kernel.org/r/20220509181441.31884-1-mkhalfella@purestorage.com
+Reported-by: Meeta Saggi <msaggi@purestorage.com>
+Signed-off-by: Mohamed Khalfella <mkhalfella@purestorage.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Meeta Saggi <msaggi@purestorage.com>
+Reviewed-by: Eric Badger <ebadger@purestorage.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pci/pcie/aer.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
+index 7952e5efd6cf..a1e38ca93cd9 100644
+--- a/drivers/pci/pcie/aer.c
++++ b/drivers/pci/pcie/aer.c
+@@ -538,7 +538,7 @@ static const char *aer_agent_string[] = {
+       u64 *stats = pdev->aer_stats->stats_array;                      \
+       size_t len = 0;                                                 \
+                                                                       \
+-      for (i = 0; i < ARRAY_SIZE(strings_array); i++) {               \
++      for (i = 0; i < ARRAY_SIZE(pdev->aer_stats->stats_array); i++) {\
+               if (strings_array[i])                                   \
+                       len += sysfs_emit_at(buf, len, "%s %llu\n",     \
+                                            strings_array[i],          \
+@@ -1347,6 +1347,11 @@ static int aer_probe(struct pcie_device *dev)
+       struct device *device = &dev->device;
+       struct pci_dev *port = dev->port;
++      BUILD_BUG_ON(ARRAY_SIZE(aer_correctable_error_string) <
++                   AER_MAX_TYPEOF_COR_ERRS);
++      BUILD_BUG_ON(ARRAY_SIZE(aer_uncorrectable_error_string) <
++                   AER_MAX_TYPEOF_UNCOR_ERRS);
++
+       /* Limit to Root Ports or Root Complex Event Collectors */
+       if ((pci_pcie_type(port) != PCI_EXP_TYPE_RC_EC) &&
+           (pci_pcie_type(port) != PCI_EXP_TYPE_ROOT_PORT))
+-- 
+2.35.1
+
diff --git a/queue-5.19/pci-qcom-power-on-phy-before-ipq8074-dbi-register-ac.patch b/queue-5.19/pci-qcom-power-on-phy-before-ipq8074-dbi-register-ac.patch
new file mode 100644 (file)
index 0000000..c868734
--- /dev/null
@@ -0,0 +1,111 @@
+From 6b7e8c26356c83671bddb493326d7b6ba6b45b6f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Jun 2022 17:50:03 +0200
+Subject: PCI: qcom: Power on PHY before IPQ8074 DBI register accesses
+
+From: Robert Marko <robimarko@gmail.com>
+
+[ Upstream commit a0e43bb9973b06ce5c666f0901e104e2037c1b34 ]
+
+Currently the Gen2 port in IPQ8074 will cause the system to hang as it
+accesses DBI registers in qcom_pcie_init_2_3_3(), and those are only
+accesible after phy_power_on().
+
+Move the DBI read/writes to a new qcom_pcie_post_init_2_3_3(), which is
+executed after phy_power_on().
+
+Link: https://lore.kernel.org/r/20220623155004.688090-1-robimarko@gmail.com
+Fixes: a0fd361db8e5 ("PCI: dwc: Move "dbi", "dbi2", and "addr_space" resource setup into common code")
+Signed-off-by: Robert Marko <robimarko@gmail.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Dmitry Baryshkov <dmitry.baryshkov@linaro.org>
+Cc: stable@vger.kernel.org     # v5.11+
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pci/controller/dwc/pcie-qcom.c | 48 +++++++++++++++-----------
+ 1 file changed, 28 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/pci/controller/dwc/pcie-qcom.c b/drivers/pci/controller/dwc/pcie-qcom.c
+index 2ea13750b492..3bbe1612a930 100644
+--- a/drivers/pci/controller/dwc/pcie-qcom.c
++++ b/drivers/pci/controller/dwc/pcie-qcom.c
+@@ -1038,9 +1038,7 @@ static int qcom_pcie_init_2_3_3(struct qcom_pcie *pcie)
+       struct qcom_pcie_resources_2_3_3 *res = &pcie->res.v2_3_3;
+       struct dw_pcie *pci = pcie->pci;
+       struct device *dev = pci->dev;
+-      u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
+       int i, ret;
+-      u32 val;
+       for (i = 0; i < ARRAY_SIZE(res->rst); i++) {
+               ret = reset_control_assert(res->rst[i]);
+@@ -1097,6 +1095,33 @@ static int qcom_pcie_init_2_3_3(struct qcom_pcie *pcie)
+               goto err_clk_aux;
+       }
++      return 0;
++
++err_clk_aux:
++      clk_disable_unprepare(res->ahb_clk);
++err_clk_ahb:
++      clk_disable_unprepare(res->axi_s_clk);
++err_clk_axi_s:
++      clk_disable_unprepare(res->axi_m_clk);
++err_clk_axi_m:
++      clk_disable_unprepare(res->iface);
++err_clk_iface:
++      /*
++       * Not checking for failure, will anyway return
++       * the original failure in 'ret'.
++       */
++      for (i = 0; i < ARRAY_SIZE(res->rst); i++)
++              reset_control_assert(res->rst[i]);
++
++      return ret;
++}
++
++static int qcom_pcie_post_init_2_3_3(struct qcom_pcie *pcie)
++{
++      struct dw_pcie *pci = pcie->pci;
++      u16 offset = dw_pcie_find_capability(pci, PCI_CAP_ID_EXP);
++      u32 val;
++
+       writel(SLV_ADDR_SPACE_SZ,
+               pcie->parf + PCIE20_v3_PARF_SLV_ADDR_SPACE_SIZE);
+@@ -1124,24 +1149,6 @@ static int qcom_pcie_init_2_3_3(struct qcom_pcie *pcie)
+               PCI_EXP_DEVCTL2);
+       return 0;
+-
+-err_clk_aux:
+-      clk_disable_unprepare(res->ahb_clk);
+-err_clk_ahb:
+-      clk_disable_unprepare(res->axi_s_clk);
+-err_clk_axi_s:
+-      clk_disable_unprepare(res->axi_m_clk);
+-err_clk_axi_m:
+-      clk_disable_unprepare(res->iface);
+-err_clk_iface:
+-      /*
+-       * Not checking for failure, will anyway return
+-       * the original failure in 'ret'.
+-       */
+-      for (i = 0; i < ARRAY_SIZE(res->rst); i++)
+-              reset_control_assert(res->rst[i]);
+-
+-      return ret;
+ }
+ static int qcom_pcie_get_resources_2_7_0(struct qcom_pcie *pcie)
+@@ -1467,6 +1474,7 @@ static const struct qcom_pcie_ops ops_2_4_0 = {
+ static const struct qcom_pcie_ops ops_2_3_3 = {
+       .get_resources = qcom_pcie_get_resources_2_3_3,
+       .init = qcom_pcie_init_2_3_3,
++      .post_init = qcom_pcie_post_init_2_3_3,
+       .deinit = qcom_pcie_deinit_2_3_3,
+       .ltssm_enable = qcom_pcie_2_3_2_ltssm_enable,
+ };
+-- 
+2.35.1
+
diff --git a/queue-5.19/powerpc-64e-fix-early-tlb-miss-with-kuap.patch-29650 b/queue-5.19/powerpc-64e-fix-early-tlb-miss-with-kuap.patch-29650
new file mode 100644 (file)
index 0000000..ef947a0
--- /dev/null
@@ -0,0 +1,93 @@
+From b57c15bda53986f1a0827a03ea2444bd6bc5bbbf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Jun 2022 16:48:54 +0200
+Subject: powerpc/64e: Fix early TLB miss with KUAP
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+[ Upstream commit 09317643117ade87c03158341e87466413fa8f1a ]
+
+With KUAP, the TLB miss handler bails out when an access to user
+memory is performed with a nul TID.
+
+But the normal TLB miss routine which is only used early during boot
+does the check regardless for all memory areas, not only user memory.
+
+By chance there is no early IO or vmalloc access, but when KASAN
+come we will start having early TLB misses.
+
+Fix it by creating a special branch for user accesses similar to the
+one in the 'bolted' TLB miss handlers. Unfortunately SPRN_MAS1 is
+now read too early and there are no registers available to preserve
+it so it will be read a second time.
+
+Fixes: 57bc963837f5 ("powerpc/kuap: Wire-up KUAP on book3e/64")
+Cc: stable@vger.kernel.org
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/8d6c5859a45935d6e1a336da4dc20be421e8cea7.1656427701.git.christophe.leroy@csgroup.eu
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/mm/nohash/tlb_low_64e.S | 17 ++++++++---------
+ 1 file changed, 8 insertions(+), 9 deletions(-)
+
+diff --git a/arch/powerpc/mm/nohash/tlb_low_64e.S b/arch/powerpc/mm/nohash/tlb_low_64e.S
+index 8b97c4acfebf..9e9ab3803fb2 100644
+--- a/arch/powerpc/mm/nohash/tlb_low_64e.S
++++ b/arch/powerpc/mm/nohash/tlb_low_64e.S
+@@ -583,7 +583,7 @@ itlb_miss_fault_e6500:
+        */
+       rlwimi  r11,r14,32-19,27,27
+       rlwimi  r11,r14,32-16,19,19
+-      beq     normal_tlb_miss
++      beq     normal_tlb_miss_user
+       /* XXX replace the RMW cycles with immediate loads + writes */
+ 1:    mfspr   r10,SPRN_MAS1
+       cmpldi  cr0,r15,8               /* Check for vmalloc region */
+@@ -626,7 +626,7 @@ itlb_miss_fault_e6500:
+       cmpldi  cr0,r15,0                       /* Check for user region */
+       std     r14,EX_TLB_ESR(r12)             /* write crazy -1 to frame */
+-      beq     normal_tlb_miss
++      beq     normal_tlb_miss_user
+       li      r11,_PAGE_PRESENT|_PAGE_BAP_SX  /* Base perm */
+       oris    r11,r11,_PAGE_ACCESSED@h
+@@ -653,6 +653,12 @@ itlb_miss_fault_e6500:
+  * r11 = PTE permission mask
+  * r10 = crap (free to use)
+  */
++normal_tlb_miss_user:
++#ifdef CONFIG_PPC_KUAP
++      mfspr   r14,SPRN_MAS1
++      rlwinm. r14,r14,0,0x3fff0000
++      beq-    normal_tlb_miss_access_fault /* KUAP fault */
++#endif
+ normal_tlb_miss:
+       /* So we first construct the page table address. We do that by
+        * shifting the bottom of the address (not the region ID) by
+@@ -683,11 +689,6 @@ finish_normal_tlb_miss:
+       /* Check if required permissions are met */
+       andc.   r15,r11,r14
+       bne-    normal_tlb_miss_access_fault
+-#ifdef CONFIG_PPC_KUAP
+-      mfspr   r11,SPRN_MAS1
+-      rlwinm. r10,r11,0,0x3fff0000
+-      beq-    normal_tlb_miss_access_fault /* KUAP fault */
+-#endif
+       /* Now we build the MAS:
+        *
+@@ -709,9 +710,7 @@ finish_normal_tlb_miss:
+       rldicl  r10,r14,64-8,64-8
+       cmpldi  cr0,r10,BOOK3E_PAGESZ_4K
+       beq-    1f
+-#ifndef CONFIG_PPC_KUAP
+       mfspr   r11,SPRN_MAS1
+-#endif
+       rlwimi  r11,r14,31,21,24
+       rlwinm  r11,r11,0,21,19
+       mtspr   SPRN_MAS1,r11
+-- 
+2.35.1
+
diff --git a/queue-5.19/powerpc-fsl-pci-fix-class-code-of-pcie-root-port.patch-7836 b/queue-5.19/powerpc-fsl-pci-fix-class-code-of-pcie-root-port.patch-7836
new file mode 100644 (file)
index 0000000..9dc0ca9
--- /dev/null
@@ -0,0 +1,93 @@
+From 786ebb67ed1e8ac601ff8f1dcdfdc3e12e3cb5b5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Jul 2022 12:10:43 +0200
+Subject: powerpc/fsl-pci: Fix Class Code of PCIe Root Port
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Pali Rohár <pali@kernel.org>
+
+[ Upstream commit 0c551abfa004ce154d487d91777bf221c808a64f ]
+
+By default old pre-3.0 Freescale PCIe controllers reports invalid PCI Class
+Code 0x0b20 for PCIe Root Port. It can be seen by lspci -b output on P2020
+board which has this pre-3.0 controller:
+
+  $ lspci -bvnn
+  00:00.0 Power PC [0b20]: Freescale Semiconductor Inc P2020E [1957:0070] (rev 21)
+          !!! Invalid class 0b20 for header type 01
+          Capabilities: [4c] Express Root Port (Slot-), MSI 00
+
+Fix this issue by programming correct PCI Class Code 0x0604 for PCIe Root
+Port to the Freescale specific PCIe register 0x474.
+
+With this change lspci -b output is:
+
+  $ lspci -bvnn
+  00:00.0 PCI bridge [0604]: Freescale Semiconductor Inc P2020E [1957:0070] (rev 21) (prog-if 00 [Normal decode])
+          Capabilities: [4c] Express Root Port (Slot-), MSI 00
+
+Without any "Invalid class" error. So class code was properly reflected
+into standard (read-only) PCI register 0x08.
+
+Same fix is already implemented in U-Boot pcie_fsl.c driver in commit:
+http://source.denx.de/u-boot/u-boot/-/commit/d18d06ac35229345a0af80977a408cfbe1d1015b
+
+Fix activated by U-Boot stay active also after booting Linux kernel.
+But boards which use older U-Boot version without that fix are affected and
+still require this fix.
+
+So implement this class code fix also in kernel fsl_pci.c driver.
+
+Cc: stable@vger.kernel.org
+Signed-off-by: Pali Rohár <pali@kernel.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20220706101043.4867-1-pali@kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/sysdev/fsl_pci.c | 8 ++++++++
+ arch/powerpc/sysdev/fsl_pci.h | 1 +
+ 2 files changed, 9 insertions(+)
+
+diff --git a/arch/powerpc/sysdev/fsl_pci.c b/arch/powerpc/sysdev/fsl_pci.c
+index 1011cfea2e32..bfbb8c8fc9aa 100644
+--- a/arch/powerpc/sysdev/fsl_pci.c
++++ b/arch/powerpc/sysdev/fsl_pci.c
+@@ -521,6 +521,7 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary)
+       struct resource rsrc;
+       const int *bus_range;
+       u8 hdr_type, progif;
++      u32 class_code;
+       struct device_node *dev;
+       struct ccsr_pci __iomem *pci;
+       u16 temp;
+@@ -594,6 +595,13 @@ int fsl_add_bridge(struct platform_device *pdev, int is_primary)
+                       PPC_INDIRECT_TYPE_SURPRESS_PRIMARY_BUS;
+               if (fsl_pcie_check_link(hose))
+                       hose->indirect_type |= PPC_INDIRECT_TYPE_NO_PCIE_LINK;
++              /* Fix Class Code to PCI_CLASS_BRIDGE_PCI_NORMAL for pre-3.0 controller */
++              if (in_be32(&pci->block_rev1) < PCIE_IP_REV_3_0) {
++                      early_read_config_dword(hose, 0, 0, PCIE_FSL_CSR_CLASSCODE, &class_code);
++                      class_code &= 0xff;
++                      class_code |= PCI_CLASS_BRIDGE_PCI_NORMAL << 8;
++                      early_write_config_dword(hose, 0, 0, PCIE_FSL_CSR_CLASSCODE, class_code);
++              }
+       } else {
+               /*
+                * Set PBFR(PCI Bus Function Register)[10] = 1 to
+diff --git a/arch/powerpc/sysdev/fsl_pci.h b/arch/powerpc/sysdev/fsl_pci.h
+index cdbde2e0c96e..093a875d7d1e 100644
+--- a/arch/powerpc/sysdev/fsl_pci.h
++++ b/arch/powerpc/sysdev/fsl_pci.h
+@@ -18,6 +18,7 @@ struct platform_device;
+ #define PCIE_LTSSM    0x0404          /* PCIE Link Training and Status */
+ #define PCIE_LTSSM_L0 0x16            /* L0 state */
++#define PCIE_FSL_CSR_CLASSCODE        0x474   /* FSL GPEX CSR */
+ #define PCIE_IP_REV_2_2               0x02080202 /* PCIE IP block version Rev2.2 */
+ #define PCIE_IP_REV_3_0               0x02080300 /* PCIE IP block version Rev3.0 */
+ #define PIWAR_EN              0x80000000      /* Enable */
+-- 
+2.35.1
+
diff --git a/queue-5.19/powerpc-powernv-avoid-crashing-if-rng-is-null.patch-9536 b/queue-5.19/powerpc-powernv-avoid-crashing-if-rng-is-null.patch-9536
new file mode 100644 (file)
index 0000000..589f237
--- /dev/null
@@ -0,0 +1,44 @@
+From a167a432b19b5f7084da36f2a360ff5a55a3a4e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Jul 2022 00:32:17 +1000
+Subject: powerpc/powernv: Avoid crashing if rng is NULL
+
+From: Michael Ellerman <mpe@ellerman.id.au>
+
+[ Upstream commit 90b5d4fe0b3ba7f589c6723c6bfb559d9e83956a ]
+
+On a bare-metal Power8 system that doesn't have an "ibm,power-rng", a
+malicious QEMU and guest that ignore the absence of the
+KVM_CAP_PPC_HWRNG flag, and calls H_RANDOM anyway, will dereference a
+NULL pointer.
+
+In practice all Power8 machines have an "ibm,power-rng", but let's not
+rely on that, add a NULL check and early return in
+powernv_get_random_real_mode().
+
+Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.")
+Cc: stable@vger.kernel.org # v4.1+
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20220727143219.2684192-1-mpe@ellerman.id.au
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/platforms/powernv/rng.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
+index 3805ad13b8f3..2287c9cd0cd5 100644
+--- a/arch/powerpc/platforms/powernv/rng.c
++++ b/arch/powerpc/platforms/powernv/rng.c
+@@ -63,6 +63,8 @@ int powernv_get_random_real_mode(unsigned long *v)
+       struct powernv_rng *rng;
+       rng = raw_cpu_read(powernv_rng);
++      if (!rng)
++              return 0;
+       *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+-- 
+2.35.1
+
diff --git a/queue-5.19/powerpc-powernv-kvm-use-darn-for-h_random-on-power9.patch b/queue-5.19/powerpc-powernv-kvm-use-darn-for-h_random-on-power9.patch
new file mode 100644 (file)
index 0000000..e4cfe4d
--- /dev/null
@@ -0,0 +1,145 @@
+From bd1fc5408f65f18f6812c0a58b7424c0ff24a0e7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 28 Jul 2022 00:32:18 +1000
+Subject: powerpc/powernv/kvm: Use darn for H_RANDOM on Power9
+
+From: Jason A. Donenfeld <Jason@zx2c4.com>
+
+[ Upstream commit 7ef3d06f1bc4a5e62273726f3dc2bd258ae1c71f ]
+
+The existing logic in KVM to support guests calling H_RANDOM only works
+on Power8, because it looks for an RNG in the device tree, but on Power9
+we just use darn.
+
+In addition the existing code needs to work in real mode, so we have the
+special cased powernv_get_random_real_mode() to deal with that.
+
+Instead just have KVM call ppc_md.get_random_seed(), and do the real
+mode check inside of there, that way we use whatever RNG is available,
+including darn on Power9.
+
+Fixes: e928e9cb3601 ("KVM: PPC: Book3S HV: Add fast real-mode H_RANDOM implementation.")
+Cc: stable@vger.kernel.org # v4.1+
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Tested-by: Sachin Sant <sachinp@linux.ibm.com>
+[mpe: Rebase on previous commit, update change log appropriately]
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/20220727143219.2684192-2-mpe@ellerman.id.au
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/include/asm/archrandom.h |  5 ----
+ arch/powerpc/kvm/book3s_hv_builtin.c  |  7 +++---
+ arch/powerpc/platforms/powernv/rng.c  | 36 ++++++---------------------
+ 3 files changed, 12 insertions(+), 36 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/archrandom.h b/arch/powerpc/include/asm/archrandom.h
+index 9a53e29680f4..258174304904 100644
+--- a/arch/powerpc/include/asm/archrandom.h
++++ b/arch/powerpc/include/asm/archrandom.h
+@@ -38,12 +38,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
+ #endif /* CONFIG_ARCH_RANDOM */
+ #ifdef CONFIG_PPC_POWERNV
+-int powernv_hwrng_present(void);
+ int powernv_get_random_long(unsigned long *v);
+-int powernv_get_random_real_mode(unsigned long *v);
+-#else
+-static inline int powernv_hwrng_present(void) { return 0; }
+-static inline int powernv_get_random_real_mode(unsigned long *v) { return 0; }
+ #endif
+ #endif /* _ASM_POWERPC_ARCHRANDOM_H */
+diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
+index 88a8f6473c4e..3abaef5f9ac2 100644
+--- a/arch/powerpc/kvm/book3s_hv_builtin.c
++++ b/arch/powerpc/kvm/book3s_hv_builtin.c
+@@ -19,7 +19,7 @@
+ #include <asm/interrupt.h>
+ #include <asm/kvm_ppc.h>
+ #include <asm/kvm_book3s.h>
+-#include <asm/archrandom.h>
++#include <asm/machdep.h>
+ #include <asm/xics.h>
+ #include <asm/xive.h>
+ #include <asm/dbell.h>
+@@ -176,13 +176,14 @@ EXPORT_SYMBOL_GPL(kvmppc_hcall_impl_hv_realmode);
+ int kvmppc_hwrng_present(void)
+ {
+-      return powernv_hwrng_present();
++      return ppc_md.get_random_seed != NULL;
+ }
+ EXPORT_SYMBOL_GPL(kvmppc_hwrng_present);
+ long kvmppc_rm_h_random(struct kvm_vcpu *vcpu)
+ {
+-      if (powernv_get_random_real_mode(&vcpu->arch.regs.gpr[4]))
++      if (ppc_md.get_random_seed &&
++          ppc_md.get_random_seed(&vcpu->arch.regs.gpr[4]))
+               return H_SUCCESS;
+       return H_HARDWARE;
+diff --git a/arch/powerpc/platforms/powernv/rng.c b/arch/powerpc/platforms/powernv/rng.c
+index 2287c9cd0cd5..d19305292e1e 100644
+--- a/arch/powerpc/platforms/powernv/rng.c
++++ b/arch/powerpc/platforms/powernv/rng.c
+@@ -29,15 +29,6 @@ struct powernv_rng {
+ static DEFINE_PER_CPU(struct powernv_rng *, powernv_rng);
+-int powernv_hwrng_present(void)
+-{
+-      struct powernv_rng *rng;
+-
+-      rng = get_cpu_var(powernv_rng);
+-      put_cpu_var(rng);
+-      return rng != NULL;
+-}
+-
+ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
+ {
+       unsigned long parity;
+@@ -58,19 +49,6 @@ static unsigned long rng_whiten(struct powernv_rng *rng, unsigned long val)
+       return val;
+ }
+-int powernv_get_random_real_mode(unsigned long *v)
+-{
+-      struct powernv_rng *rng;
+-
+-      rng = raw_cpu_read(powernv_rng);
+-      if (!rng)
+-              return 0;
+-
+-      *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
+-
+-      return 1;
+-}
+-
+ static int powernv_get_random_darn(unsigned long *v)
+ {
+       unsigned long val;
+@@ -107,12 +85,14 @@ int powernv_get_random_long(unsigned long *v)
+ {
+       struct powernv_rng *rng;
+-      rng = get_cpu_var(powernv_rng);
+-
+-      *v = rng_whiten(rng, in_be64(rng->regs));
+-
+-      put_cpu_var(rng);
+-
++      if (mfmsr() & MSR_DR) {
++              rng = get_cpu_var(powernv_rng);
++              *v = rng_whiten(rng, in_be64(rng->regs));
++              put_cpu_var(rng);
++      } else {
++              rng = raw_cpu_read(powernv_rng);
++              *v = rng_whiten(rng, __raw_rm_readq(rng->regs_real));
++      }
+       return 1;
+ }
+ EXPORT_SYMBOL_GPL(powernv_get_random_long);
+-- 
+2.35.1
+
diff --git a/queue-5.19/powerpc-ptdump-fix-display-of-rw-pages-on-fsl_book3e.patch-3011 b/queue-5.19/powerpc-ptdump-fix-display-of-rw-pages-on-fsl_book3e.patch-3011
new file mode 100644 (file)
index 0000000..78fa7cd
--- /dev/null
@@ -0,0 +1,51 @@
+From 025ddb197f7de58ae9c168a668e11e460880a24f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Jun 2022 16:43:35 +0200
+Subject: powerpc/ptdump: Fix display of RW pages on FSL_BOOK3E
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+[ Upstream commit dd8de84b57b02ba9c1fe530a6d916c0853f136bd ]
+
+On FSL_BOOK3E, _PAGE_RW is defined with two bits, one for user and one
+for supervisor. As soon as one of the two bits is set, the page has
+to be display as RW. But the way it is implemented today requires both
+bits to be set in order to display it as RW.
+
+Instead of display RW when _PAGE_RW bits are set and R otherwise,
+reverse the logic and display R when _PAGE_RW bits are all 0 and
+RW otherwise.
+
+This change has no impact on other platforms as _PAGE_RW is a single
+bit on all of them.
+
+Fixes: 8eb07b187000 ("powerpc/mm: Dump linux pagetables")
+Cc: stable@vger.kernel.org
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/0c33b96317811edf691e81698aaee8fa45ec3449.1656427391.git.christophe.leroy@csgroup.eu
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/mm/ptdump/shared.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/arch/powerpc/mm/ptdump/shared.c b/arch/powerpc/mm/ptdump/shared.c
+index 03607ab90c66..f884760ca5cf 100644
+--- a/arch/powerpc/mm/ptdump/shared.c
++++ b/arch/powerpc/mm/ptdump/shared.c
+@@ -17,9 +17,9 @@ static const struct flag_info flag_array[] = {
+               .clear  = "    ",
+       }, {
+               .mask   = _PAGE_RW,
+-              .val    = _PAGE_RW,
+-              .set    = "rw",
+-              .clear  = "r ",
++              .val    = 0,
++              .set    = "r ",
++              .clear  = "rw",
+       }, {
+               .mask   = _PAGE_EXEC,
+               .val    = _PAGE_EXEC,
+-- 
+2.35.1
+
diff --git a/queue-5.19/powerpc-restore-config_debug_info-in-defconfigs.patch-27837 b/queue-5.19/powerpc-restore-config_debug_info-in-defconfigs.patch-27837
new file mode 100644 (file)
index 0000000..24bd9aa
--- /dev/null
@@ -0,0 +1,309 @@
+From b7d3d9e06c7ee60503525173d9dbda4c5ced3247 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 11 Jun 2022 08:51:57 +0200
+Subject: powerpc: Restore CONFIG_DEBUG_INFO in defconfigs
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+[ Upstream commit 92f89ec1b534b6eca2b81bae97d30a786932f51a ]
+
+Commit f9b3cd245784 ("Kconfig.debug: make DEBUG_INFO selectable from a
+choice") broke the selection of CONFIG_DEBUG_INFO by powerpc defconfigs.
+
+It is now necessary to select one of the three DEBUG_INFO_DWARF*
+options to get DEBUG_INFO enabled.
+
+Replace DEBUG_INFO=y by DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y in all
+defconfigs using the following command:
+
+sed -i s/DEBUG_INFO=y/DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y/g `git grep -l DEBUG_INFO arch/powerpc/configs/`
+
+Fixes: f9b3cd245784 ("Kconfig.debug: make DEBUG_INFO selectable from a choice")
+Cc: stable@vger.kernel.org
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Reviewed-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
+Link: https://lore.kernel.org/r/98a4c2603bf9e4b776e219f5b8541d23aa24e854.1654930308.git.christophe.leroy@csgroup.eu
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/configs/44x/akebono_defconfig    | 2 +-
+ arch/powerpc/configs/44x/currituck_defconfig  | 2 +-
+ arch/powerpc/configs/44x/fsp2_defconfig       | 2 +-
+ arch/powerpc/configs/44x/iss476-smp_defconfig | 2 +-
+ arch/powerpc/configs/44x/warp_defconfig       | 2 +-
+ arch/powerpc/configs/52xx/lite5200b_defconfig | 2 +-
+ arch/powerpc/configs/52xx/motionpro_defconfig | 2 +-
+ arch/powerpc/configs/52xx/tqm5200_defconfig   | 2 +-
+ arch/powerpc/configs/adder875_defconfig       | 2 +-
+ arch/powerpc/configs/ep8248e_defconfig        | 2 +-
+ arch/powerpc/configs/ep88xc_defconfig         | 2 +-
+ arch/powerpc/configs/fsl-emb-nonhw.config     | 2 +-
+ arch/powerpc/configs/mgcoge_defconfig         | 2 +-
+ arch/powerpc/configs/mpc5200_defconfig        | 2 +-
+ arch/powerpc/configs/mpc8272_ads_defconfig    | 2 +-
+ arch/powerpc/configs/mpc885_ads_defconfig     | 2 +-
+ arch/powerpc/configs/ppc6xx_defconfig         | 2 +-
+ arch/powerpc/configs/pq2fads_defconfig        | 2 +-
+ arch/powerpc/configs/ps3_defconfig            | 2 +-
+ arch/powerpc/configs/tqm8xx_defconfig         | 2 +-
+ 20 files changed, 20 insertions(+), 20 deletions(-)
+
+diff --git a/arch/powerpc/configs/44x/akebono_defconfig b/arch/powerpc/configs/44x/akebono_defconfig
+index 4bc549c6edc5..fde4824f235e 100644
+--- a/arch/powerpc/configs/44x/akebono_defconfig
++++ b/arch/powerpc/configs/44x/akebono_defconfig
+@@ -118,7 +118,7 @@ CONFIG_CRAMFS=y
+ CONFIG_NLS_DEFAULT="n"
+ CONFIG_NLS_CODEPAGE_437=y
+ CONFIG_NLS_ISO8859_1=y
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_MAGIC_SYSRQ=y
+ CONFIG_DETECT_HUNG_TASK=y
+ CONFIG_XMON=y
+diff --git a/arch/powerpc/configs/44x/currituck_defconfig b/arch/powerpc/configs/44x/currituck_defconfig
+index 717827219921..7283b7d4a1a5 100644
+--- a/arch/powerpc/configs/44x/currituck_defconfig
++++ b/arch/powerpc/configs/44x/currituck_defconfig
+@@ -73,7 +73,7 @@ CONFIG_NFS_FS=y
+ CONFIG_NFS_V3_ACL=y
+ CONFIG_NFS_V4=y
+ CONFIG_NLS_DEFAULT="n"
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_MAGIC_SYSRQ=y
+ CONFIG_DETECT_HUNG_TASK=y
+ CONFIG_XMON=y
+diff --git a/arch/powerpc/configs/44x/fsp2_defconfig b/arch/powerpc/configs/44x/fsp2_defconfig
+index 8da316e61a08..3fdfbb29b854 100644
+--- a/arch/powerpc/configs/44x/fsp2_defconfig
++++ b/arch/powerpc/configs/44x/fsp2_defconfig
+@@ -110,7 +110,7 @@ CONFIG_XZ_DEC=y
+ CONFIG_PRINTK_TIME=y
+ CONFIG_MESSAGE_LOGLEVEL_DEFAULT=3
+ CONFIG_DYNAMIC_DEBUG=y
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_MAGIC_SYSRQ=y
+ CONFIG_DETECT_HUNG_TASK=y
+ CONFIG_CRYPTO_CBC=y
+diff --git a/arch/powerpc/configs/44x/iss476-smp_defconfig b/arch/powerpc/configs/44x/iss476-smp_defconfig
+index c11e777b2f3d..0f6380e1e612 100644
+--- a/arch/powerpc/configs/44x/iss476-smp_defconfig
++++ b/arch/powerpc/configs/44x/iss476-smp_defconfig
+@@ -56,7 +56,7 @@ CONFIG_PROC_KCORE=y
+ CONFIG_TMPFS=y
+ CONFIG_CRAMFS=y
+ # CONFIG_NETWORK_FILESYSTEMS is not set
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_MAGIC_SYSRQ=y
+ CONFIG_DETECT_HUNG_TASK=y
+ CONFIG_PPC_EARLY_DEBUG=y
+diff --git a/arch/powerpc/configs/44x/warp_defconfig b/arch/powerpc/configs/44x/warp_defconfig
+index 47252c2d7669..20891c413149 100644
+--- a/arch/powerpc/configs/44x/warp_defconfig
++++ b/arch/powerpc/configs/44x/warp_defconfig
+@@ -88,7 +88,7 @@ CONFIG_NLS_UTF8=y
+ CONFIG_CRC_CCITT=y
+ CONFIG_CRC_T10DIF=y
+ CONFIG_PRINTK_TIME=y
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_DEBUG_FS=y
+ CONFIG_MAGIC_SYSRQ=y
+ CONFIG_DETECT_HUNG_TASK=y
+diff --git a/arch/powerpc/configs/52xx/lite5200b_defconfig b/arch/powerpc/configs/52xx/lite5200b_defconfig
+index 63368e677506..7db479dcbc0c 100644
+--- a/arch/powerpc/configs/52xx/lite5200b_defconfig
++++ b/arch/powerpc/configs/52xx/lite5200b_defconfig
+@@ -58,6 +58,6 @@ CONFIG_NFS_FS=y
+ CONFIG_NFS_V4=y
+ CONFIG_ROOT_NFS=y
+ CONFIG_PRINTK_TIME=y
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_DETECT_HUNG_TASK=y
+ # CONFIG_DEBUG_BUGVERBOSE is not set
+diff --git a/arch/powerpc/configs/52xx/motionpro_defconfig b/arch/powerpc/configs/52xx/motionpro_defconfig
+index 72762da94846..6186ead1e105 100644
+--- a/arch/powerpc/configs/52xx/motionpro_defconfig
++++ b/arch/powerpc/configs/52xx/motionpro_defconfig
+@@ -84,7 +84,7 @@ CONFIG_ROOT_NFS=y
+ CONFIG_NLS_CODEPAGE_437=y
+ CONFIG_NLS_ISO8859_1=y
+ CONFIG_PRINTK_TIME=y
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_DETECT_HUNG_TASK=y
+ # CONFIG_DEBUG_BUGVERBOSE is not set
+ CONFIG_CRYPTO_ECB=y
+diff --git a/arch/powerpc/configs/52xx/tqm5200_defconfig b/arch/powerpc/configs/52xx/tqm5200_defconfig
+index a3c8ca74032c..e6735b945327 100644
+--- a/arch/powerpc/configs/52xx/tqm5200_defconfig
++++ b/arch/powerpc/configs/52xx/tqm5200_defconfig
+@@ -85,7 +85,7 @@ CONFIG_ROOT_NFS=y
+ CONFIG_NLS_CODEPAGE_437=y
+ CONFIG_NLS_ISO8859_1=y
+ CONFIG_PRINTK_TIME=y
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_DETECT_HUNG_TASK=y
+ # CONFIG_DEBUG_BUGVERBOSE is not set
+ CONFIG_CRYPTO_ECB=y
+diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig
+index 5326bc739279..7f35d5bc1229 100644
+--- a/arch/powerpc/configs/adder875_defconfig
++++ b/arch/powerpc/configs/adder875_defconfig
+@@ -45,7 +45,7 @@ CONFIG_CRAMFS=y
+ CONFIG_NFS_FS=y
+ CONFIG_ROOT_NFS=y
+ CONFIG_CRC32_SLICEBY4=y
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_DEBUG_FS=y
+ CONFIG_MAGIC_SYSRQ=y
+ CONFIG_DETECT_HUNG_TASK=y
+diff --git a/arch/powerpc/configs/ep8248e_defconfig b/arch/powerpc/configs/ep8248e_defconfig
+index 00d69965f898..8df6d3a293e3 100644
+--- a/arch/powerpc/configs/ep8248e_defconfig
++++ b/arch/powerpc/configs/ep8248e_defconfig
+@@ -59,7 +59,7 @@ CONFIG_NLS_CODEPAGE_437=y
+ CONFIG_NLS_ASCII=y
+ CONFIG_NLS_ISO8859_1=y
+ CONFIG_NLS_UTF8=y
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_MAGIC_SYSRQ=y
+ # CONFIG_SCHED_DEBUG is not set
+ CONFIG_BDI_SWITCH=y
+diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig
+index f5c3e72da719..a98ef6a4abef 100644
+--- a/arch/powerpc/configs/ep88xc_defconfig
++++ b/arch/powerpc/configs/ep88xc_defconfig
+@@ -48,6 +48,6 @@ CONFIG_CRAMFS=y
+ CONFIG_NFS_FS=y
+ CONFIG_ROOT_NFS=y
+ CONFIG_CRC32_SLICEBY4=y
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_MAGIC_SYSRQ=y
+ CONFIG_DETECT_HUNG_TASK=y
+diff --git a/arch/powerpc/configs/fsl-emb-nonhw.config b/arch/powerpc/configs/fsl-emb-nonhw.config
+index df37efed0aec..f14c6dbd7346 100644
+--- a/arch/powerpc/configs/fsl-emb-nonhw.config
++++ b/arch/powerpc/configs/fsl-emb-nonhw.config
+@@ -24,7 +24,7 @@ CONFIG_CRYPTO_PCBC=m
+ CONFIG_CRYPTO_SHA256=y
+ CONFIG_CRYPTO_SHA512=y
+ CONFIG_DEBUG_FS=y
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_DEBUG_KERNEL=y
+ CONFIG_DEBUG_SHIRQ=y
+ CONFIG_DETECT_HUNG_TASK=y
+diff --git a/arch/powerpc/configs/mgcoge_defconfig b/arch/powerpc/configs/mgcoge_defconfig
+index dcc8dccf54f3..498d35db7833 100644
+--- a/arch/powerpc/configs/mgcoge_defconfig
++++ b/arch/powerpc/configs/mgcoge_defconfig
+@@ -73,7 +73,7 @@ CONFIG_NLS_CODEPAGE_437=y
+ CONFIG_NLS_ASCII=y
+ CONFIG_NLS_ISO8859_1=y
+ CONFIG_NLS_UTF8=y
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_DEBUG_FS=y
+ CONFIG_MAGIC_SYSRQ=y
+ # CONFIG_SCHED_DEBUG is not set
+diff --git a/arch/powerpc/configs/mpc5200_defconfig b/arch/powerpc/configs/mpc5200_defconfig
+index 83d801307178..c0fe5e76604a 100644
+--- a/arch/powerpc/configs/mpc5200_defconfig
++++ b/arch/powerpc/configs/mpc5200_defconfig
+@@ -122,6 +122,6 @@ CONFIG_ROOT_NFS=y
+ CONFIG_NLS_CODEPAGE_437=y
+ CONFIG_NLS_ISO8859_1=y
+ CONFIG_PRINTK_TIME=y
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_DEBUG_KERNEL=y
+ CONFIG_DETECT_HUNG_TASK=y
+diff --git a/arch/powerpc/configs/mpc8272_ads_defconfig b/arch/powerpc/configs/mpc8272_ads_defconfig
+index 00a4d2bf43b2..4145ef5689ca 100644
+--- a/arch/powerpc/configs/mpc8272_ads_defconfig
++++ b/arch/powerpc/configs/mpc8272_ads_defconfig
+@@ -67,7 +67,7 @@ CONFIG_NLS_CODEPAGE_437=y
+ CONFIG_NLS_ASCII=y
+ CONFIG_NLS_ISO8859_1=y
+ CONFIG_NLS_UTF8=y
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_MAGIC_SYSRQ=y
+ CONFIG_DETECT_HUNG_TASK=y
+ CONFIG_BDI_SWITCH=y
+diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig
+index c74dc76b1d0d..700115d85d6f 100644
+--- a/arch/powerpc/configs/mpc885_ads_defconfig
++++ b/arch/powerpc/configs/mpc885_ads_defconfig
+@@ -71,7 +71,7 @@ CONFIG_ROOT_NFS=y
+ CONFIG_CRYPTO=y
+ CONFIG_CRYPTO_DEV_TALITOS=y
+ CONFIG_CRC32_SLICEBY4=y
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_MAGIC_SYSRQ=y
+ CONFIG_DEBUG_FS=y
+ CONFIG_DEBUG_VM_PGTABLE=y
+diff --git a/arch/powerpc/configs/ppc6xx_defconfig b/arch/powerpc/configs/ppc6xx_defconfig
+index b622ecd73286..91967824272e 100644
+--- a/arch/powerpc/configs/ppc6xx_defconfig
++++ b/arch/powerpc/configs/ppc6xx_defconfig
+@@ -1065,7 +1065,7 @@ CONFIG_NLS_ISO8859_14=m
+ CONFIG_NLS_ISO8859_15=m
+ CONFIG_NLS_KOI8_R=m
+ CONFIG_NLS_KOI8_U=m
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_HEADERS_INSTALL=y
+ CONFIG_MAGIC_SYSRQ=y
+ CONFIG_DEBUG_KERNEL=y
+diff --git a/arch/powerpc/configs/pq2fads_defconfig b/arch/powerpc/configs/pq2fads_defconfig
+index 9d8a76857c6f..9d63e2e65211 100644
+--- a/arch/powerpc/configs/pq2fads_defconfig
++++ b/arch/powerpc/configs/pq2fads_defconfig
+@@ -68,7 +68,7 @@ CONFIG_NLS_CODEPAGE_437=y
+ CONFIG_NLS_ASCII=y
+ CONFIG_NLS_ISO8859_1=y
+ CONFIG_NLS_UTF8=y
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_MAGIC_SYSRQ=y
+ CONFIG_DETECT_HUNG_TASK=y
+ # CONFIG_SCHED_DEBUG is not set
+diff --git a/arch/powerpc/configs/ps3_defconfig b/arch/powerpc/configs/ps3_defconfig
+index 7c95fab4b920..2d9ac233da68 100644
+--- a/arch/powerpc/configs/ps3_defconfig
++++ b/arch/powerpc/configs/ps3_defconfig
+@@ -153,7 +153,7 @@ CONFIG_NLS_CODEPAGE_437=y
+ CONFIG_NLS_ISO8859_1=y
+ CONFIG_CRC_CCITT=m
+ CONFIG_CRC_T10DIF=y
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_MAGIC_SYSRQ=y
+ CONFIG_DEBUG_MEMORY_INIT=y
+ CONFIG_DEBUG_STACKOVERFLOW=y
+diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig
+index 77857d513022..083c2e57520a 100644
+--- a/arch/powerpc/configs/tqm8xx_defconfig
++++ b/arch/powerpc/configs/tqm8xx_defconfig
+@@ -55,6 +55,6 @@ CONFIG_CRAMFS=y
+ CONFIG_NFS_FS=y
+ CONFIG_ROOT_NFS=y
+ CONFIG_CRC32_SLICEBY4=y
+-CONFIG_DEBUG_INFO=y
++CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+ CONFIG_MAGIC_SYSRQ=y
+ CONFIG_DETECT_HUNG_TASK=y
+-- 
+2.35.1
+
diff --git a/queue-5.19/revert-kvm-x86-pmu-accept-0-for-absent-pmu-msrs-when.patch b/queue-5.19/revert-kvm-x86-pmu-accept-0-for-absent-pmu-msrs-when.patch
new file mode 100644 (file)
index 0000000..17b42cb
--- /dev/null
@@ -0,0 +1,77 @@
+From b158536de85c5edd2ddde0f2d4b65a17607c72e0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 11 Jun 2022 00:57:51 +0000
+Subject: Revert "KVM: x86/pmu: Accept 0 for absent PMU MSRs when
+ host-initiated if !enable_pmu"
+
+From: Sean Christopherson <seanjc@google.com>
+
+[ Upstream commit 5d4283df5a0fc8299fba9443c33d219939eccc2d ]
+
+Eating reads and writes to all "PMU" MSRs when there is no PMU is wildly
+broken as it results in allowing accesses to _any_ MSR on Intel CPUs
+as intel_is_valid_msr() returns true for all host_initiated accesses.
+
+A revert of commit d1c88a402056 ("KVM: x86: always allow host-initiated
+writes to PMU MSRs") will soon follow.
+
+This reverts commit 8e6a58e28b34e8d247e772159b8fa8f6bae39192.
+
+Signed-off-by: Sean Christopherson <seanjc@google.com>
+Message-Id: <20220611005755.753273-4-seanjc@google.com>
+Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kvm/pmu.c     |  8 --------
+ arch/x86/kvm/svm/pmu.c | 11 +----------
+ 2 files changed, 1 insertion(+), 18 deletions(-)
+
+diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c
+index 2334ddfbbab2..3f868fed9114 100644
+--- a/arch/x86/kvm/pmu.c
++++ b/arch/x86/kvm/pmu.c
+@@ -433,19 +433,11 @@ static void kvm_pmu_mark_pmc_in_use(struct kvm_vcpu *vcpu, u32 msr)
+ int kvm_pmu_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ {
+-      if (msr_info->host_initiated && !vcpu->kvm->arch.enable_pmu) {
+-              msr_info->data = 0;
+-              return 0;
+-      }
+-
+       return static_call(kvm_x86_pmu_get_msr)(vcpu, msr_info);
+ }
+ int kvm_pmu_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
+ {
+-      if (msr_info->host_initiated && !vcpu->kvm->arch.enable_pmu)
+-              return !!msr_info->data;
+-
+       kvm_pmu_mark_pmc_in_use(vcpu, msr_info->index);
+       return static_call(kvm_x86_pmu_set_msr)(vcpu, msr_info);
+ }
+diff --git a/arch/x86/kvm/svm/pmu.c b/arch/x86/kvm/svm/pmu.c
+index d93ecb25fe17..136039fc6d01 100644
+--- a/arch/x86/kvm/svm/pmu.c
++++ b/arch/x86/kvm/svm/pmu.c
+@@ -232,16 +232,7 @@ static struct kvm_pmc *amd_rdpmc_ecx_to_pmc(struct kvm_vcpu *vcpu,
+ static bool amd_is_valid_msr(struct kvm_vcpu *vcpu, u32 msr)
+ {
+       /* All MSRs refer to exactly one PMC, so msr_idx_to_pmc is enough.  */
+-      if (!host_initiated)
+-              return false;
+-
+-      switch (msr) {
+-      case MSR_K7_EVNTSEL0 ... MSR_K7_PERFCTR3:
+-      case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
+-              return true;
+-      default:
+-              return false;
+-      }
++      return false;
+ }
+ static struct kvm_pmc *amd_msr_idx_to_pmc(struct kvm_vcpu *vcpu, u32 msr)
+-- 
+2.35.1
+
diff --git a/queue-5.19/s390-unwind-fix-fgraph-return-address-recovery.patch b/queue-5.19/s390-unwind-fix-fgraph-return-address-recovery.patch
new file mode 100644 (file)
index 0000000..5c3e2a6
--- /dev/null
@@ -0,0 +1,46 @@
+From 7367c31efb79894a0254894fb361a76805e101da Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 18:57:59 +0200
+Subject: s390/unwind: fix fgraph return address recovery
+
+From: Sumanth Korikkar <sumanthk@linux.ibm.com>
+
+[ Upstream commit ded466e1806686794b403ebf031133bbaca76bb2 ]
+
+When HAVE_FUNCTION_GRAPH_RET_ADDR_PTR is defined, the return
+address to the fgraph caller is recovered by tagging it along with the
+stack pointer of ftrace stack. This makes the stack unwinding more
+reliable.
+
+When the fgraph return address is modified to return_to_handler,
+ftrace_graph_ret_addr tries to restore it to the original
+value using tagged stack pointer.
+
+Fix this by passing tagged sp to ftrace_graph_ret_addr.
+
+Fixes: d81675b60d09 ("s390/unwind: recover kretprobe modified return address in stacktrace")
+Cc: <stable@vger.kernel.org> # 5.18
+Reviewed-by: Vasily Gorbik <gor@linux.ibm.com>
+Signed-off-by: Sumanth Korikkar <sumanthk@linux.ibm.com>
+Signed-off-by: Alexander Gordeev <agordeev@linux.ibm.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/s390/include/asm/unwind.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/s390/include/asm/unwind.h b/arch/s390/include/asm/unwind.h
+index 0bf06f1682d8..02462e7100c1 100644
+--- a/arch/s390/include/asm/unwind.h
++++ b/arch/s390/include/asm/unwind.h
+@@ -47,7 +47,7 @@ struct unwind_state {
+ static inline unsigned long unwind_recover_ret_addr(struct unwind_state *state,
+                                                   unsigned long ip)
+ {
+-      ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, NULL);
++      ip = ftrace_graph_ret_addr(state->task, &state->graph_idx, ip, (void *)state->sp);
+       if (is_kretprobe_trampoline(ip))
+               ip = kretprobe_find_ret_addr(state->task, (void *)state->sp, &state->kr_cur);
+       return ip;
+-- 
+2.35.1
+
diff --git a/queue-5.19/scsi-lpfc-remove-extra-atomic_inc-on-cmd_pending-in-.patch b/queue-5.19/scsi-lpfc-remove-extra-atomic_inc-on-cmd_pending-in-.patch
new file mode 100644 (file)
index 0000000..5e2e9b0
--- /dev/null
@@ -0,0 +1,40 @@
+From 452a991df03fea2d6438fa8f8da69e1767cab1b8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Jul 2022 14:14:17 -0700
+Subject: scsi: lpfc: Remove extra atomic_inc on cmd_pending in queuecommand
+ after VMID
+
+From: James Smart <jsmart2021@gmail.com>
+
+[ Upstream commit 0948a9c5386095baae4012190a6b65aba684a907 ]
+
+VMID introduced an extra increment of cmd_pending, causing double-counting
+of the I/O. The normal increment is performed in lpfc_get_scsi_buf.
+
+Link: https://lore.kernel.org/r/20220701211425.2708-5-jsmart2021@gmail.com
+Fixes: 33c79741deaf ("scsi: lpfc: vmid: Introduce VMID in I/O path")
+Cc: <stable@vger.kernel.org> # v5.14+
+Co-developed-by: Justin Tee <justin.tee@broadcom.com>
+Signed-off-by: Justin Tee <justin.tee@broadcom.com>
+Signed-off-by: James Smart <jsmart2021@gmail.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/lpfc/lpfc_scsi.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/drivers/scsi/lpfc/lpfc_scsi.c b/drivers/scsi/lpfc/lpfc_scsi.c
+index ba5e4016262e..084c0f9fdc3a 100644
+--- a/drivers/scsi/lpfc/lpfc_scsi.c
++++ b/drivers/scsi/lpfc/lpfc_scsi.c
+@@ -5456,7 +5456,6 @@ lpfc_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *cmnd)
+                               cur_iocbq->cmd_flag |= LPFC_IO_VMID;
+               }
+       }
+-      atomic_inc(&ndlp->cmd_pending);
+ #ifdef CONFIG_SCSI_LPFC_DEBUG_FS
+       if (unlikely(phba->hdwqstat_on & LPFC_CHECK_SCSI_IO))
+-- 
+2.35.1
+
diff --git a/queue-5.19/scsi-qla2xxx-edif-fix-dropped-ike-message.patch b/queue-5.19/scsi-qla2xxx-edif-fix-dropped-ike-message.patch
new file mode 100644 (file)
index 0000000..64e8c58
--- /dev/null
@@ -0,0 +1,126 @@
+From b9f9bae7f3cc422d1625e07eae230361a3e4e4e5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 22:20:40 -0700
+Subject: scsi: qla2xxx: edif: Fix dropped IKE message
+
+From: Quinn Tran <qutran@marvell.com>
+
+[ Upstream commit c019cd656e717349ff22d0c41d6fbfc773f48c52 ]
+
+This patch fixes IKE message being dropped due to error in processing Purex
+IOCB and Continuation IOCBs.
+
+Link: https://lore.kernel.org/r/20220713052045.10683-6-njavali@marvell.com
+Fixes: fac2807946c1 ("scsi: qla2xxx: edif: Add extraction of auth_els from the wire")
+Cc: stable@vger.kernel.org
+Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
+Signed-off-by: Quinn Tran <qutran@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_isr.c | 54 +++++++++++++++-------------------
+ 1 file changed, 24 insertions(+), 30 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
+index 1353a5b61c14..895a8d6a5f0e 100644
+--- a/drivers/scsi/qla2xxx/qla_isr.c
++++ b/drivers/scsi/qla2xxx/qla_isr.c
+@@ -3710,12 +3710,11 @@ void qla24xx_nvme_ls4_iocb(struct scsi_qla_host *vha,
+  * Return: 0 all iocbs has arrived, xx- all iocbs have not arrived.
+  */
+ static int qla_chk_cont_iocb_avail(struct scsi_qla_host *vha,
+-      struct rsp_que *rsp, response_t *pkt)
++      struct rsp_que *rsp, response_t *pkt, u32 rsp_q_in)
+ {
+-      int start_pkt_ring_index, end_pkt_ring_index, n_ring_index;
+-      response_t *end_pkt;
++      int start_pkt_ring_index;
++      u32 iocb_cnt = 0;
+       int rc = 0;
+-      u32 rsp_q_in;
+       if (pkt->entry_count == 1)
+               return rc;
+@@ -3726,34 +3725,18 @@ static int qla_chk_cont_iocb_avail(struct scsi_qla_host *vha,
+       else
+               start_pkt_ring_index = rsp->ring_index - 1;
+-      if ((start_pkt_ring_index + pkt->entry_count) >= rsp->length)
+-              end_pkt_ring_index = start_pkt_ring_index + pkt->entry_count -
+-                      rsp->length - 1;
++      if (rsp_q_in < start_pkt_ring_index)
++              /* q in ptr is wrapped */
++              iocb_cnt = rsp->length - start_pkt_ring_index + rsp_q_in;
+       else
+-              end_pkt_ring_index = start_pkt_ring_index + pkt->entry_count - 1;
++              iocb_cnt = rsp_q_in - start_pkt_ring_index;
+-      end_pkt = rsp->ring + end_pkt_ring_index;
+-
+-      /*  next pkt = end_pkt + 1 */
+-      n_ring_index = end_pkt_ring_index + 1;
+-      if (n_ring_index >= rsp->length)
+-              n_ring_index = 0;
+-
+-      rsp_q_in = rsp->qpair->use_shadow_reg ? *rsp->in_ptr :
+-              rd_reg_dword(rsp->rsp_q_in);
+-
+-      /* rsp_q_in is either wrapped or pointing beyond endpkt */
+-      if ((rsp_q_in < start_pkt_ring_index && rsp_q_in < n_ring_index) ||
+-                      rsp_q_in >= n_ring_index)
+-              /* all IOCBs arrived. */
+-              rc = 0;
+-      else
++      if (iocb_cnt < pkt->entry_count)
+               rc = -EIO;
+-      ql_dbg(ql_dbg_init + ql_dbg_verbose, vha, 0x5091,
+-          "%s - ring %p pkt %p end pkt %p entry count %#x rsp_q_in %d rc %d\n",
+-          __func__, rsp->ring, pkt, end_pkt, pkt->entry_count,
+-          rsp_q_in, rc);
++      ql_dbg(ql_dbg_init, vha, 0x5091,
++             "%s - ring %p pkt %p entry count %d iocb_cnt %d rsp_q_in %d rc %d\n",
++             __func__, rsp->ring, pkt, pkt->entry_count, iocb_cnt, rsp_q_in, rc);
+       return rc;
+ }
+@@ -3770,7 +3753,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
+       struct qla_hw_data *ha = vha->hw;
+       struct purex_entry_24xx *purex_entry;
+       struct purex_item *pure_item;
+-      u16 rsp_in = 0;
++      u16 rsp_in = 0, cur_ring_index;
+       int follow_inptr, is_shadow_hba;
+       if (!ha->flags.fw_started)
+@@ -3801,6 +3784,7 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
+                      (!follow_inptr &&
+                       rsp->ring_ptr->signature != RESPONSE_PROCESSED)) {
+               pkt = (struct sts_entry_24xx *)rsp->ring_ptr;
++              cur_ring_index = rsp->ring_index;
+               rsp->ring_index++;
+               if (rsp->ring_index == rsp->length) {
+@@ -3921,7 +3905,17 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
+                               break;
+                       case ELS_AUTH_ELS:
+-                              if (qla_chk_cont_iocb_avail(vha, rsp, (response_t *)pkt)) {
++                              if (qla_chk_cont_iocb_avail(vha, rsp, (response_t *)pkt, rsp_in)) {
++                                      /*
++                                       * ring_ptr and ring_index were
++                                       * pre-incremented above. Reset them
++                                       * back to current. Wait for next
++                                       * interrupt with all IOCBs to arrive
++                                       * and re-process.
++                                       */
++                                      rsp->ring_ptr = (response_t *)pkt;
++                                      rsp->ring_index = cur_ring_index;
++
+                                       ql_dbg(ql_dbg_init, vha, 0x5091,
+                                           "Defer processing ELS opcode %#x...\n",
+                                           purex_entry->els_frame_payload[3]);
+-- 
+2.35.1
+
diff --git a/queue-5.19/scsi-qla2xxx-fix-crash-due-to-stale-srb-access-aroun.patch b/queue-5.19/scsi-qla2xxx-fix-crash-due-to-stale-srb-access-aroun.patch
new file mode 100644 (file)
index 0000000..5b7c628
--- /dev/null
@@ -0,0 +1,125 @@
+From bbf97f698babaae6efec80f94f046833506fccf4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 22:35:02 -0700
+Subject: scsi: qla2xxx: Fix crash due to stale SRB access around I/O timeouts
+
+From: Arun Easi <aeasi@marvell.com>
+
+[ Upstream commit c39587bc0abaf16593f7abcdf8aeec3c038c7d52 ]
+
+Ensure SRB is returned during I/O timeout error escalation. If that is not
+possible fail the escalation path.
+
+Following crash stack was seen:
+
+BUG: unable to handle kernel paging request at 0000002f56aa90f8
+IP: qla_chk_edif_rx_sa_delete_pending+0x14/0x30 [qla2xxx]
+Call Trace:
+ ? qla2x00_status_entry+0x19f/0x1c50 [qla2xxx]
+ ? qla2x00_start_sp+0x116/0x1170 [qla2xxx]
+ ? dma_pool_alloc+0x1d6/0x210
+ ? mempool_alloc+0x54/0x130
+ ? qla24xx_process_response_queue+0x548/0x12b0 [qla2xxx]
+ ? qla_do_work+0x2d/0x40 [qla2xxx]
+ ? process_one_work+0x14c/0x390
+
+Link: https://lore.kernel.org/r/20220616053508.27186-6-njavali@marvell.com
+Fixes: d74595278f4a ("scsi: qla2xxx: Add multiple queue pair functionality.")
+Cc: stable@vger.kernel.org
+Signed-off-by: Arun Easi <aeasi@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_os.c | 43 +++++++++++++++++++++++++----------
+ 1 file changed, 31 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
+index 6fd5c21ad1f5..66f1723c8583 100644
+--- a/drivers/scsi/qla2xxx/qla_os.c
++++ b/drivers/scsi/qla2xxx/qla_os.c
+@@ -1342,21 +1342,20 @@ qla2xxx_eh_abort(struct scsi_cmnd *cmd)
+ /*
+  * Returns: QLA_SUCCESS or QLA_FUNCTION_FAILED.
+  */
+-int
+-qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t,
+-      uint64_t l, enum nexus_wait_type type)
++static int
++__qla2x00_eh_wait_for_pending_commands(struct qla_qpair *qpair, unsigned int t,
++                                     uint64_t l, enum nexus_wait_type type)
+ {
+       int cnt, match, status;
+       unsigned long flags;
+-      struct qla_hw_data *ha = vha->hw;
+-      struct req_que *req;
++      scsi_qla_host_t *vha = qpair->vha;
++      struct req_que *req = qpair->req;
+       srb_t *sp;
+       struct scsi_cmnd *cmd;
+       status = QLA_SUCCESS;
+-      spin_lock_irqsave(&ha->hardware_lock, flags);
+-      req = vha->req;
++      spin_lock_irqsave(qpair->qp_lock_ptr, flags);
+       for (cnt = 1; status == QLA_SUCCESS &&
+               cnt < req->num_outstanding_cmds; cnt++) {
+               sp = req->outstanding_cmds[cnt];
+@@ -1383,12 +1382,32 @@ qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t,
+               if (!match)
+                       continue;
+-              spin_unlock_irqrestore(&ha->hardware_lock, flags);
++              spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
+               status = qla2x00_eh_wait_on_command(cmd);
+-              spin_lock_irqsave(&ha->hardware_lock, flags);
++              spin_lock_irqsave(qpair->qp_lock_ptr, flags);
+       }
+-      spin_unlock_irqrestore(&ha->hardware_lock, flags);
++      spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
++
++      return status;
++}
++
++int
++qla2x00_eh_wait_for_pending_commands(scsi_qla_host_t *vha, unsigned int t,
++                                   uint64_t l, enum nexus_wait_type type)
++{
++      struct qla_qpair *qpair;
++      struct qla_hw_data *ha = vha->hw;
++      int i, status = QLA_SUCCESS;
++      status = __qla2x00_eh_wait_for_pending_commands(ha->base_qpair, t, l,
++                                                      type);
++      for (i = 0; status == QLA_SUCCESS && i < ha->max_qpairs; i++) {
++              qpair = ha->queue_pair_map[i];
++              if (!qpair)
++                      continue;
++              status = __qla2x00_eh_wait_for_pending_commands(qpair, t, l,
++                                                              type);
++      }
+       return status;
+ }
+@@ -1425,7 +1444,7 @@ qla2xxx_eh_device_reset(struct scsi_cmnd *cmd)
+               return err;
+       if (fcport->deleted)
+-              return SUCCESS;
++              return FAILED;
+       ql_log(ql_log_info, vha, 0x8009,
+           "DEVICE RESET ISSUED nexus=%ld:%d:%llu cmd=%p.\n", vha->host_no,
+@@ -1493,7 +1512,7 @@ qla2xxx_eh_target_reset(struct scsi_cmnd *cmd)
+               return err;
+       if (fcport->deleted)
+-              return SUCCESS;
++              return FAILED;
+       ql_log(ql_log_info, vha, 0x8009,
+           "TARGET RESET ISSUED nexus=%ld:%d cmd=%p.\n", vha->host_no,
+-- 
+2.35.1
+
diff --git a/queue-5.19/scsi-qla2xxx-fix-discovery-issues-in-fc-al-topology.patch-25366 b/queue-5.19/scsi-qla2xxx-fix-discovery-issues-in-fc-al-topology.patch-25366
new file mode 100644 (file)
index 0000000..a3d472a
--- /dev/null
@@ -0,0 +1,116 @@
+From 6cfb8ba3f898fc966bba265d8baccb83d66f73f3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 22:20:42 -0700
+Subject: scsi: qla2xxx: Fix discovery issues in FC-AL topology
+
+From: Arun Easi <aeasi@marvell.com>
+
+[ Upstream commit 47ccb113cead905bdc236571bf8ac6fed90321b3 ]
+
+A direct attach tape device, when gets swapped with another, was not
+discovered. Fix this by looking at loop map and reinitialize link if there
+are devices present.
+
+Link: https://lore.kernel.org/linux-scsi/baef87c3-5dad-3b47-44c1-6914bfc90108@cybernetics.com/
+Link: https://lore.kernel.org/r/20220713052045.10683-8-njavali@marvell.com
+Cc: stable@vger.kernel.org
+Reported-by: Tony Battersby <tonyb@cybernetics.com>
+Tested-by: Tony Battersby <tonyb@cybernetics.com>
+Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
+Signed-off-by: Arun Easi <aeasi@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_gbl.h  |  3 ++-
+ drivers/scsi/qla2xxx/qla_init.c | 29 +++++++++++++++++++++++++++++
+ drivers/scsi/qla2xxx/qla_mbx.c  |  5 ++++-
+ 3 files changed, 35 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
+index a211ed18d4e9..1c2c161b4e9f 100644
+--- a/drivers/scsi/qla2xxx/qla_gbl.h
++++ b/drivers/scsi/qla2xxx/qla_gbl.h
+@@ -435,7 +435,8 @@ extern int
+ qla2x00_get_resource_cnts(scsi_qla_host_t *);
+ extern int
+-qla2x00_get_fcal_position_map(scsi_qla_host_t *ha, char *pos_map);
++qla2x00_get_fcal_position_map(scsi_qla_host_t *ha, char *pos_map,
++              u8 *num_entries);
+ extern int
+ qla2x00_get_link_status(scsi_qla_host_t *, uint16_t, struct link_statistics *,
+diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
+index f8a7b6f2541e..7b78d331aabd 100644
+--- a/drivers/scsi/qla2xxx/qla_init.c
++++ b/drivers/scsi/qla2xxx/qla_init.c
+@@ -5505,6 +5505,22 @@ static int qla2x00_configure_n2n_loop(scsi_qla_host_t *vha)
+       return QLA_FUNCTION_FAILED;
+ }
++static void
++qla_reinitialize_link(scsi_qla_host_t *vha)
++{
++      int rval;
++
++      atomic_set(&vha->loop_state, LOOP_DOWN);
++      atomic_set(&vha->loop_down_timer, LOOP_DOWN_TIME);
++      rval = qla2x00_full_login_lip(vha);
++      if (rval == QLA_SUCCESS) {
++              ql_dbg(ql_dbg_disc, vha, 0xd050, "Link reinitialized\n");
++      } else {
++              ql_dbg(ql_dbg_disc, vha, 0xd051,
++                      "Link reinitialization failed (%d)\n", rval);
++      }
++}
++
+ /*
+  * qla2x00_configure_local_loop
+  *    Updates Fibre Channel Device Database with local loop devices.
+@@ -5556,6 +5572,19 @@ qla2x00_configure_local_loop(scsi_qla_host_t *vha)
+               spin_unlock_irqrestore(&vha->work_lock, flags);
+               if (vha->scan.scan_retry < MAX_SCAN_RETRIES) {
++                      u8 loop_map_entries = 0;
++                      int rc;
++
++                      rc = qla2x00_get_fcal_position_map(vha, NULL,
++                                              &loop_map_entries);
++                      if (rc == QLA_SUCCESS && loop_map_entries > 1) {
++                              /*
++                               * There are devices that are still not logged
++                               * in. Reinitialize to give them a chance.
++                               */
++                              qla_reinitialize_link(vha);
++                              return QLA_FUNCTION_FAILED;
++                      }
+                       set_bit(LOCAL_LOOP_UPDATE, &vha->dpc_flags);
+                       set_bit(LOOP_RESYNC_NEEDED, &vha->dpc_flags);
+               }
+diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
+index bcade1deb798..86d8c455c07a 100644
+--- a/drivers/scsi/qla2xxx/qla_mbx.c
++++ b/drivers/scsi/qla2xxx/qla_mbx.c
+@@ -3068,7 +3068,8 @@ qla2x00_get_resource_cnts(scsi_qla_host_t *vha)
+  *    Kernel context.
+  */
+ int
+-qla2x00_get_fcal_position_map(scsi_qla_host_t *vha, char *pos_map)
++qla2x00_get_fcal_position_map(scsi_qla_host_t *vha, char *pos_map,
++              u8 *num_entries)
+ {
+       int rval;
+       mbx_cmd_t mc;
+@@ -3108,6 +3109,8 @@ qla2x00_get_fcal_position_map(scsi_qla_host_t *vha, char *pos_map)
+               if (pos_map)
+                       memcpy(pos_map, pmap, FCAL_MAP_SIZE);
++              if (num_entries)
++                      *num_entries = pmap[0];
+       }
+       dma_pool_free(ha->s_dma_pool, pmap, pmap_dma);
+-- 
+2.35.1
+
diff --git a/queue-5.19/scsi-qla2xxx-fix-erroneous-mailbox-timeout-after-pci.patch b/queue-5.19/scsi-qla2xxx-fix-erroneous-mailbox-timeout-after-pci.patch
new file mode 100644 (file)
index 0000000..bd8146c
--- /dev/null
@@ -0,0 +1,67 @@
+From 60e461e5c6fe788bcc728e8b17d25dfb9f1c8be4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 22:35:07 -0700
+Subject: scsi: qla2xxx: Fix erroneous mailbox timeout after PCI error
+ injection
+
+From: Quinn Tran <qutran@marvell.com>
+
+[ Upstream commit f260694e6463b63ae550aad25ddefe94cb1904da ]
+
+Clear wait for mailbox interrupt flag to prevent stale mailbox:
+
+Feb 22 05:22:56 ltcden4-lp7 kernel: qla2xxx [0135:90:00.1]-500a:4: LOOP UP detected (16 Gbps).
+Feb 22 05:22:59 ltcden4-lp7 kernel: qla2xxx [0135:90:00.1]-d04c:4: MBX Command timeout for cmd 69, ...
+
+To fix the issue, driver needs to clear the MBX_INTR_WAIT flag on purging
+the mailbox. When the stale mailbox completion does arrive, it will be
+dropped.
+
+Link: https://lore.kernel.org/r/20220616053508.27186-11-njavali@marvell.com
+Fixes: b6faaaf796d7 ("scsi: qla2xxx: Serialize mailbox request")
+Cc: Naresh Bannoth <nbannoth@in.ibm.com>
+Cc: Kyle Mahlkuch <Kyle.Mahlkuch@ibm.com>
+Cc: stable@vger.kernel.org
+Reported-by: Naresh Bannoth <nbannoth@in.ibm.com>
+Tested-by: Naresh Bannoth <nbannoth@in.ibm.com>
+Signed-off-by: Quinn Tran <qutran@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_mbx.c | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
+index 892caf2475df..1b154ab025bd 100644
+--- a/drivers/scsi/qla2xxx/qla_mbx.c
++++ b/drivers/scsi/qla2xxx/qla_mbx.c
+@@ -274,6 +274,12 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
+               atomic_inc(&ha->num_pend_mbx_stage3);
+               if (!wait_for_completion_timeout(&ha->mbx_intr_comp,
+                   mcp->tov * HZ)) {
++                      ql_dbg(ql_dbg_mbx, vha, 0x117a,
++                          "cmd=%x Timeout.\n", command);
++                      spin_lock_irqsave(&ha->hardware_lock, flags);
++                      clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
++                      spin_unlock_irqrestore(&ha->hardware_lock, flags);
++
+                       if (chip_reset != ha->chip_reset) {
+                               eeh_delay = ha->flags.eeh_busy ? 1 : 0;
+@@ -286,12 +292,6 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
+                               rval = QLA_ABORTED;
+                               goto premature_exit;
+                       }
+-                      ql_dbg(ql_dbg_mbx, vha, 0x117a,
+-                          "cmd=%x Timeout.\n", command);
+-                      spin_lock_irqsave(&ha->hardware_lock, flags);
+-                      clear_bit(MBX_INTR_WAIT, &ha->mbx_cmd_flags);
+-                      spin_unlock_irqrestore(&ha->hardware_lock, flags);
+-
+               } else if (ha->flags.purge_mbox ||
+                   chip_reset != ha->chip_reset) {
+                       eeh_delay = ha->flags.eeh_busy ? 1 : 0;
+-- 
+2.35.1
+
diff --git a/queue-5.19/scsi-qla2xxx-fix-excessive-i-o-error-messages-by-def.patch b/queue-5.19/scsi-qla2xxx-fix-excessive-i-o-error-messages-by-def.patch
new file mode 100644 (file)
index 0000000..7bb70e0
--- /dev/null
@@ -0,0 +1,48 @@
+From a46bb1ecd095c0bbfe78b1da5476ebd6575afc56 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 22:34:58 -0700
+Subject: scsi: qla2xxx: Fix excessive I/O error messages by default
+
+From: Arun Easi <aeasi@marvell.com>
+
+[ Upstream commit bff4873c709085e09d0ffae0c25b8e65256e3205 ]
+
+Disable printing I/O error messages by default.  The messages will be
+printed only when logging was enabled.
+
+Link: https://lore.kernel.org/r/20220616053508.27186-2-njavali@marvell.com
+Fixes: 8e2d81c6b5be ("scsi: qla2xxx: Fix excessive messages during device logout")
+Cc: stable@vger.kernel.org
+Signed-off-by: Arun Easi <aeasi@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_isr.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
+index 21b31d6359c8..ae47fc559ae0 100644
+--- a/drivers/scsi/qla2xxx/qla_isr.c
++++ b/drivers/scsi/qla2xxx/qla_isr.c
+@@ -2639,7 +2639,7 @@ static void qla24xx_nvme_iocb_entry(scsi_qla_host_t *vha, struct req_que *req,
+       }
+       if (unlikely(logit))
+-              ql_log(ql_dbg_io, fcport->vha, 0x5060,
++              ql_dbg(ql_dbg_io, fcport->vha, 0x5060,
+                  "NVME-%s ERR Handling - hdl=%x status(%x) tr_len:%x resid=%x  ox_id=%x\n",
+                  sp->name, sp->handle, comp_status,
+                  fd->transferred_length, le32_to_cpu(sts->residual_len),
+@@ -3496,7 +3496,7 @@ qla2x00_status_entry(scsi_qla_host_t *vha, struct rsp_que *rsp, void *pkt)
+ out:
+       if (logit)
+-              ql_log(ql_dbg_io, fcport->vha, 0x3022,
++              ql_dbg(ql_dbg_io, fcport->vha, 0x3022,
+                      "FCP command status: 0x%x-0x%x (0x%x) nexus=%ld:%d:%llu portid=%02x%02x%02x oxid=0x%x cdb=%10phN len=0x%x rsp_info=0x%x resid=0x%x fw_resid=0x%x sp=%p cp=%p.\n",
+                      comp_status, scsi_status, res, vha->host_no,
+                      cp->device->id, cp->device->lun, fcport->d_id.b.domain,
+-- 
+2.35.1
+
diff --git a/queue-5.19/scsi-qla2xxx-fix-imbalance-vha-vref_count.patch-12738 b/queue-5.19/scsi-qla2xxx-fix-imbalance-vha-vref_count.patch-12738
new file mode 100644 (file)
index 0000000..5a7130d
--- /dev/null
@@ -0,0 +1,61 @@
+From 484e602f833c764c9bb58e8b43c1c64302a8814b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 22:20:41 -0700
+Subject: scsi: qla2xxx: Fix imbalance vha->vref_count
+
+From: Quinn Tran <qutran@marvell.com>
+
+[ Upstream commit 63fa7f2644b4b48e1913af33092c044bf48e9321 ]
+
+vref_count took an extra decrement in the task management path.  Add an
+extra ref count to compensate the imbalance.
+
+Link: https://lore.kernel.org/r/20220713052045.10683-7-njavali@marvell.com
+Cc: stable@vger.kernel.org
+Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
+Signed-off-by: Quinn Tran <qutran@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_init.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
+index ef6857ad148d..f8a7b6f2541e 100644
+--- a/drivers/scsi/qla2xxx/qla_init.c
++++ b/drivers/scsi/qla2xxx/qla_init.c
+@@ -168,6 +168,7 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
+       struct srb_iocb *abt_iocb;
+       srb_t *sp;
+       int rval = QLA_FUNCTION_FAILED;
++      uint8_t bail;
+       /* ref: INIT for ABTS command */
+       sp = qla2xxx_get_qpair_sp(cmd_sp->vha, cmd_sp->qpair, cmd_sp->fcport,
+@@ -175,6 +176,7 @@ int qla24xx_async_abort_cmd(srb_t *cmd_sp, bool wait)
+       if (!sp)
+               return QLA_MEMORY_ALLOC_FAILED;
++      QLA_VHA_MARK_BUSY(vha, bail);
+       abt_iocb = &sp->u.iocb_cmd;
+       sp->type = SRB_ABT_CMD;
+       sp->name = "abort";
+@@ -2011,12 +2013,14 @@ qla2x00_async_tm_cmd(fc_port_t *fcport, uint32_t flags, uint32_t lun,
+       struct srb_iocb *tm_iocb;
+       srb_t *sp;
+       int rval = QLA_FUNCTION_FAILED;
++      uint8_t bail;
+       /* ref: INIT */
+       sp = qla2x00_get_sp(vha, fcport, GFP_KERNEL);
+       if (!sp)
+               goto done;
++      QLA_VHA_MARK_BUSY(vha, bail);
+       sp->type = SRB_TM_CMD;
+       sp->name = "tmf";
+       qla2x00_init_async_sp(sp, qla2x00_get_async_timeout(vha),
+-- 
+2.35.1
+
diff --git a/queue-5.19/scsi-qla2xxx-fix-incorrect-display-of-max-frame-size.patch-30577 b/queue-5.19/scsi-qla2xxx-fix-incorrect-display-of-max-frame-size.patch-30577
new file mode 100644 (file)
index 0000000..5434137
--- /dev/null
@@ -0,0 +1,110 @@
+From 8624f3a1810a248525365cbad29b1bfc9356ec61 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 22:20:37 -0700
+Subject: scsi: qla2xxx: Fix incorrect display of max frame size
+
+From: Bikash Hazarika <bhazarika@marvell.com>
+
+[ Upstream commit cf3b4fb655796674e605268bd4bfb47a47c8bce6 ]
+
+Replace display field with the correct field.
+
+Link: https://lore.kernel.org/r/20220713052045.10683-3-njavali@marvell.com
+Fixes: 8777e4314d39 ("scsi: qla2xxx: Migrate NVME N2N handling into state machine")
+Cc: stable@vger.kernel.org
+Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
+Signed-off-by: Bikash Hazarika <bhazarika@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_def.h  | 1 +
+ drivers/scsi/qla2xxx/qla_gs.c   | 9 +++------
+ drivers/scsi/qla2xxx/qla_init.c | 2 ++
+ drivers/scsi/qla2xxx/qla_isr.c  | 4 +---
+ 4 files changed, 7 insertions(+), 9 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
+index 77ef82df6d4d..5d594c82d14c 100644
+--- a/drivers/scsi/qla2xxx/qla_def.h
++++ b/drivers/scsi/qla2xxx/qla_def.h
+@@ -3975,6 +3975,7 @@ struct qla_hw_data {
+       /* SRB cache. */
+ #define SRB_MIN_REQ     128
+       mempool_t       *srb_mempool;
++      u8 port_name[WWN_SIZE];
+       volatile struct {
+               uint32_t        mbox_int                :1;
+diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c
+index f56b578475ba..c999221912e5 100644
+--- a/drivers/scsi/qla2xxx/qla_gs.c
++++ b/drivers/scsi/qla2xxx/qla_gs.c
+@@ -1596,7 +1596,6 @@ qla2x00_hba_attributes(scsi_qla_host_t *vha, void *entries,
+       unsigned int callopt)
+ {
+       struct qla_hw_data *ha = vha->hw;
+-      struct init_cb_24xx *icb24 = (void *)ha->init_cb;
+       struct new_utsname *p_sysid = utsname();
+       struct ct_fdmi_hba_attr *eiter;
+       uint16_t alen;
+@@ -1758,8 +1757,8 @@ qla2x00_hba_attributes(scsi_qla_host_t *vha, void *entries,
+       /* MAX CT Payload Length */
+       eiter = entries + size;
+       eiter->type = cpu_to_be16(FDMI_HBA_MAXIMUM_CT_PAYLOAD_LENGTH);
+-      eiter->a.max_ct_len = cpu_to_be32(le16_to_cpu(IS_FWI2_CAPABLE(ha) ?
+-              icb24->frame_payload_size : ha->init_cb->frame_payload_size));
++      eiter->a.max_ct_len = cpu_to_be32(ha->frame_payload_size >> 2);
++
+       alen = sizeof(eiter->a.max_ct_len);
+       alen += FDMI_ATTR_TYPELEN(eiter);
+       eiter->len = cpu_to_be16(alen);
+@@ -1851,7 +1850,6 @@ qla2x00_port_attributes(scsi_qla_host_t *vha, void *entries,
+       unsigned int callopt)
+ {
+       struct qla_hw_data *ha = vha->hw;
+-      struct init_cb_24xx *icb24 = (void *)ha->init_cb;
+       struct new_utsname *p_sysid = utsname();
+       char *hostname = p_sysid ?
+               p_sysid->nodename : fc_host_system_hostname(vha->host);
+@@ -1903,8 +1901,7 @@ qla2x00_port_attributes(scsi_qla_host_t *vha, void *entries,
+       /* Max frame size. */
+       eiter = entries + size;
+       eiter->type = cpu_to_be16(FDMI_PORT_MAX_FRAME_SIZE);
+-      eiter->a.max_frame_size = cpu_to_be32(le16_to_cpu(IS_FWI2_CAPABLE(ha) ?
+-              icb24->frame_payload_size : ha->init_cb->frame_payload_size));
++      eiter->a.max_frame_size = cpu_to_be32(ha->frame_payload_size);
+       alen = sizeof(eiter->a.max_frame_size);
+       alen += FDMI_ATTR_TYPELEN(eiter);
+       eiter->len = cpu_to_be16(alen);
+diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
+index ad96bc19ed05..ef6857ad148d 100644
+--- a/drivers/scsi/qla2xxx/qla_init.c
++++ b/drivers/scsi/qla2xxx/qla_init.c
+@@ -4520,6 +4520,8 @@ qla2x00_init_rings(scsi_qla_host_t *vha)
+                        BIT_6) != 0;
+               ql_dbg(ql_dbg_init, vha, 0x00bc, "FA-WWPN Support: %s.\n",
+                   (ha->flags.fawwpn_enabled) ? "enabled" : "disabled");
++              /* Init_cb will be reused for other command(s).  Save a backup copy of port_name */
++              memcpy(ha->port_name, ha->init_cb->port_name, WWN_SIZE);
+       }
+       /* ELS pass through payload is limit by frame size. */
+diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
+index 5f2949e03fc8..fc252dbfb0bf 100644
+--- a/drivers/scsi/qla2xxx/qla_isr.c
++++ b/drivers/scsi/qla2xxx/qla_isr.c
+@@ -1354,9 +1354,7 @@ qla2x00_async_event(scsi_qla_host_t *vha, struct rsp_que *rsp, uint16_t *mb)
+                       if (!vha->vp_idx) {
+                               if (ha->flags.fawwpn_enabled &&
+                                   (ha->current_topology == ISP_CFG_F)) {
+-                                      void *wwpn = ha->init_cb->port_name;
+-
+-                                      memcpy(vha->port_name, wwpn, WWN_SIZE);
++                                      memcpy(vha->port_name, ha->port_name, WWN_SIZE);
+                                       fc_host_port_name(vha->host) =
+                                           wwn_to_u64(vha->port_name);
+                                       ql_dbg(ql_dbg_init + ql_dbg_verbose,
+-- 
+2.35.1
+
diff --git a/queue-5.19/scsi-qla2xxx-fix-losing-fcp-2-targets-during-port-pe.patch b/queue-5.19/scsi-qla2xxx-fix-losing-fcp-2-targets-during-port-pe.patch
new file mode 100644 (file)
index 0000000..270ba67
--- /dev/null
@@ -0,0 +1,41 @@
+From 784659d081ede75e363b4ecc62e6719d952efee0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 22:35:03 -0700
+Subject: scsi: qla2xxx: Fix losing FCP-2 targets during port perturbation
+ tests
+
+From: Arun Easi <aeasi@marvell.com>
+
+[ Upstream commit 58d1c124cd79ea686b512043c5bd515590b2ed95 ]
+
+When a mix of FCP-2 (tape) and non-FCP-2 targets are present, FCP-2 target
+state was incorrectly transitioned when both of the targets were gone. Fix
+this by ignoring state transition for FCP-2 targets.
+
+Link: https://lore.kernel.org/r/20220616053508.27186-7-njavali@marvell.com
+Fixes: 44c57f205876 ("scsi: qla2xxx: Changes to support FCP2 Target")
+Cc: stable@vger.kernel.org
+Signed-off-by: Arun Easi <aeasi@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_gs.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c
+index e811de2f6a25..f56b578475ba 100644
+--- a/drivers/scsi/qla2xxx/qla_gs.c
++++ b/drivers/scsi/qla2xxx/qla_gs.c
+@@ -3578,7 +3578,7 @@ void qla24xx_async_gnnft_done(scsi_qla_host_t *vha, srb_t *sp)
+                               do_delete) {
+                               if (fcport->loop_id != FC_NO_LOOP_ID) {
+                                       if (fcport->flags & FCF_FCP2_DEVICE)
+-                                              fcport->logout_on_delete = 0;
++                                              continue;
+                                       ql_log(ql_log_warn, vha, 0x20f0,
+                                              "%s %d %8phC post del sess\n",
+-- 
+2.35.1
+
diff --git a/queue-5.19/scsi-qla2xxx-fix-losing-fcp-2-targets-on-long-port-d.patch b/queue-5.19/scsi-qla2xxx-fix-losing-fcp-2-targets-on-long-port-d.patch
new file mode 100644 (file)
index 0000000..5eeb237
--- /dev/null
@@ -0,0 +1,72 @@
+From f71cffc3a88951cfc872dd7ad62e3145f7852d5d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 22:35:06 -0700
+Subject: scsi: qla2xxx: Fix losing FCP-2 targets on long port disable with
+ I/Os
+
+From: Arun Easi <aeasi@marvell.com>
+
+[ Upstream commit 2416ccd3815ba1613e10a6da0a24ef21acfe5633 ]
+
+FCP-2 devices were not coming back online once they were lost, login
+retries exhausted, and then came back up.  Fix this by accepting RSCN when
+the device is not online.
+
+Link: https://lore.kernel.org/r/20220616053508.27186-10-njavali@marvell.com
+Fixes: 44c57f205876 ("scsi: qla2xxx: Changes to support FCP2 Target")
+Cc: stable@vger.kernel.org
+Signed-off-by: Arun Easi <aeasi@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_init.c | 12 ++++++++----
+ 1 file changed, 8 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
+index 88ca398be485..b6c3f66c4988 100644
+--- a/drivers/scsi/qla2xxx/qla_init.c
++++ b/drivers/scsi/qla2xxx/qla_init.c
+@@ -1825,7 +1825,8 @@ void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea)
+       case RSCN_PORT_ADDR:
+               fcport = qla2x00_find_fcport_by_nportid(vha, &ea->id, 1);
+               if (fcport) {
+-                      if (fcport->flags & FCF_FCP2_DEVICE) {
++                      if (fcport->flags & FCF_FCP2_DEVICE &&
++                          atomic_read(&fcport->state) == FCS_ONLINE) {
+                               ql_dbg(ql_dbg_disc, vha, 0x2115,
+                                      "Delaying session delete for FCP2 portid=%06x %8phC ",
+                                       fcport->d_id.b24, fcport->port_name);
+@@ -1857,7 +1858,8 @@ void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea)
+               break;
+       case RSCN_AREA_ADDR:
+               list_for_each_entry(fcport, &vha->vp_fcports, list) {
+-                      if (fcport->flags & FCF_FCP2_DEVICE)
++                      if (fcport->flags & FCF_FCP2_DEVICE &&
++                          atomic_read(&fcport->state) == FCS_ONLINE)
+                               continue;
+                       if ((ea->id.b24 & 0xffff00) == (fcport->d_id.b24 & 0xffff00)) {
+@@ -1868,7 +1870,8 @@ void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea)
+               break;
+       case RSCN_DOM_ADDR:
+               list_for_each_entry(fcport, &vha->vp_fcports, list) {
+-                      if (fcport->flags & FCF_FCP2_DEVICE)
++                      if (fcport->flags & FCF_FCP2_DEVICE &&
++                          atomic_read(&fcport->state) == FCS_ONLINE)
+                               continue;
+                       if ((ea->id.b24 & 0xff0000) == (fcport->d_id.b24 & 0xff0000)) {
+@@ -1880,7 +1883,8 @@ void qla2x00_handle_rscn(scsi_qla_host_t *vha, struct event_arg *ea)
+       case RSCN_FAB_ADDR:
+       default:
+               list_for_each_entry(fcport, &vha->vp_fcports, list) {
+-                      if (fcport->flags & FCF_FCP2_DEVICE)
++                      if (fcport->flags & FCF_FCP2_DEVICE &&
++                          atomic_read(&fcport->state) == FCS_ONLINE)
+                               continue;
+                       fcport->scan_needed = 1;
+-- 
+2.35.1
+
diff --git a/queue-5.19/scsi-qla2xxx-fix-losing-target-when-it-reappears-dur.patch b/queue-5.19/scsi-qla2xxx-fix-losing-target-when-it-reappears-dur.patch
new file mode 100644 (file)
index 0000000..608b792
--- /dev/null
@@ -0,0 +1,84 @@
+From ec0466138dab2ba0b7570578a316aef299cf25a8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 22:35:04 -0700
+Subject: scsi: qla2xxx: Fix losing target when it reappears during delete
+
+From: Arun Easi <aeasi@marvell.com>
+
+[ Upstream commit 118b0c863c8f5629cc5271fc24d72d926e0715d9 ]
+
+FC target disappeared during port perturbation tests due to a race that
+tramples target state.  Fix the issue by adding state checks before
+proceeding.
+
+Link: https://lore.kernel.org/r/20220616053508.27186-8-njavali@marvell.com
+Fixes: 44c57f205876 ("scsi: qla2xxx: Changes to support FCP2 Target")
+Cc: stable@vger.kernel.org
+Signed-off-by: Arun Easi <aeasi@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_attr.c | 24 +++++++++++++++++-------
+ 1 file changed, 17 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_attr.c b/drivers/scsi/qla2xxx/qla_attr.c
+index 3b3e4234f37a..412ad888bdc1 100644
+--- a/drivers/scsi/qla2xxx/qla_attr.c
++++ b/drivers/scsi/qla2xxx/qla_attr.c
+@@ -2716,17 +2716,24 @@ qla2x00_dev_loss_tmo_callbk(struct fc_rport *rport)
+       if (!fcport)
+               return;
+-      /* Now that the rport has been deleted, set the fcport state to
+-         FCS_DEVICE_DEAD */
+-      qla2x00_set_fcport_state(fcport, FCS_DEVICE_DEAD);
++
++      /*
++       * Now that the rport has been deleted, set the fcport state to
++       * FCS_DEVICE_DEAD, if the fcport is still lost.
++       */
++      if (fcport->scan_state != QLA_FCPORT_FOUND)
++              qla2x00_set_fcport_state(fcport, FCS_DEVICE_DEAD);
+       /*
+        * Transport has effectively 'deleted' the rport, clear
+        * all local references.
+        */
+       spin_lock_irqsave(host->host_lock, flags);
+-      fcport->rport = fcport->drport = NULL;
+-      *((fc_port_t **)rport->dd_data) = NULL;
++      /* Confirm port has not reappeared before clearing pointers. */
++      if (rport->port_state != FC_PORTSTATE_ONLINE) {
++              fcport->rport = fcport->drport = NULL;
++              *((fc_port_t **)rport->dd_data) = NULL;
++      }
+       spin_unlock_irqrestore(host->host_lock, flags);
+       if (test_bit(ABORT_ISP_ACTIVE, &fcport->vha->dpc_flags))
+@@ -2759,9 +2766,12 @@ qla2x00_terminate_rport_io(struct fc_rport *rport)
+       /*
+        * At this point all fcport's software-states are cleared.  Perform any
+        * final cleanup of firmware resources (PCBs and XCBs).
++       *
++       * Attempt to cleanup only lost devices.
+        */
+       if (fcport->loop_id != FC_NO_LOOP_ID) {
+-              if (IS_FWI2_CAPABLE(fcport->vha->hw)) {
++              if (IS_FWI2_CAPABLE(fcport->vha->hw) &&
++                  fcport->scan_state != QLA_FCPORT_FOUND) {
+                       if (fcport->loop_id != FC_NO_LOOP_ID)
+                               fcport->logout_on_delete = 1;
+@@ -2771,7 +2781,7 @@ qla2x00_terminate_rport_io(struct fc_rport *rport)
+                                      __LINE__);
+                               qlt_schedule_sess_for_deletion(fcport);
+                       }
+-              } else {
++              } else if (!IS_FWI2_CAPABLE(fcport->vha->hw)) {
+                       qla2x00_port_logout(fcport->vha, fcport);
+               }
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.19/scsi-qla2xxx-fix-response-queue-handler-reading-stal.patch b/queue-5.19/scsi-qla2xxx-fix-response-queue-handler-reading-stal.patch
new file mode 100644 (file)
index 0000000..36ca237
--- /dev/null
@@ -0,0 +1,128 @@
+From 3cd334b11de490fefa146ff75313d30f8c9f75a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 22:20:39 -0700
+Subject: scsi: qla2xxx: Fix response queue handler reading stale packets
+
+From: Arun Easi <aeasi@marvell.com>
+
+[ Upstream commit b1f707146923335849fb70237eec27d4d1ae7d62 ]
+
+On some platforms, the current logic of relying on finding new packet
+solely based on signature pattern can lead to driver reading stale
+packets. Though this is a bug in those platforms, reduce such exposures by
+limiting reading packets until the IN pointer.
+
+Two module parameters are introduced:
+
+  ql2xrspq_follow_inptr:
+
+    When set, on newer adapters that has queue pointer shadowing, look for
+    response packets only until response queue in pointer.
+
+    When reset, response packets are read based on a signature pattern
+    logic (old way).
+
+  ql2xrspq_follow_inptr_legacy:
+
+    Like ql2xrspq_follow_inptr, but for those adapters where there is no
+    queue pointer shadowing.
+
+Link: https://lore.kernel.org/r/20220713052045.10683-5-njavali@marvell.com
+Cc: stable@vger.kernel.org
+Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
+Signed-off-by: Arun Easi <aeasi@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_gbl.h |  2 ++
+ drivers/scsi/qla2xxx/qla_isr.c | 24 +++++++++++++++++++++++-
+ drivers/scsi/qla2xxx/qla_os.c  | 10 ++++++++++
+ 3 files changed, 35 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_gbl.h b/drivers/scsi/qla2xxx/qla_gbl.h
+index dac27b5ff0ac..a211ed18d4e9 100644
+--- a/drivers/scsi/qla2xxx/qla_gbl.h
++++ b/drivers/scsi/qla2xxx/qla_gbl.h
+@@ -193,6 +193,8 @@ extern int ql2xsecenable;
+ extern int ql2xenforce_iocb_limit;
+ extern int ql2xabts_wait_nvme;
+ extern u32 ql2xnvme_queues;
++extern int ql2xrspq_follow_inptr;
++extern int ql2xrspq_follow_inptr_legacy;
+ extern int qla2x00_loop_reset(scsi_qla_host_t *);
+ extern void qla2x00_abort_all_cmds(scsi_qla_host_t *, int);
+diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
+index fc252dbfb0bf..1353a5b61c14 100644
+--- a/drivers/scsi/qla2xxx/qla_isr.c
++++ b/drivers/scsi/qla2xxx/qla_isr.c
+@@ -3770,6 +3770,8 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
+       struct qla_hw_data *ha = vha->hw;
+       struct purex_entry_24xx *purex_entry;
+       struct purex_item *pure_item;
++      u16 rsp_in = 0;
++      int follow_inptr, is_shadow_hba;
+       if (!ha->flags.fw_started)
+               return;
+@@ -3779,7 +3781,25 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
+               qla_cpu_update(rsp->qpair, smp_processor_id());
+       }
+-      while (rsp->ring_ptr->signature != RESPONSE_PROCESSED) {
++#define __update_rsp_in(_update, _is_shadow_hba, _rsp, _rsp_in)               \
++      do {                                                            \
++              if (_update) {                                          \
++                      _rsp_in = _is_shadow_hba ? *(_rsp)->in_ptr :    \
++                              rd_reg_dword_relaxed((_rsp)->rsp_q_in); \
++              }                                                       \
++      } while (0)
++
++      is_shadow_hba = IS_SHADOW_REG_CAPABLE(ha);
++      follow_inptr = is_shadow_hba ? ql2xrspq_follow_inptr :
++                              ql2xrspq_follow_inptr_legacy;
++
++      __update_rsp_in(follow_inptr, is_shadow_hba, rsp, rsp_in);
++
++      while ((likely(follow_inptr &&
++                     rsp->ring_index != rsp_in &&
++                     rsp->ring_ptr->signature != RESPONSE_PROCESSED)) ||
++                     (!follow_inptr &&
++                      rsp->ring_ptr->signature != RESPONSE_PROCESSED)) {
+               pkt = (struct sts_entry_24xx *)rsp->ring_ptr;
+               rsp->ring_index++;
+@@ -3892,6 +3912,8 @@ void qla24xx_process_response_queue(struct scsi_qla_host *vha,
+                               }
+                               pure_item = qla27xx_copy_fpin_pkt(vha,
+                                                         (void **)&pkt, &rsp);
++                              __update_rsp_in(follow_inptr, is_shadow_hba,
++                                              rsp, rsp_in);
+                               if (!pure_item)
+                                       break;
+                               qla24xx_queue_purex_item(vha, pure_item,
+diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
+index 66f1723c8583..0bbb48d31441 100644
+--- a/drivers/scsi/qla2xxx/qla_os.c
++++ b/drivers/scsi/qla2xxx/qla_os.c
+@@ -338,6 +338,16 @@ module_param(ql2xdelay_before_pci_error_handling, uint, 0644);
+ MODULE_PARM_DESC(ql2xdelay_before_pci_error_handling,
+       "Number of seconds delayed before qla begin PCI error self-handling (default: 5).\n");
++int ql2xrspq_follow_inptr = 1;
++module_param(ql2xrspq_follow_inptr, int, 0644);
++MODULE_PARM_DESC(ql2xrspq_follow_inptr,
++               "Follow RSP IN pointer for RSP updates for HBAs 27xx and newer (default: 1).");
++
++int ql2xrspq_follow_inptr_legacy = 1;
++module_param(ql2xrspq_follow_inptr_legacy, int, 0644);
++MODULE_PARM_DESC(ql2xrspq_follow_inptr_legacy,
++               "Follow RSP IN pointer for RSP updates for HBAs older than 27XX. (default: 1).");
++
+ static void qla2x00_clear_drv_active(struct qla_hw_data *);
+ static void qla2x00_free_device(scsi_qla_host_t *);
+ static int qla2xxx_map_queues(struct Scsi_Host *shost);
+-- 
+2.35.1
+
diff --git a/queue-5.19/scsi-qla2xxx-turn-off-multi-queue-for-8g-adapters.patch-20754 b/queue-5.19/scsi-qla2xxx-turn-off-multi-queue-for-8g-adapters.patch-20754
new file mode 100644 (file)
index 0000000..5c46406
--- /dev/null
@@ -0,0 +1,68 @@
+From 707c5b307f91b8248188d891e19d58b2f5511157 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 22:35:01 -0700
+Subject: scsi: qla2xxx: Turn off multi-queue for 8G adapters
+
+From: Quinn Tran <qutran@marvell.com>
+
+[ Upstream commit 5304673bdb1635e27555bd636fd5d6956f1cd552 ]
+
+For 8G adapters, multi-queue was enabled accidentally. Make sure
+multi-queue is not enabled.
+
+Link: https://lore.kernel.org/r/20220616053508.27186-5-njavali@marvell.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Quinn Tran <qutran@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_def.h |  4 ++--
+ drivers/scsi/qla2xxx/qla_isr.c | 16 ++++++----------
+ 2 files changed, 8 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
+index b4ff8eea7879..77ef82df6d4d 100644
+--- a/drivers/scsi/qla2xxx/qla_def.h
++++ b/drivers/scsi/qla2xxx/qla_def.h
+@@ -4260,8 +4260,8 @@ struct qla_hw_data {
+ #define IS_OEM_001(ha)          ((ha)->device_type & DT_OEM_001)
+ #define HAS_EXTENDED_IDS(ha)    ((ha)->device_type & DT_EXTENDED_IDS)
+ #define IS_CT6_SUPPORTED(ha)  ((ha)->device_type & DT_CT6_SUPPORTED)
+-#define IS_MQUE_CAPABLE(ha)   ((ha)->mqenable || IS_QLA83XX(ha) || \
+-                              IS_QLA27XX(ha) || IS_QLA28XX(ha))
++#define IS_MQUE_CAPABLE(ha)   (IS_QLA83XX(ha) || IS_QLA27XX(ha) || \
++                               IS_QLA28XX(ha))
+ #define IS_BIDI_CAPABLE(ha) \
+     (IS_QLA25XX(ha) || IS_QLA2031(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha))
+ /* Bit 21 of fw_attributes decides the MCTP capabilities */
+diff --git a/drivers/scsi/qla2xxx/qla_isr.c b/drivers/scsi/qla2xxx/qla_isr.c
+index ae47fc559ae0..5f2949e03fc8 100644
+--- a/drivers/scsi/qla2xxx/qla_isr.c
++++ b/drivers/scsi/qla2xxx/qla_isr.c
+@@ -4420,16 +4420,12 @@ qla24xx_enable_msix(struct qla_hw_data *ha, struct rsp_que *rsp)
+       }
+       /* Enable MSI-X vector for response queue update for queue 0 */
+-      if (IS_QLA83XX(ha) || IS_QLA27XX(ha) || IS_QLA28XX(ha)) {
+-              if (ha->msixbase && ha->mqiobase &&
+-                  (ha->max_rsp_queues > 1 || ha->max_req_queues > 1 ||
+-                   ql2xmqsupport))
+-                      ha->mqenable = 1;
+-      } else
+-              if (ha->mqiobase &&
+-                  (ha->max_rsp_queues > 1 || ha->max_req_queues > 1 ||
+-                   ql2xmqsupport))
+-                      ha->mqenable = 1;
++      if (IS_MQUE_CAPABLE(ha) &&
++          (ha->msixbase && ha->mqiobase && ha->max_qpairs))
++              ha->mqenable = 1;
++      else
++              ha->mqenable = 0;
++
+       ql_dbg(ql_dbg_multiq, vha, 0xc005,
+           "mqiobase=%p, max_rsp_queues=%d, max_req_queues=%d.\n",
+           ha->mqiobase, ha->max_rsp_queues, ha->max_req_queues);
+-- 
+2.35.1
+
diff --git a/queue-5.19/scsi-qla2xxx-update-manufacturer-details.patch b/queue-5.19/scsi-qla2xxx-update-manufacturer-details.patch
new file mode 100644 (file)
index 0000000..1ff9748
--- /dev/null
@@ -0,0 +1,52 @@
+From 8df583fe738190a8f20b53f8c755ef7fdc1020b2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 22:20:44 -0700
+Subject: scsi: qla2xxx: Update manufacturer details
+
+From: Bikash Hazarika <bhazarika@marvell.com>
+
+[ Upstream commit 1ccad27716ecad1fd58c35e579bedb81fa5e1ad5 ]
+
+Update manufacturer details to indicate Marvell Semiconductors.
+
+Link: https://lore.kernel.org/r/20220713052045.10683-10-njavali@marvell.com
+Cc: stable@vger.kernel.org
+Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
+Signed-off-by: Bikash Hazarika <bhazarika@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_def.h | 2 +-
+ drivers/scsi/qla2xxx/qla_gs.c  | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
+index 5d594c82d14c..5a1a2ab7b852 100644
+--- a/drivers/scsi/qla2xxx/qla_def.h
++++ b/drivers/scsi/qla2xxx/qla_def.h
+@@ -78,7 +78,7 @@ typedef union {
+ #include "qla_nvme.h"
+ #define QLA2XXX_DRIVER_NAME   "qla2xxx"
+ #define QLA2XXX_APIDEV                "ql2xapidev"
+-#define QLA2XXX_MANUFACTURER  "QLogic Corporation"
++#define QLA2XXX_MANUFACTURER  "Marvell Semiconductor, Inc."
+ /*
+  * We have MAILBOX_REGISTER_COUNT sized arrays in a few places,
+diff --git a/drivers/scsi/qla2xxx/qla_gs.c b/drivers/scsi/qla2xxx/qla_gs.c
+index c999221912e5..0a95816afd0b 100644
+--- a/drivers/scsi/qla2xxx/qla_gs.c
++++ b/drivers/scsi/qla2xxx/qla_gs.c
+@@ -1616,7 +1616,7 @@ qla2x00_hba_attributes(scsi_qla_host_t *vha, void *entries,
+       eiter->type = cpu_to_be16(FDMI_HBA_MANUFACTURER);
+       alen = scnprintf(
+               eiter->a.manufacturer, sizeof(eiter->a.manufacturer),
+-              "%s", "QLogic Corporation");
++              "%s", QLA2XXX_MANUFACTURER);
+       alen += FDMI_ATTR_ALIGNMENT(alen);
+       alen += FDMI_ATTR_TYPELEN(eiter);
+       eiter->len = cpu_to_be16(alen);
+-- 
+2.35.1
+
diff --git a/queue-5.19/scsi-qla2xxx-wind-down-adapter-after-pcie-error.patch-31117 b/queue-5.19/scsi-qla2xxx-wind-down-adapter-after-pcie-error.patch-31117
new file mode 100644 (file)
index 0000000..e26b7dd
--- /dev/null
@@ -0,0 +1,210 @@
+From c751cb3aef1515ae098ecbb8829f3aabc5f17cac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jun 2022 22:35:00 -0700
+Subject: scsi: qla2xxx: Wind down adapter after PCIe error
+
+From: Quinn Tran <qutran@marvell.com>
+
+[ Upstream commit d3117c83ba316b3200d9f2fe900f2b9a5525a25c ]
+
+Put adapter into a wind down state if OS does not make any attempt to
+recover the adapter after PCIe error.
+
+Link: https://lore.kernel.org/r/20220616053508.27186-4-njavali@marvell.com
+Cc: stable@vger.kernel.org
+Signed-off-by: Quinn Tran <qutran@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_bsg.c  | 10 ++++++-
+ drivers/scsi/qla2xxx/qla_def.h  |  4 +++
+ drivers/scsi/qla2xxx/qla_init.c | 20 ++++++++++++++
+ drivers/scsi/qla2xxx/qla_os.c   | 48 +++++++++++++++++++++++++++++++++
+ 4 files changed, 81 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_bsg.c b/drivers/scsi/qla2xxx/qla_bsg.c
+index c2f00f076f79..726af9e40572 100644
+--- a/drivers/scsi/qla2xxx/qla_bsg.c
++++ b/drivers/scsi/qla2xxx/qla_bsg.c
+@@ -2975,6 +2975,13 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job)
+       ql_log(ql_log_info, vha, 0x708b, "%s CMD timeout. bsg ptr %p.\n",
+           __func__, bsg_job);
++
++      if (qla2x00_isp_reg_stat(ha)) {
++              ql_log(ql_log_info, vha, 0x9007,
++                  "PCI/Register disconnect.\n");
++              qla_pci_set_eeh_busy(vha);
++      }
++
+       /* find the bsg job from the active list of commands */
+       spin_lock_irqsave(&ha->hardware_lock, flags);
+       for (que = 0; que < ha->max_req_queues; que++) {
+@@ -2992,7 +2999,8 @@ qla24xx_bsg_timeout(struct bsg_job *bsg_job)
+                           sp->u.bsg_job == bsg_job) {
+                               req->outstanding_cmds[cnt] = NULL;
+                               spin_unlock_irqrestore(&ha->hardware_lock, flags);
+-                              if (ha->isp_ops->abort_command(sp)) {
++
++                              if (!ha->flags.eeh_busy && ha->isp_ops->abort_command(sp)) {
+                                       ql_log(ql_log_warn, vha, 0x7089,
+                                           "mbx abort_command failed.\n");
+                                       bsg_reply->result = -EIO;
+diff --git a/drivers/scsi/qla2xxx/qla_def.h b/drivers/scsi/qla2xxx/qla_def.h
+index e8f69c486be1..b4ff8eea7879 100644
+--- a/drivers/scsi/qla2xxx/qla_def.h
++++ b/drivers/scsi/qla2xxx/qla_def.h
+@@ -4040,6 +4040,9 @@ struct qla_hw_data {
+               uint32_t        n2n_fw_acc_sec:1;
+               uint32_t        plogi_template_valid:1;
+               uint32_t        port_isolated:1;
++              uint32_t        eeh_flush:2;
++#define EEH_FLUSH_RDY  1
++#define EEH_FLUSH_DONE 2
+       } flags;
+       uint16_t max_exchg;
+@@ -4074,6 +4077,7 @@ struct qla_hw_data {
+       uint32_t                rsp_que_len;
+       uint32_t                req_que_off;
+       uint32_t                rsp_que_off;
++      unsigned long           eeh_jif;
+       /* Multi queue data structs */
+       device_reg_t *mqiobase;
+diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
+index 3f3417a3e891..88ca398be485 100644
+--- a/drivers/scsi/qla2xxx/qla_init.c
++++ b/drivers/scsi/qla2xxx/qla_init.c
+@@ -47,6 +47,7 @@ qla2x00_sp_timeout(struct timer_list *t)
+ {
+       srb_t *sp = from_timer(sp, t, u.iocb_cmd.timer);
+       struct srb_iocb *iocb;
++      scsi_qla_host_t *vha = sp->vha;
+       WARN_ON(irqs_disabled());
+       iocb = &sp->u.iocb_cmd;
+@@ -54,6 +55,12 @@ qla2x00_sp_timeout(struct timer_list *t)
+       /* ref: TMR */
+       kref_put(&sp->cmd_kref, qla2x00_sp_release);
++
++      if (vha && qla2x00_isp_reg_stat(vha->hw)) {
++              ql_log(ql_log_info, vha, 0x9008,
++                  "PCI/Register disconnect.\n");
++              qla_pci_set_eeh_busy(vha);
++      }
+ }
+ void qla2x00_sp_free(srb_t *sp)
+@@ -9657,6 +9664,12 @@ int qla2xxx_disable_port(struct Scsi_Host *host)
+       vha->hw->flags.port_isolated = 1;
++      if (qla2x00_isp_reg_stat(vha->hw)) {
++              ql_log(ql_log_info, vha, 0x9006,
++                  "PCI/Register disconnect, exiting.\n");
++              qla_pci_set_eeh_busy(vha);
++              return FAILED;
++      }
+       if (qla2x00_chip_is_down(vha))
+               return 0;
+@@ -9672,6 +9685,13 @@ int qla2xxx_enable_port(struct Scsi_Host *host)
+ {
+       scsi_qla_host_t *vha = shost_priv(host);
++      if (qla2x00_isp_reg_stat(vha->hw)) {
++              ql_log(ql_log_info, vha, 0x9001,
++                  "PCI/Register disconnect, exiting.\n");
++              qla_pci_set_eeh_busy(vha);
++              return FAILED;
++      }
++
+       vha->hw->flags.port_isolated = 0;
+       /* Set the flag to 1, so that isp_abort can proceed */
+       vha->flags.online = 1;
+diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
+index 73073fb08369..6fd5c21ad1f5 100644
+--- a/drivers/scsi/qla2xxx/qla_os.c
++++ b/drivers/scsi/qla2xxx/qla_os.c
+@@ -333,6 +333,11 @@ MODULE_PARM_DESC(ql2xabts_wait_nvme,
+                "To wait for ABTS response on I/O timeouts for NVMe. (default: 1)");
++u32 ql2xdelay_before_pci_error_handling = 5;
++module_param(ql2xdelay_before_pci_error_handling, uint, 0644);
++MODULE_PARM_DESC(ql2xdelay_before_pci_error_handling,
++      "Number of seconds delayed before qla begin PCI error self-handling (default: 5).\n");
++
+ static void qla2x00_clear_drv_active(struct qla_hw_data *);
+ static void qla2x00_free_device(scsi_qla_host_t *);
+ static int qla2xxx_map_queues(struct Scsi_Host *shost);
+@@ -7238,6 +7243,44 @@ static void qla_heart_beat(struct scsi_qla_host *vha, u16 dpc_started)
+       }
+ }
++static void qla_wind_down_chip(scsi_qla_host_t *vha)
++{
++      struct qla_hw_data *ha = vha->hw;
++
++      if (!ha->flags.eeh_busy)
++              return;
++      if (ha->pci_error_state)
++              /* system is trying to recover */
++              return;
++
++      /*
++       * Current system is not handling PCIE error.  At this point, this is
++       * best effort to wind down the adapter.
++       */
++      if (time_after_eq(jiffies, ha->eeh_jif + ql2xdelay_before_pci_error_handling * HZ) &&
++          !ha->flags.eeh_flush) {
++              ql_log(ql_log_info, vha, 0x9009,
++                  "PCI Error detected, attempting to reset hardware.\n");
++
++              ha->isp_ops->reset_chip(vha);
++              ha->isp_ops->disable_intrs(ha);
++
++              ha->flags.eeh_flush = EEH_FLUSH_RDY;
++              ha->eeh_jif = jiffies;
++
++      } else if (ha->flags.eeh_flush == EEH_FLUSH_RDY &&
++          time_after_eq(jiffies, ha->eeh_jif +  5 * HZ)) {
++              pci_clear_master(ha->pdev);
++
++              /* flush all command */
++              qla2x00_abort_isp_cleanup(vha);
++              ha->flags.eeh_flush = EEH_FLUSH_DONE;
++
++              ql_log(ql_log_info, vha, 0x900a,
++                  "PCI Error handling complete, all IOs aborted.\n");
++      }
++}
++
+ /**************************************************************************
+ *   qla2x00_timer
+ *
+@@ -7261,6 +7304,8 @@ qla2x00_timer(struct timer_list *t)
+       fc_port_t *fcport = NULL;
+       if (ha->flags.eeh_busy) {
++              qla_wind_down_chip(vha);
++
+               ql_dbg(ql_dbg_timer, vha, 0x6000,
+                   "EEH = %d, restarting timer.\n",
+                   ha->flags.eeh_busy);
+@@ -7841,6 +7886,9 @@ void qla_pci_set_eeh_busy(struct scsi_qla_host *vha)
+       spin_lock_irqsave(&base_vha->work_lock, flags);
+       if (!ha->flags.eeh_busy) {
++              ha->eeh_jif = jiffies;
++              ha->flags.eeh_flush = 0;
++
+               ha->flags.eeh_busy = 1;
+               do_cleanup = true;
+       }
+-- 
+2.35.1
+
diff --git a/queue-5.19/scsi-qla2xxx-zero-undefined-mailbox-in-registers.patch-4895 b/queue-5.19/scsi-qla2xxx-zero-undefined-mailbox-in-registers.patch-4895
new file mode 100644 (file)
index 0000000..77770fb
--- /dev/null
@@ -0,0 +1,41 @@
+From 74f0c3d28a9666032ef45b5df22ebf8019605add Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 22:20:38 -0700
+Subject: scsi: qla2xxx: Zero undefined mailbox IN registers
+
+From: Bikash Hazarika <bhazarika@marvell.com>
+
+[ Upstream commit 6c96a3c7d49593ef15805f5e497601c87695abc9 ]
+
+While requesting a new mailbox command, driver does not write any data to
+unused registers.  Initialize the unused register value to zero while
+requesting a new mailbox command to prevent stale entry access by firmware.
+
+Link: https://lore.kernel.org/r/20220713052045.10683-4-njavali@marvell.com
+Cc: stable@vger.kernel.org
+Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
+Signed-off-by: Bikash Hazarika <bhazarika@marvell.com>
+Signed-off-by: Quinn Tran <qutran@marvell.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_mbx.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/scsi/qla2xxx/qla_mbx.c b/drivers/scsi/qla2xxx/qla_mbx.c
+index 1b154ab025bd..bcade1deb798 100644
+--- a/drivers/scsi/qla2xxx/qla_mbx.c
++++ b/drivers/scsi/qla2xxx/qla_mbx.c
+@@ -238,6 +238,8 @@ qla2x00_mailbox_command(scsi_qla_host_t *vha, mbx_cmd_t *mcp)
+                       ql_dbg(ql_dbg_mbx, vha, 0x1112,
+                           "mbox[%d]<-0x%04x\n", cnt, *iptr);
+                       wrt_reg_word(optr, *iptr);
++              } else {
++                      wrt_reg_word(optr, 0);
+               }
+               mboxes >>= 1;
+-- 
+2.35.1
+
diff --git a/queue-5.19/scsi-revert-scsi-qla2xxx-fix-disk-failure-to-redisco.patch b/queue-5.19/scsi-revert-scsi-qla2xxx-fix-disk-failure-to-redisco.patch
new file mode 100644 (file)
index 0000000..53e439f
--- /dev/null
@@ -0,0 +1,74 @@
+From 0c185f582d18580b73851b74739c9ab1501c2fa7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 12 Jul 2022 22:20:36 -0700
+Subject: scsi: Revert "scsi: qla2xxx: Fix disk failure to rediscover"
+
+From: Nilesh Javali <njavali@marvell.com>
+
+[ Upstream commit 5bc7b01c513a4a9b4cfe306e8d1720cfcfd3b8a3 ]
+
+This fixes the regression of NVMe discovery failure during driver load
+time.
+
+This reverts commit 6a45c8e137d4e2c72eecf1ac7cf64f2fdfcead99.
+
+Link: https://lore.kernel.org/r/20220713052045.10683-2-njavali@marvell.com
+Cc: stable@vger.kernel.org
+Reviewed-by: Himanshu Madhani <himanshu.madhani@oracle.com>
+Signed-off-by: Nilesh Javali <njavali@marvell.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/qla2xxx/qla_init.c | 5 ++---
+ drivers/scsi/qla2xxx/qla_nvme.c | 5 -----
+ 2 files changed, 2 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/scsi/qla2xxx/qla_init.c b/drivers/scsi/qla2xxx/qla_init.c
+index b6c3f66c4988..ad96bc19ed05 100644
+--- a/drivers/scsi/qla2xxx/qla_init.c
++++ b/drivers/scsi/qla2xxx/qla_init.c
+@@ -5778,8 +5778,6 @@ qla2x00_reg_remote_port(scsi_qla_host_t *vha, fc_port_t *fcport)
+       if (atomic_read(&fcport->state) == FCS_ONLINE)
+               return;
+-      qla2x00_set_fcport_state(fcport, FCS_ONLINE);
+-
+       rport_ids.node_name = wwn_to_u64(fcport->node_name);
+       rport_ids.port_name = wwn_to_u64(fcport->port_name);
+       rport_ids.port_id = fcport->d_id.b.domain << 16 |
+@@ -5880,7 +5878,6 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport)
+               qla2x00_reg_remote_port(vha, fcport);
+               break;
+       case MODE_TARGET:
+-              qla2x00_set_fcport_state(fcport, FCS_ONLINE);
+               if (!vha->vha_tgt.qla_tgt->tgt_stop &&
+                       !vha->vha_tgt.qla_tgt->tgt_stopped)
+                       qlt_fc_port_added(vha, fcport);
+@@ -5898,6 +5895,8 @@ qla2x00_update_fcport(scsi_qla_host_t *vha, fc_port_t *fcport)
+       if (NVME_TARGET(vha->hw, fcport))
+               qla_nvme_register_remote(vha, fcport);
++      qla2x00_set_fcport_state(fcport, FCS_ONLINE);
++
+       if (IS_IIDMA_CAPABLE(vha->hw) && vha->hw->flags.gpsc_supported) {
+               if (fcport->id_changed) {
+                       fcport->id_changed = 0;
+diff --git a/drivers/scsi/qla2xxx/qla_nvme.c b/drivers/scsi/qla2xxx/qla_nvme.c
+index 87c9404aa401..7450c3458be7 100644
+--- a/drivers/scsi/qla2xxx/qla_nvme.c
++++ b/drivers/scsi/qla2xxx/qla_nvme.c
+@@ -37,11 +37,6 @@ int qla_nvme_register_remote(struct scsi_qla_host *vha, struct fc_port *fcport)
+               (fcport->nvme_flag & NVME_FLAG_REGISTERED))
+               return 0;
+-      if (atomic_read(&fcport->state) == FCS_ONLINE)
+-              return 0;
+-
+-      qla2x00_set_fcport_state(fcport, FCS_ONLINE);
+-
+       fcport->nvme_flag &= ~NVME_FLAG_RESETTING;
+       memset(&req, 0, sizeof(struct nvme_fc_port_info));
+-- 
+2.35.1
+
diff --git a/queue-5.19/scsi-sg-allow-waiting-for-commands-to-complete-on-re.patch b/queue-5.19/scsi-sg-allow-waiting-for-commands-to-complete-on-re.patch
new file mode 100644 (file)
index 0000000..c17f416
--- /dev/null
@@ -0,0 +1,147 @@
+From 7180280ffcbdb8a1d9212cc2afab8b3d7b72ce5d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Jul 2022 10:51:32 -0400
+Subject: scsi: sg: Allow waiting for commands to complete on removed device
+
+From: Tony Battersby <tonyb@cybernetics.com>
+
+[ Upstream commit 3455607fd7be10b449f5135c00dc306b85dc0d21 ]
+
+When a SCSI device is removed while in active use, currently sg will
+immediately return -ENODEV on any attempt to wait for active commands that
+were sent before the removal.  This is problematic for commands that use
+SG_FLAG_DIRECT_IO since the data buffer may still be in use by the kernel
+when userspace frees or reuses it after getting ENODEV, leading to
+corrupted userspace memory (in the case of READ-type commands) or corrupted
+data being sent to the device (in the case of WRITE-type commands).  This
+has been seen in practice when logging out of a iscsi_tcp session, where
+the iSCSI driver may still be processing commands after the device has been
+marked for removal.
+
+Change the policy to allow userspace to wait for active sg commands even
+when the device is being removed.  Return -ENODEV only when there are no
+more responses to read.
+
+Link: https://lore.kernel.org/r/5ebea46f-fe83-2d0b-233d-d0dcb362dd0a@cybernetics.com
+Cc: <stable@vger.kernel.org>
+Acked-by: Douglas Gilbert <dgilbert@interlog.com>
+Signed-off-by: Tony Battersby <tonyb@cybernetics.com>
+Signed-off-by: Martin K. Petersen <martin.petersen@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/scsi/sg.c | 53 +++++++++++++++++++++++++++++------------------
+ 1 file changed, 33 insertions(+), 20 deletions(-)
+
+diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
+index 118c7b4a8af2..340b050ad28d 100644
+--- a/drivers/scsi/sg.c
++++ b/drivers/scsi/sg.c
+@@ -195,7 +195,7 @@ static void sg_link_reserve(Sg_fd * sfp, Sg_request * srp, int size);
+ static void sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp);
+ static Sg_fd *sg_add_sfp(Sg_device * sdp);
+ static void sg_remove_sfp(struct kref *);
+-static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id);
++static Sg_request *sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy);
+ static Sg_request *sg_add_request(Sg_fd * sfp);
+ static int sg_remove_request(Sg_fd * sfp, Sg_request * srp);
+ static Sg_device *sg_get_dev(int dev);
+@@ -444,6 +444,7 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos)
+       Sg_fd *sfp;
+       Sg_request *srp;
+       int req_pack_id = -1;
++      bool busy;
+       sg_io_hdr_t *hp;
+       struct sg_header *old_hdr;
+       int retval;
+@@ -466,20 +467,16 @@ sg_read(struct file *filp, char __user *buf, size_t count, loff_t * ppos)
+       if (retval)
+               return retval;
+-      srp = sg_get_rq_mark(sfp, req_pack_id);
++      srp = sg_get_rq_mark(sfp, req_pack_id, &busy);
+       if (!srp) {             /* now wait on packet to arrive */
+-              if (atomic_read(&sdp->detaching))
+-                      return -ENODEV;
+               if (filp->f_flags & O_NONBLOCK)
+                       return -EAGAIN;
+               retval = wait_event_interruptible(sfp->read_wait,
+-                      (atomic_read(&sdp->detaching) ||
+-                      (srp = sg_get_rq_mark(sfp, req_pack_id))));
+-              if (atomic_read(&sdp->detaching))
+-                      return -ENODEV;
+-              if (retval)
+-                      /* -ERESTARTSYS as signal hit process */
+-                      return retval;
++                      ((srp = sg_get_rq_mark(sfp, req_pack_id, &busy)) ||
++                      (!busy && atomic_read(&sdp->detaching))));
++              if (!srp)
++                      /* signal or detaching */
++                      return retval ? retval : -ENODEV;
+       }
+       if (srp->header.interface_id != '\0')
+               return sg_new_read(sfp, buf, count, srp);
+@@ -940,9 +937,7 @@ sg_ioctl_common(struct file *filp, Sg_device *sdp, Sg_fd *sfp,
+               if (result < 0)
+                       return result;
+               result = wait_event_interruptible(sfp->read_wait,
+-                      (srp_done(sfp, srp) || atomic_read(&sdp->detaching)));
+-              if (atomic_read(&sdp->detaching))
+-                      return -ENODEV;
++                      srp_done(sfp, srp));
+               write_lock_irq(&sfp->rq_list_lock);
+               if (srp->done) {
+                       srp->done = 2;
+@@ -2079,19 +2074,28 @@ sg_unlink_reserve(Sg_fd * sfp, Sg_request * srp)
+ }
+ static Sg_request *
+-sg_get_rq_mark(Sg_fd * sfp, int pack_id)
++sg_get_rq_mark(Sg_fd * sfp, int pack_id, bool *busy)
+ {
+       Sg_request *resp;
+       unsigned long iflags;
++      *busy = false;
+       write_lock_irqsave(&sfp->rq_list_lock, iflags);
+       list_for_each_entry(resp, &sfp->rq_list, entry) {
+-              /* look for requests that are ready + not SG_IO owned */
+-              if ((1 == resp->done) && (!resp->sg_io_owned) &&
++              /* look for requests that are not SG_IO owned */
++              if ((!resp->sg_io_owned) &&
+                   ((-1 == pack_id) || (resp->header.pack_id == pack_id))) {
+-                      resp->done = 2; /* guard against other readers */
+-                      write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
+-                      return resp;
++                      switch (resp->done) {
++                      case 0: /* request active */
++                              *busy = true;
++                              break;
++                      case 1: /* request done; response ready to return */
++                              resp->done = 2; /* guard against other readers */
++                              write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
++                              return resp;
++                      case 2: /* response already being returned */
++                              break;
++                      }
+               }
+       }
+       write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
+@@ -2145,6 +2149,15 @@ sg_remove_request(Sg_fd * sfp, Sg_request * srp)
+               res = 1;
+       }
+       write_unlock_irqrestore(&sfp->rq_list_lock, iflags);
++
++      /*
++       * If the device is detaching, wakeup any readers in case we just
++       * removed the last response, which would leave nothing for them to
++       * return other than -ENODEV.
++       */
++      if (unlikely(atomic_read(&sfp->parentdp->detaching)))
++              wake_up_interruptible_all(&sfp->read_wait);
++
+       return res;
+ }
+-- 
+2.35.1
+
diff --git a/queue-5.19/serial-mvebu-uart-uart2-error-bits-clearing.patch-15528 b/queue-5.19/serial-mvebu-uart-uart2-error-bits-clearing.patch-15528
new file mode 100644 (file)
index 0000000..05a8e6c
--- /dev/null
@@ -0,0 +1,59 @@
+From 040076f0ab10143dc046c3347c9b3c04d0330d7a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 11:12:21 +0200
+Subject: serial: mvebu-uart: uart2 error bits clearing
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Narendra Hadke <nhadke@marvell.com>
+
+[ Upstream commit a7209541239e5dd44d981289e5f9059222d40fd1 ]
+
+For mvebu uart2, error bits are not cleared on buffer read.
+This causes interrupt loop and system hang.
+
+Cc: stable@vger.kernel.org
+Reviewed-by: Yi Guo <yi.guo@cavium.com>
+Reviewed-by: Nadav Haklai <nadavh@marvell.com>
+Signed-off-by: Narendra Hadke <nhadke@marvell.com>
+Signed-off-by: Pali Rohár <pali@kernel.org>
+Link: https://lore.kernel.org/r/20220726091221.12358-1-pali@kernel.org
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/mvebu-uart.c | 11 +++++++++++
+ 1 file changed, 11 insertions(+)
+
+diff --git a/drivers/tty/serial/mvebu-uart.c b/drivers/tty/serial/mvebu-uart.c
+index 93489fe334d0..65eaecd10b7c 100644
+--- a/drivers/tty/serial/mvebu-uart.c
++++ b/drivers/tty/serial/mvebu-uart.c
+@@ -265,6 +265,7 @@ static void mvebu_uart_rx_chars(struct uart_port *port, unsigned int status)
+       struct tty_port *tport = &port->state->port;
+       unsigned char ch = 0;
+       char flag = 0;
++      int ret;
+       do {
+               if (status & STAT_RX_RDY(port)) {
+@@ -277,6 +278,16 @@ static void mvebu_uart_rx_chars(struct uart_port *port, unsigned int status)
+                               port->icount.parity++;
+               }
++              /*
++               * For UART2, error bits are not cleared on buffer read.
++               * This causes interrupt loop and system hang.
++               */
++              if (IS_EXTENDED(port) && (status & STAT_BRK_ERR)) {
++                      ret = readl(port->membase + UART_STAT);
++                      ret |= STAT_BRK_ERR;
++                      writel(ret, port->membase + UART_STAT);
++              }
++
+               if (status & STAT_BRK_DET) {
+                       port->icount.brk++;
+                       status &= ~(STAT_FRM_ERR | STAT_PAR_ERR);
+-- 
+2.35.1
+
index 46103e80e2feff50c1ec418f6c0596e8737e0949..812b3120eb4df4dd164f6da5fd251fa0a9cb8c1d 100644 (file)
@@ -1055,3 +1055,198 @@ __follow_mount_rcu-verify-that-mount_lock-remains-unchanged.patch
 spmi-trace-fix-stack-out-of-bound-access-in-spmi-tracing-functions.patch
 csky-abiv1-fixup-compile-error.patch
 drivers-base-fix-userspace-break-from-using-bin_attributes-for-cpumap-and-cpulist.patch
+kvm-drop-unused-gpa-param-from-gfn-pfn-cache-s-__rel.patch
+kvm-put-the-extra-pfn-reference-when-reusing-a-pfn-i.patch
+kvm-do-not-incorporate-page-offset-into-gfn-pfn-cach.patch
+kvm-fully-serialize-gfn-pfn-cache-refresh-via-mutex.patch-7350
+kvm-fix-multiple-races-in-gfn-pfn-cache-refresh.patch-19149
+hid-wacom-only-report-rotation-for-art-pen.patch-25074
+hid-wacom-don-t-register-pad_input-for-touch-switch.patch-820
+drm-nouveau-fix-another-off-by-one-in-nvbios_addr.patch-28623
+bpf-fix-kasan-use-after-free-read-in-compute_effecti.patch
+drm-mediatek-modify-dsi-funcs-to-atomic-operations.patch-7159
+drm-mediatek-separate-poweron-poweroff-from-enable-d.patch-3169
+drm-mediatek-keep-dsi-as-lp00-before-dcs-cmds-transf.patch
+kvm-svm-don-t-bug-if-userspace-injects-an-interrupt-.patch
+hid-hid-input-add-surface-go-battery-quirk.patch-7851
+crypto-ccp-use-kzalloc-for-sev-ioctl-interfaces-to-p.patch
+crypto-blake2s-remove-shash-module.patch
+drm-dp-mst-read-the-extended-dpcd-capabilities-durin.patch
+scsi-qla2xxx-fix-excessive-i-o-error-messages-by-def.patch
+scsi-qla2xxx-wind-down-adapter-after-pcie-error.patch-31117
+scsi-qla2xxx-turn-off-multi-queue-for-8g-adapters.patch-20754
+scsi-qla2xxx-fix-crash-due-to-stale-srb-access-aroun.patch
+scsi-qla2xxx-fix-losing-fcp-2-targets-during-port-pe.patch
+scsi-qla2xxx-fix-losing-target-when-it-reappears-dur.patch
+scsi-qla2xxx-fix-losing-fcp-2-targets-on-long-port-d.patch
+scsi-qla2xxx-fix-erroneous-mailbox-timeout-after-pci.patch
+fbcon-fix-accelerated-fbdev-scrolling-while-logo-is-.patch
+fbcon-fix-boundary-checks-for-fbcon-vc-n1-n2-paramet.patch
+kvm-nvmx-snapshot-pre-vm-enter-bndcfgs-for-nested_ru.patch
+kvm-nvmx-snapshot-pre-vm-enter-debugctl-for-nested_r.patch
+drm-hyperv-drm-include-framebuffer-and-edid-headers.patch-15144
+coresight-clear-the-connection-field-properly.patch-22500
+usbnet-fix-linkwatch-use-after-free-on-disconnect.patch-30140
+drm-fb-helper-fix-out-of-bounds-access.patch-14074
+drm-vc4-hdmi-disable-audio-if-dmas-property-is-prese.patch
+fix-short-copy-handling-in-copy_mc_pipe_to_iter.patch-23282
+powerpc-restore-config_debug_info-in-defconfigs.patch-27837
+powerpc-ptdump-fix-display-of-rw-pages-on-fsl_book3e.patch-3011
+powerpc-64e-fix-early-tlb-miss-with-kuap.patch-29650
+mtd-rawnand-arasan-update-nand-bus-clock-instead-of-.patch
+mtd-rawnand-arasan-fix-clock-rate-in-nv-ddr.patch-18581
+ia64-processor-fix-wincompatible-pointer-types-in-ia.patch
+usbnet-smsc95xx-fix-deadlock-on-runtime-resume.patch-22908
+drm-ingenic-use-the-highest-possible-dma-burst-size.patch-22931
+firmware-arm_scpi-ensure-scpi_info-is-not-assigned-i.patch
+media-isl7998x-select-v4l2_fwnode-to-fix-build-error.patch-24025
+__follow_mount_rcu-verify-that-mount_lock-remains-un.patch
+soundwire-qcom-check-device-status-before-reading-de.patch
+scsi-lpfc-remove-extra-atomic_inc-on-cmd_pending-in-.patch
+usb-dwc3-gadget-refactor-dwc3_repare_one_trb.patch-8861
+usb-dwc3-gadget-fix-high-speed-multiplier-setting.patch-13588
+intel_th-pci-add-meteor-lake-p-support.patch
+intel_th-pci-add-raptor-lake-s-pch-support.patch
+intel_th-pci-add-raptor-lake-s-cpu-support.patch
+drm-tegra-fix-vmapping-of-prime-buffers.patch-28390
+media-patch-pci-atomisp_cmd-fix-three-missing-checks.patch
+kvm-set_msr_mce-permit-guests-to-ignore-single-bit-e.patch
+kvm-x86-signal-gp-not-eperm-on-bad-wrmsr-mci_ctl-sta.patch
+iommu-vt-d-avoid-invalid-memory-access-via-node_onli.patch
+pci-aer-iterate-over-error-counters-instead-of-error.patch
+pci-qcom-power-on-phy-before-ipq8074-dbi-register-ac.patch
+drm-amdgpu-check-bo-s-requested-pinning-domains-agai.patch
+kvm-x86-mark-tss-busy-during-ltr-emulation-_after_-a.patch
+kvm-x86-set-error-code-to-segment-selector-on-lldt-l.patch
+mips-cpuinfo-fix-a-warning-for-config_cpumask_offsta.patch
+tty-8250-add-support-for-brainboxes-px-cards.patch-25863
+dm-writecache-set-a-default-max_writeback_jobs.patch
+drm-nouveau-kms-fix-failure-path-for-creating-dp-con.patch
+drm-nouveau-acpi-don-t-print-error-when-we-get-einpr.patch
+drm-nouveau-don-t-pm_runtime_put_sync-only-pm_runtim.patch
+alsa-bcd2000-fix-a-uaf-bug-on-the-error-path-of-prob.patch
+x86-olpc-fix-logical-not-is-only-applied-to-the-left.patch
+drivers-base-fix-userspace-break-from-using-bin_attr.patch
+kexec_file-drop-weak-attribute-from-functions.patch
+kexec-clean-up-arch_kexec_kernel_verify_sig.patch
+kexec-keys-s390-make-use-of-built-in-and-secondary-k.patch
+tracing-events-add-__vstring-and-__assign_vstr-helpe.patch
+dm-thin-fix-use-after-free-crash-in-dm_sm_register_t.patch
+net-9p-initialize-the-iounit-field-during-fid-creati.patch
+um-remove-straying-parenthesis.patch-5379
+epoll-autoremove-wakers-even-more-aggressively.patch-6975
+arm-marvell-update-pcie-fixup.patch
+timekeeping-contribute-wall-clock-to-rng-on-time-cha.patch
+um-seed-rng-using-host-os-rng.patch-8415
+scsi-revert-scsi-qla2xxx-fix-disk-failure-to-redisco.patch
+scsi-qla2xxx-fix-incorrect-display-of-max-frame-size.patch-30577
+scsi-qla2xxx-zero-undefined-mailbox-in-registers.patch-4895
+scsi-qla2xxx-fix-response-queue-handler-reading-stal.patch
+scsi-qla2xxx-edif-fix-dropped-ike-message.patch
+scsi-qla2xxx-fix-imbalance-vha-vref_count.patch-12738
+scsi-qla2xxx-fix-discovery-issues-in-fc-al-topology.patch-25366
+scsi-qla2xxx-update-manufacturer-details.patch
+scsi-sg-allow-waiting-for-commands-to-complete-on-re.patch
+iio-fix-iio_format_avail_range-printing-for-none-iio.patch
+iio-light-isl29028-fix-the-warning-in-isl29028_remov.patch
+tty-vt-initialize-unicode-screen-buffer.patch-8483
+kvm-s390-pv-don-t-present-the-ecall-interrupt-twice.patch-16826
+locking-csd_lock-change-csdlock_debug-from-early_par.patch
+block-don-t-allow-the-same-type-rq_qos-add-more-than.patch
+hid-nintendo-add-missing-array-termination.patch-24808
+fuse-write-inode-in-fuse_release.patch-28840
+fuse-fix-deadlock-between-atomic-o_trunc-and-page-in.patch
+fuse-limit-nsec.patch-2050
+fuse-ioctl-translate-enosys.patch-17448
+alsa-usb-audio-add-quirk-for-behringer-umc202hd.patch-24063
+spmi-trace-fix-stack-out-of-bound-access-in-spmi-tra.patch
+btrfs-reject-log-replay-if-there-is-unsupported-ro-c.patch
+btrfs-tree-log-make-the-return-value-for-log-syncing.patch
+btrfs-ensure-pages-are-unlocked-on-cow_file_range-fa.patch
+btrfs-fix-error-handling-of-fallback-uncompress-writ.patch
+btrfs-reset-block-group-chunk-force-if-we-have-to-wa.patch
+btrfs-properly-flag-filesystem-with-btrfs_feature_in.patch
+block-add-bdev_max_segments-helper.patch
+btrfs-zoned-revive-max_zone_append_bytes.patch
+btrfs-replace-btrfs_max_extent_size-with-fs_info-max.patch
+btrfs-convert-count_max_extents-to-use-fs_info-max_e.patch
+btrfs-let-can_allocate_chunk-return-error.patch
+btrfs-zoned-finish-least-available-block-group-on-da.patch
+btrfs-zoned-disable-metadata-overcommit-for-zoned.patch
+btrfs-store-chunk-size-in-space-info-struct.patch
+btrfs-zoned-introduce-space_info-active_total_bytes.patch
+btrfs-zoned-activate-metadata-block-group-on-flush_s.patch
+btrfs-zoned-activate-necessary-block-group.patch
+btrfs-zoned-write-out-partially-allocated-region.patch
+btrfs-zoned-wait-until-zone-is-finished-when-allocat.patch
+btrfs-join-running-log-transaction-when-logging-new-.patch
+intel_idle-make-spr-c1-and-c1e-be-independent.patch
+acpi-cppc-do-not-prevent-cppc-from-working-in-the-fu.patch
+powerpc-fsl-pci-fix-class-code-of-pcie-root-port.patch-7836
+usb-hcd-fix-urb-giveback-issue-in-tasklet-function.patch-24136
+usb-gadget-fix-use-after-free-read-in-usb_udc_uevent.patch-12274
+usb-typec-ucsi-acknowledge-the-get_error_status-comm.patch
+powerpc-powernv-avoid-crashing-if-rng-is-null.patch-9536
+powerpc-powernv-kvm-use-darn-for-h_random-on-power9.patch
+serial-mvebu-uart-uart2-error-bits-clearing.patch-15528
+ovl-drop-warn_on-dentry-is-null-in-ovl_encode_fh.patch-29266
+s390-unwind-fix-fgraph-return-address-recovery.patch
+kvm-x86-split-kvm_is_valid_cr4-and-export-only-the-n.patch
+kvm-nvmx-account-for-kvm-reserved-cr4-bits-in-consis.patch
+kvm-nvmx-inject-ud-if-vmxon-is-attempted-with-incomp.patch
+kvm-nvmx-let-userspace-set-nvmx-msr-to-any-_host_-su.patch
+kvm-x86-pmu-introduce-the-ctrl_mask-value-for-fixed-.patch
+kvm-vmx-mark-all-perf_global_-ovf-_ctrl-bits-reserve.patch
+kvm-x86-pmu-ignore-pmu-global_ctrl-check-if-vpmu-doe.patch
+kvm-x86-pmu-accept-0-for-absent-pmu-msrs-when-host-i.patch
+revert-kvm-x86-pmu-accept-0-for-absent-pmu-msrs-when.patch
+kvm-vmx-add-helper-to-check-if-the-guest-pmu-has-per.patch
+kvm-nvmx-attempt-to-load-perf_global_ctrl-on-nvmx-xf.patch
+kvm-x86-mmu-treat-nx-as-a-valid-spte-bit-for-npt.patch-3797
+dm-raid-fix-address-sanitizer-warning-in-raid_status.patch
+dm-raid-fix-address-sanitizer-warning-in-raid_resume.patch
+dm-fix-dm-raid-crash-if-md_handle_request-splits-bio.patch
+mm-damon-reclaim-fix-potential-memory-leak-in-damon_.patch
+hugetlb_cgroup-fix-wrong-hugetlb-cgroup-numa-stat.patch
+batman-adv-tracing-use-the-new-__vstring-helper.patch
+ftrace-x86-add-back-ftrace_expected-assignment.patch-6434
+alsa-hda-realtek-add-quirk-for-clevo-nv45pz.patch-15916
+tracing-use-a-struct-alignof-to-determine-trace-even.patch
+csky-abiv1-fixup-compile-error.patch-25803
+ksmbd-fix-memory-leak-in-smb2_handle_negotiate.patch-5672
+ksmbd-fix-use-after-free-bug-in-smb2_tree_disconect.patch-30412
+ksmbd-prevent-out-of-bound-read-for-smb2_write.patch-20867
+ksmbd-prevent-out-of-bound-read-for-smb2_tree_connne.patch
+parisc-fix-device-names-in-proc-iomem.patch-18836
+parisc-drop-pa_swapper_pg_lock-spinlock.patch-26906
+parisc-check-the-return-value-of-ioremap-in-lba_driv.patch
+parisc-io_pgetevents_time64-needs-compat-syscall-in-.patch
+input-gscps2-check-return-value-of-ioremap-in-gscps2.patch
+x86-kprobes-update-kcb-status-flag-after-singlestepp.patch
+arm-dts-uniphier-fix-usb-interrupts-for-pxs2-soc.patch-2243
+arm64-dts-uniphier-fix-usb-interrupts-for-pxs3-soc.patch-8226
+md-raid-destroy-the-bitmap-after-destroying-the-thre.patch
+md-raid10-fix-kasan-warning.patch-1758
+ext4-fix-reading-leftover-inlined-symlinks.patch
+ext4-update-s_overhead_clusters-in-the-superblock-du.patch
+ext4-fix-extent-status-tree-race-in-writeback-error-.patch
+ext4-add-ext4_inode_has_xattr_space-macro-in-xattr.h.patch
+ext4-fix-use-after-free-in-ext4_xattr_set_entry.patch
+ext4-correct-max_inline_xattr_value_size-computing.patch
+ext4-correct-the-misjudgment-in-ext4_iget_extra_inod.patch
+ext4-fix-warning-in-ext4_iomap_begin-as-race-between.patch
+documentation-ext4-fix-cell-spacing-of-table-heading.patch
+ext4-check-if-directory-block-is-within-i_size.patch
+ext4-make-sure-ext4_append-always-allocates-new-bloc.patch
+mbcache-don-t-reclaim-used-entries.patch-21676
+mbcache-add-functions-to-delete-entry-if-unused.patch-21045
+ext4-remove-ea-inode-entry-from-mbcache-on-inode-evi.patch
+ext4-unindent-codeblock-in-ext4_xattr_block_set.patch
+ext4-fix-race-when-reusing-xattr-blocks.patch
+thermal-sysfs-fix-cooling_device_stats_setup-error-c.patch
+alsa-hda-realtek-add-quirk-for-hp-spectre-x360-15-eb.patch
+keys-asymmetric-enforce-sm2-signature-use-pkey-algo.patch
+tpm-eventlog-fix-section-mismatch-for-debug_section_.patch
+tpm-add-check-for-failure-mode-for-tpm2-modules.patch
+ksmbd-fix-heap-based-overflow-in-set_ntacl_dacl.patch-15594
+vfs-check-the-truncate-maximum-size-in-inode_newsize.patch
diff --git a/queue-5.19/soundwire-qcom-check-device-status-before-reading-de.patch b/queue-5.19/soundwire-qcom-check-device-status-before-reading-de.patch
new file mode 100644 (file)
index 0000000..5b35a9c
--- /dev/null
@@ -0,0 +1,43 @@
+From 22279f33dd8646f63cc191b62fa6c863d0dd016b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 6 Jul 2022 10:56:44 +0100
+Subject: soundwire: qcom: Check device status before reading devid
+
+From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+[ Upstream commit aa1262ca66957183ea1fb32a067e145b995f3744 ]
+
+As per hardware datasheet its recommended that we check the device
+status before reading devid assigned by auto-enumeration.
+
+Without this patch we see SoundWire devices with invalid enumeration
+addresses on the bus.
+
+Cc: stable@vger.kernel.org
+Fixes: a6e6581942ca ("soundwire: qcom: add auto enumeration support")
+Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+Link: https://lore.kernel.org/r/20220706095644.5852-1-srinivas.kandagatla@linaro.org
+Signed-off-by: Vinod Koul <vkoul@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/soundwire/qcom.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/soundwire/qcom.c b/drivers/soundwire/qcom.c
+index 22b706350ead..b5ec7726592c 100644
+--- a/drivers/soundwire/qcom.c
++++ b/drivers/soundwire/qcom.c
+@@ -471,6 +471,10 @@ static int qcom_swrm_enumerate(struct sdw_bus *bus)
+       char *buf1 = (char *)&val1, *buf2 = (char *)&val2;
+       for (i = 1; i <= SDW_MAX_DEVICES; i++) {
++              /* do not continue if the status is Not Present  */
++              if (!ctrl->status[i])
++                      continue;
++
+               /*SCP_Devid5 - Devid 4*/
+               ctrl->reg_read(ctrl, SWRM_ENUMERATOR_SLAVE_DEV_ID_1(i), &val1);
+-- 
+2.35.1
+
diff --git a/queue-5.19/spmi-trace-fix-stack-out-of-bound-access-in-spmi-tra.patch b/queue-5.19/spmi-trace-fix-stack-out-of-bound-access-in-spmi-tra.patch
new file mode 100644 (file)
index 0000000..dd73534
--- /dev/null
@@ -0,0 +1,115 @@
+From 2d56df30d76af97123d66b3f131f270e6d686b5d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 27 Jun 2022 16:55:12 -0700
+Subject: spmi: trace: fix stack-out-of-bound access in SPMI tracing functions
+
+From: David Collins <quic_collinsd@quicinc.com>
+
+[ Upstream commit 2af28b241eea816e6f7668d1954f15894b45d7e3 ]
+
+trace_spmi_write_begin() and trace_spmi_read_end() both call
+memcpy() with a length of "len + 1".  This leads to one extra
+byte being read beyond the end of the specified buffer.  Fix
+this out-of-bound memory access by using a length of "len"
+instead.
+
+Here is a KASAN log showing the issue:
+
+BUG: KASAN: stack-out-of-bounds in trace_event_raw_event_spmi_read_end+0x1d0/0x234
+Read of size 2 at addr ffffffc0265b7540 by task thermal@2.0-ser/1314
+...
+Call trace:
+ dump_backtrace+0x0/0x3e8
+ show_stack+0x2c/0x3c
+ dump_stack_lvl+0xdc/0x11c
+ print_address_description+0x74/0x384
+ kasan_report+0x188/0x268
+ kasan_check_range+0x270/0x2b0
+ memcpy+0x90/0xe8
+ trace_event_raw_event_spmi_read_end+0x1d0/0x234
+ spmi_read_cmd+0x294/0x3ac
+ spmi_ext_register_readl+0x84/0x9c
+ regmap_spmi_ext_read+0x144/0x1b0 [regmap_spmi]
+ _regmap_raw_read+0x40c/0x754
+ regmap_raw_read+0x3a0/0x514
+ regmap_bulk_read+0x418/0x494
+ adc5_gen3_poll_wait_hs+0xe8/0x1e0 [qcom_spmi_adc5_gen3]
+ ...
+ __arm64_sys_read+0x4c/0x60
+ invoke_syscall+0x80/0x218
+ el0_svc_common+0xec/0x1c8
+ ...
+
+addr ffffffc0265b7540 is located in stack of task thermal@2.0-ser/1314 at offset 32 in frame:
+ adc5_gen3_poll_wait_hs+0x0/0x1e0 [qcom_spmi_adc5_gen3]
+
+this frame has 1 object:
+ [32, 33) 'status'
+
+Memory state around the buggy address:
+ ffffffc0265b7400: 00 00 00 00 00 00 00 00 00 00 00 00 f1 f1 f1 f1
+ ffffffc0265b7480: 04 f3 f3 f3 00 00 00 00 00 00 00 00 00 00 00 00
+>ffffffc0265b7500: 00 00 00 00 f1 f1 f1 f1 01 f3 f3 f3 00 00 00 00
+                                           ^
+ ffffffc0265b7580: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ ffffffc0265b7600: f1 f1 f1 f1 01 f2 07 f2 f2 f2 01 f3 00 00 00 00
+==================================================================
+
+Fixes: a9fce374815d ("spmi: add command tracepoints for SPMI")
+Cc: stable@vger.kernel.org
+Reviewed-by: Stephen Boyd <sboyd@kernel.org>
+Acked-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: David Collins <quic_collinsd@quicinc.com>
+Link: https://lore.kernel.org/r/20220627235512.2272783-1-quic_collinsd@quicinc.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/trace/events/spmi.h | 12 ++++++------
+ 1 file changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/include/trace/events/spmi.h b/include/trace/events/spmi.h
+index 8b60efe18ba6..a6819fd85cdf 100644
+--- a/include/trace/events/spmi.h
++++ b/include/trace/events/spmi.h
+@@ -21,15 +21,15 @@ TRACE_EVENT(spmi_write_begin,
+               __field         ( u8,         sid       )
+               __field         ( u16,        addr      )
+               __field         ( u8,         len       )
+-              __dynamic_array ( u8,   buf,  len + 1   )
++              __dynamic_array ( u8,   buf,  len       )
+       ),
+       TP_fast_assign(
+               __entry->opcode = opcode;
+               __entry->sid    = sid;
+               __entry->addr   = addr;
+-              __entry->len    = len + 1;
+-              memcpy(__get_dynamic_array(buf), buf, len + 1);
++              __entry->len    = len;
++              memcpy(__get_dynamic_array(buf), buf, len);
+       ),
+       TP_printk("opc=%d sid=%02d addr=0x%04x len=%d buf=0x[%*phD]",
+@@ -92,7 +92,7 @@ TRACE_EVENT(spmi_read_end,
+               __field         ( u16,        addr      )
+               __field         ( int,        ret       )
+               __field         ( u8,         len       )
+-              __dynamic_array ( u8,   buf,  len + 1   )
++              __dynamic_array ( u8,   buf,  len       )
+       ),
+       TP_fast_assign(
+@@ -100,8 +100,8 @@ TRACE_EVENT(spmi_read_end,
+               __entry->sid    = sid;
+               __entry->addr   = addr;
+               __entry->ret    = ret;
+-              __entry->len    = len + 1;
+-              memcpy(__get_dynamic_array(buf), buf, len + 1);
++              __entry->len    = len;
++              memcpy(__get_dynamic_array(buf), buf, len);
+       ),
+       TP_printk("opc=%d sid=%02d addr=0x%04x ret=%d len=%02d buf=0x[%*phD]",
+-- 
+2.35.1
+
diff --git a/queue-5.19/thermal-sysfs-fix-cooling_device_stats_setup-error-c.patch b/queue-5.19/thermal-sysfs-fix-cooling_device_stats_setup-error-c.patch
new file mode 100644 (file)
index 0000000..05cd032
--- /dev/null
@@ -0,0 +1,74 @@
+From 801cc2015d8cbd8a81277013b3c4ec0b643d3a2a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 29 Jul 2022 17:39:07 +0200
+Subject: thermal: sysfs: Fix cooling_device_stats_setup() error code path
+
+From: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+
+[ Upstream commit d5a8aa5d7d80d21ab6b266f1bed4194b61746199 ]
+
+If cooling_device_stats_setup() fails to create the stats object, it
+must clear the last slot in cooling_device_attr_groups that was
+initially empty (so as to make it possible to add stats attributes to
+the cooling device attribute groups).
+
+Failing to do so may cause the stats attributes to be created by
+mistake for a device that doesn't have a stats object, because the
+slot in question might be populated previously during the registration
+of another cooling device.
+
+Fixes: 8ea229511e06 ("thermal: Add cooling device's statistics in sysfs")
+Reported-by: Di Shen <di.shen@unisoc.com>
+Tested-by: Di Shen <di.shen@unisoc.com>
+Cc: 4.17+ <stable@vger.kernel.org> # 4.17+
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/thermal/thermal_sysfs.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/thermal/thermal_sysfs.c b/drivers/thermal/thermal_sysfs.c
+index 1c4aac8464a7..1e5a78131aba 100644
+--- a/drivers/thermal/thermal_sysfs.c
++++ b/drivers/thermal/thermal_sysfs.c
+@@ -813,12 +813,13 @@ static const struct attribute_group cooling_device_stats_attr_group = {
+ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev)
+ {
++      const struct attribute_group *stats_attr_group = NULL;
+       struct cooling_dev_stats *stats;
+       unsigned long states;
+       int var;
+       if (cdev->ops->get_max_state(cdev, &states))
+-              return;
++              goto out;
+       states++; /* Total number of states is highest state + 1 */
+@@ -828,7 +829,7 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev)
+       stats = kzalloc(var, GFP_KERNEL);
+       if (!stats)
+-              return;
++              goto out;
+       stats->time_in_state = (ktime_t *)(stats + 1);
+       stats->trans_table = (unsigned int *)(stats->time_in_state + states);
+@@ -838,9 +839,12 @@ static void cooling_device_stats_setup(struct thermal_cooling_device *cdev)
+       spin_lock_init(&stats->lock);
++      stats_attr_group = &cooling_device_stats_attr_group;
++
++out:
+       /* Fill the empty slot left in cooling_device_attr_groups */
+       var = ARRAY_SIZE(cooling_device_attr_groups) - 2;
+-      cooling_device_attr_groups[var] = &cooling_device_stats_attr_group;
++      cooling_device_attr_groups[var] = stats_attr_group;
+ }
+ static void cooling_device_stats_destroy(struct thermal_cooling_device *cdev)
+-- 
+2.35.1
+
diff --git a/queue-5.19/timekeeping-contribute-wall-clock-to-rng-on-time-cha.patch b/queue-5.19/timekeeping-contribute-wall-clock-to-rng-on-time-cha.patch
new file mode 100644 (file)
index 0000000..11af7a1
--- /dev/null
@@ -0,0 +1,74 @@
+From c060e83b71bfec7ae84a6e9420be2515315002e9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 17 Jul 2022 23:53:34 +0200
+Subject: timekeeping: contribute wall clock to rng on time change
+
+From: Jason A. Donenfeld <Jason@zx2c4.com>
+
+[ Upstream commit b8ac29b40183a6038919768b5d189c9bd91ce9b4 ]
+
+The rng's random_init() function contributes the real time to the rng at
+boot time, so that events can at least start in relation to something
+particular in the real world. But this clock might not yet be set that
+point in boot, so nothing is contributed. In addition, the relation
+between minor clock changes from, say, NTP, and the cycle counter is
+potentially useful entropic data.
+
+This commit addresses this by mixing in a time stamp on calls to
+settimeofday and adjtimex. No entropy is credited in doing so, so it
+doesn't make initialization faster, but it is still useful input to
+have.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Cc: stable@vger.kernel.org
+Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
+Reviewed-by: Eric Biggers <ebiggers@google.com>
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/time/timekeeping.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
+index 8e4b3c32fcf9..f72b9f1de178 100644
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -23,6 +23,7 @@
+ #include <linux/pvclock_gtod.h>
+ #include <linux/compiler.h>
+ #include <linux/audit.h>
++#include <linux/random.h>
+ #include "tick-internal.h"
+ #include "ntp_internal.h"
+@@ -1343,8 +1344,10 @@ int do_settimeofday64(const struct timespec64 *ts)
+       /* Signal hrtimers about time change */
+       clock_was_set(CLOCK_SET_WALL);
+-      if (!ret)
++      if (!ret) {
+               audit_tk_injoffset(ts_delta);
++              add_device_randomness(ts, sizeof(*ts));
++      }
+       return ret;
+ }
+@@ -2430,6 +2433,7 @@ int do_adjtimex(struct __kernel_timex *txc)
+       ret = timekeeping_validate_timex(txc);
+       if (ret)
+               return ret;
++      add_device_randomness(txc, sizeof(*txc));
+       if (txc->modes & ADJ_SETOFFSET) {
+               struct timespec64 delta;
+@@ -2447,6 +2451,7 @@ int do_adjtimex(struct __kernel_timex *txc)
+       audit_ntp_init(&ad);
+       ktime_get_real_ts64(&ts);
++      add_device_randomness(&ts, sizeof(ts));
+       raw_spin_lock_irqsave(&timekeeper_lock, flags);
+       write_seqcount_begin(&tk_core.seq);
+-- 
+2.35.1
+
diff --git a/queue-5.19/tpm-add-check-for-failure-mode-for-tpm2-modules.patch b/queue-5.19/tpm-add-check-for-failure-mode-for-tpm2-modules.patch
new file mode 100644 (file)
index 0000000..52f3c9d
--- /dev/null
@@ -0,0 +1,54 @@
+From c479aa67954af70f49e02e0d32de3b6956a041e3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 1 Aug 2022 15:57:03 +0200
+Subject: tpm: Add check for Failure mode for TPM2 modules
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Mårten Lindahl <marten.lindahl@axis.com>
+
+[ Upstream commit 863ed94c589fcd1984f4e3080f069d30508044bb ]
+
+In commit 0aa698787aa2 ("tpm: Add Upgrade/Reduced mode support for
+TPM2 modules") it was said that:
+
+"If the TPM is in Failure mode, it will successfully respond to both
+tpm2_do_selftest() and tpm2_startup() calls. Although, will fail to
+answer to tpm2_get_cc_attrs_tbl(). Use this fact to conclude that TPM
+is in Failure mode."
+
+But a check was never added in the commit when calling
+tpm2_get_cc_attrs_tbl() to conclude that the TPM is in Failure mode.
+This commit corrects this by adding a check.
+
+Fixes: 0aa698787aa2 ("tpm: Add Upgrade/Reduced mode support for TPM2 modules")
+Cc: stable@vger.kernel.org # v5.17+
+Signed-off-by: Mårten Lindahl <marten.lindahl@axis.com>
+Reviewed-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/char/tpm/tpm2-cmd.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/drivers/char/tpm/tpm2-cmd.c b/drivers/char/tpm/tpm2-cmd.c
+index c1eb5d223839..65d03867e114 100644
+--- a/drivers/char/tpm/tpm2-cmd.c
++++ b/drivers/char/tpm/tpm2-cmd.c
+@@ -752,6 +752,12 @@ int tpm2_auto_startup(struct tpm_chip *chip)
+       }
+       rc = tpm2_get_cc_attrs_tbl(chip);
++      if (rc == TPM2_RC_FAILURE || (rc < 0 && rc != -ENOMEM)) {
++              dev_info(&chip->dev,
++                       "TPM in field failure mode, requires firmware upgrade\n");
++              chip->flags |= TPM_CHIP_FLAG_FIRMWARE_UPGRADE;
++              rc = 0;
++      }
+ out:
+       /*
+-- 
+2.35.1
+
diff --git a/queue-5.19/tpm-eventlog-fix-section-mismatch-for-debug_section_.patch b/queue-5.19/tpm-eventlog-fix-section-mismatch-for-debug_section_.patch
new file mode 100644 (file)
index 0000000..4b9ebb8
--- /dev/null
@@ -0,0 +1,47 @@
+From 7063333acc22bb48491d290836f5ff9e29b49302 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Jul 2022 09:17:38 +0800
+Subject: tpm: eventlog: Fix section mismatch for DEBUG_SECTION_MISMATCH
+
+From: Huacai Chen <chenhuacai@loongson.cn>
+
+[ Upstream commit bed4593645366ad7362a3aa7bc0d100d8d8236a8 ]
+
+If DEBUG_SECTION_MISMATCH enabled, __calc_tpm2_event_size() will not be
+inlined, this cause section mismatch like this:
+
+WARNING: modpost: vmlinux.o(.text.unlikely+0xe30c): Section mismatch in reference from the variable L0 to the function .init.text:early_ioremap()
+The function L0() references
+the function __init early_memremap().
+This is often because L0 lacks a __init
+annotation or the annotation of early_ioremap is wrong.
+
+Fix it by using __always_inline instead of inline for the called-once
+function __calc_tpm2_event_size().
+
+Fixes: 44038bc514a2 ("tpm: Abstract crypto agile event size calculations")
+Cc: stable@vger.kernel.org # v5.3
+Reported-by: WANG Xuerui <git@xen0n.name>
+Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/tpm_eventlog.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/linux/tpm_eventlog.h b/include/linux/tpm_eventlog.h
+index 739ba9a03ec1..20c0ff54b7a0 100644
+--- a/include/linux/tpm_eventlog.h
++++ b/include/linux/tpm_eventlog.h
+@@ -157,7 +157,7 @@ struct tcg_algorithm_info {
+  * Return: size of the event on success, 0 on failure
+  */
+-static inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
++static __always_inline int __calc_tpm2_event_size(struct tcg_pcr_event2_head *event,
+                                        struct tcg_pcr_event *event_header,
+                                        bool do_mapping)
+ {
+-- 
+2.35.1
+
diff --git a/queue-5.19/tracing-events-add-__vstring-and-__assign_vstr-helpe.patch b/queue-5.19/tracing-events-add-__vstring-and-__assign_vstr-helpe.patch
new file mode 100644 (file)
index 0000000..3b66b36
--- /dev/null
@@ -0,0 +1,196 @@
+From 4a3876e98ee156908aa616d4bd04a26cc8b06e86 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 5 Jul 2022 18:44:54 -0400
+Subject: tracing/events: Add __vstring() and __assign_vstr() helper macros
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+[ Upstream commit 0563231f93c6d1f582b168a47753b345c1e20d81 ]
+
+There's several places that open code the following logic:
+
+  TP_STRUCT__entry(__dynamic_array(char, msg, MSG_MAX)),
+  TP_fast_assign(vsnprintf(__get_str(msg), MSG_MAX, vaf->fmt, *vaf->va);)
+
+To load a string created by variable array va_list.
+
+The main issue with this approach is that "MSG_MAX" usage in the
+__dynamic_array() portion. That actually just reserves the MSG_MAX in the
+event, and even wastes space because there's dynamic meta data also saved
+in the event to denote the offset and size of the dynamic array. It would
+have been better to just use a static __array() field.
+
+Instead, create __vstring() and __assign_vstr() that work like __string
+and __assign_str() but instead of taking a destination string to copy,
+take a format string and a va_list pointer and fill in the values.
+
+It uses the helper:
+
+ #define __trace_event_vstr_len(fmt, va)               \
+ ({                                                    \
+       va_list __ap;                                   \
+       int __ret;                                      \
+                                                       \
+       va_copy(__ap, *(va));                           \
+       __ret = vsnprintf(NULL, 0, fmt, __ap) + 1;      \
+       va_end(__ap);                                   \
+                                                       \
+       min(__ret, TRACE_EVENT_STR_MAX);                \
+ })
+
+To figure out the length to store the string. It may be slightly slower as
+it needs to run the vsnprintf() twice, but it now saves space on the ring
+buffer.
+
+Link: https://lkml.kernel.org/r/20220705224749.053570613@goodmis.org
+
+Cc: Dennis Dalessandro <dennis.dalessandro@cornelisnetworks.com>
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Jason Gunthorpe <jgg@ziepe.ca>
+Cc: Leon Romanovsky <leon@kernel.org>
+Cc: Kalle Valo <kvalo@kernel.org>
+Cc: "David S. Miller" <davem@davemloft.net>
+Cc: Eric Dumazet <edumazet@google.com>
+Cc: Jakub Kicinski <kuba@kernel.org>
+Cc: Paolo Abeni <pabeni@redhat.com>
+Cc: Arend van Spriel <aspriel@gmail.com>
+Cc: Franky Lin <franky.lin@broadcom.com>
+Cc: Hante Meuleman <hante.meuleman@broadcom.com>
+Cc: Gregory Greenman <gregory.greenman@intel.com>
+Cc: Peter Chen <peter.chen@kernel.org>
+Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Cc: Mathias Nyman <mathias.nyman@intel.com>
+Cc: Chunfeng Yun <chunfeng.yun@mediatek.com>
+Cc: Bin Liu <b-liu@ti.com>
+Cc: Marek Lindner <mareklindner@neomailbox.ch>
+Cc: Simon Wunderlich <sw@simonwunderlich.de>
+Cc: Antonio Quartulli <a@unstable.cc>
+Cc: Sven Eckelmann <sven@narfation.org>
+Cc: Johannes Berg <johannes@sipsolutions.net>
+Cc: Jim Cromie <jim.cromie@gmail.com>
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/trace_events.h                 | 18 ++++++++++++++++++
+ include/trace/stages/stage1_struct_define.h  |  3 +++
+ include/trace/stages/stage2_data_offsets.h   |  3 +++
+ include/trace/stages/stage4_event_fields.h   |  3 +++
+ include/trace/stages/stage5_get_offsets.h    |  4 ++++
+ include/trace/stages/stage6_event_callback.h |  7 +++++++
+ 6 files changed, 38 insertions(+)
+
+diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
+index e6e95a9f07a5..b18759a673c6 100644
+--- a/include/linux/trace_events.h
++++ b/include/linux/trace_events.h
+@@ -916,6 +916,24 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type,
+ #endif
++#define TRACE_EVENT_STR_MAX   512
++
++/*
++ * gcc warns that you can not use a va_list in an inlined
++ * function. But lets me make it into a macro :-/
++ */
++#define __trace_event_vstr_len(fmt, va)                       \
++({                                                    \
++      va_list __ap;                                   \
++      int __ret;                                      \
++                                                      \
++      va_copy(__ap, *(va));                           \
++      __ret = vsnprintf(NULL, 0, fmt, __ap) + 1;      \
++      va_end(__ap);                                   \
++                                                      \
++      min(__ret, TRACE_EVENT_STR_MAX);                \
++})
++
+ #endif /* _LINUX_TRACE_EVENT_H */
+ /*
+diff --git a/include/trace/stages/stage1_struct_define.h b/include/trace/stages/stage1_struct_define.h
+index a16783419687..1b7bab60434c 100644
+--- a/include/trace/stages/stage1_struct_define.h
++++ b/include/trace/stages/stage1_struct_define.h
+@@ -26,6 +26,9 @@
+ #undef __string_len
+ #define __string_len(item, src, len) __dynamic_array(char, item, -1)
++#undef __vstring
++#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1)
++
+ #undef __bitmask
+ #define __bitmask(item, nr_bits) __dynamic_array(char, item, -1)
+diff --git a/include/trace/stages/stage2_data_offsets.h b/include/trace/stages/stage2_data_offsets.h
+index 42fd1e8813ec..1b7a8f764fdd 100644
+--- a/include/trace/stages/stage2_data_offsets.h
++++ b/include/trace/stages/stage2_data_offsets.h
+@@ -32,6 +32,9 @@
+ #undef __string_len
+ #define __string_len(item, src, len) __dynamic_array(char, item, -1)
++#undef __vstring
++#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1)
++
+ #undef __bitmask
+ #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h
+index e80cdc397a43..c3790ec7a453 100644
+--- a/include/trace/stages/stage4_event_fields.h
++++ b/include/trace/stages/stage4_event_fields.h
+@@ -38,6 +38,9 @@
+ #undef __string_len
+ #define __string_len(item, src, len) __dynamic_array(char, item, -1)
++#undef __vstring
++#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1)
++
+ #undef __bitmask
+ #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+diff --git a/include/trace/stages/stage5_get_offsets.h b/include/trace/stages/stage5_get_offsets.h
+index 7ee5931300e6..fba4c24ed9e6 100644
+--- a/include/trace/stages/stage5_get_offsets.h
++++ b/include/trace/stages/stage5_get_offsets.h
+@@ -39,6 +39,10 @@
+ #undef __string_len
+ #define __string_len(item, src, len) __dynamic_array(char, item, (len) + 1)
++#undef __vstring
++#define __vstring(item, fmt, ap) __dynamic_array(char, item,          \
++                    __trace_event_vstr_len(fmt, ap))
++
+ #undef __rel_dynamic_array
+ #define __rel_dynamic_array(type, item, len)                          \
+       __item_length = (len) * sizeof(type);                           \
+diff --git a/include/trace/stages/stage6_event_callback.h b/include/trace/stages/stage6_event_callback.h
+index e1724f73594b..0f51f6b3ab70 100644
+--- a/include/trace/stages/stage6_event_callback.h
++++ b/include/trace/stages/stage6_event_callback.h
+@@ -24,6 +24,9 @@
+ #undef __string_len
+ #define __string_len(item, src, len) __dynamic_array(char, item, -1)
++#undef __vstring
++#define __vstring(item, fmt, ap) __dynamic_array(char, item, -1)
++
+ #undef __assign_str
+ #define __assign_str(dst, src)                                                \
+       strcpy(__get_str(dst), (src) ? (const char *)(src) : "(null)");
+@@ -35,6 +38,10 @@
+               __get_str(dst)[len] = '\0';                             \
+       } while(0)
++#undef __assign_vstr
++#define __assign_vstr(dst, fmt, va)                                   \
++      vsnprintf(__get_str(dst), TRACE_EVENT_STR_MAX, fmt, *(va))
++
+ #undef __bitmask
+ #define __bitmask(item, nr_bits) __dynamic_array(unsigned long, item, -1)
+-- 
+2.35.1
+
diff --git a/queue-5.19/tracing-use-a-struct-alignof-to-determine-trace-even.patch b/queue-5.19/tracing-use-a-struct-alignof-to-determine-trace-even.patch
new file mode 100644 (file)
index 0000000..36993ca
--- /dev/null
@@ -0,0 +1,80 @@
+From 017769f9a2fa4c4aa149d6b921b07fd804b701fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 31 Jul 2022 01:59:28 -0400
+Subject: tracing: Use a struct alignof to determine trace event field
+ alignment
+
+From: Steven Rostedt (Google) <rostedt@goodmis.org>
+
+[ Upstream commit 4c3d2f9388d36eb28640a220a6f908328442d873 ]
+
+alignof() gives an alignment of types as they would be as standalone
+variables. But alignment in structures might be different, and when
+building the fields of events, the alignment must be the actual
+alignment otherwise the field offsets may not match what they actually
+are.
+
+This caused trace-cmd to crash, as libtraceevent did not check if the
+field offset was bigger than the event. The write_msr and read_msr
+events on 32 bit had their fields incorrect, because it had a u64 field
+between two ints. alignof(u64) would give 8, but the u64 field was at a
+4 byte alignment.
+
+Define a macro as:
+
+   ALIGN_STRUCTFIELD(type) ((int)(offsetof(struct {char a; type b;}, b)))
+
+which gives the actual alignment of types in a structure.
+
+Link: https://lkml.kernel.org/r/20220731015928.7ab3a154@rorschach.local.home
+
+Cc: Ingo Molnar <mingo@kernel.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Thomas Gleixner <tglx@linutronix.de>
+Cc: Masami Hiramatsu <mhiramat@kernel.org>
+Cc: stable@vger.kernel.org
+Fixes: 04ae87a52074e ("ftrace: Rework event_create_dir()")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/trace/stages/stage4_event_fields.h | 8 +++++---
+ 1 file changed, 5 insertions(+), 3 deletions(-)
+
+diff --git a/include/trace/stages/stage4_event_fields.h b/include/trace/stages/stage4_event_fields.h
+index c3790ec7a453..80d34f396555 100644
+--- a/include/trace/stages/stage4_event_fields.h
++++ b/include/trace/stages/stage4_event_fields.h
+@@ -2,16 +2,18 @@
+ /* Stage 4 definitions for creating trace events */
++#define ALIGN_STRUCTFIELD(type) ((int)(offsetof(struct {char a; type b;}, b)))
++
+ #undef __field_ext
+ #define __field_ext(_type, _item, _filter_type) {                     \
+       .type = #_type, .name = #_item,                                 \
+-      .size = sizeof(_type), .align = __alignof__(_type),             \
++      .size = sizeof(_type), .align = ALIGN_STRUCTFIELD(_type),       \
+       .is_signed = is_signed_type(_type), .filter_type = _filter_type },
+ #undef __field_struct_ext
+ #define __field_struct_ext(_type, _item, _filter_type) {              \
+       .type = #_type, .name = #_item,                                 \
+-      .size = sizeof(_type), .align = __alignof__(_type),             \
++      .size = sizeof(_type), .align = ALIGN_STRUCTFIELD(_type),       \
+       0, .filter_type = _filter_type },
+ #undef __field
+@@ -23,7 +25,7 @@
+ #undef __array
+ #define __array(_type, _item, _len) {                                 \
+       .type = #_type"["__stringify(_len)"]", .name = #_item,          \
+-      .size = sizeof(_type[_len]), .align = __alignof__(_type),       \
++      .size = sizeof(_type[_len]), .align = ALIGN_STRUCTFIELD(_type), \
+       .is_signed = is_signed_type(_type), .filter_type = FILTER_OTHER },
+ #undef __dynamic_array
+-- 
+2.35.1
+
diff --git a/queue-5.19/tty-8250-add-support-for-brainboxes-px-cards.patch-25863 b/queue-5.19/tty-8250-add-support-for-brainboxes-px-cards.patch-25863
new file mode 100644 (file)
index 0000000..ff5f560
--- /dev/null
@@ -0,0 +1,147 @@
+From d5a779b3947b0de53727a97e7a50c53e27258d0e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 11 Jul 2022 16:35:10 +0100
+Subject: tty: 8250: Add support for Brainboxes PX cards.
+
+From: Cameron Williams <cang1@live.co.uk>
+
+[ Upstream commit ef5a03a26c87a760bc3d86b5af7b773e82f8b1b7 ]
+
+Add support for some of the Brainboxes PCIe (PX) range of
+serial cards, including the PX-101, PX-235/PX-246,
+PX-203/PX-257, PX-260/PX-701, PX-310, PX-313,
+PX-320/PX-324/PX-376/PX-387, PX-335/PX-346, PX-368, PX-420,
+PX-803 and PX-846.
+
+Signed-off-by: Cameron Williams <cang1@live.co.uk>
+Cc: stable <stable@kernel.org>
+Link: https://lore.kernel.org/r/AM5PR0202MB2564669252BDC59BF55A6E87C4879@AM5PR0202MB2564.eurprd02.prod.outlook.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/8250/8250_pci.c | 109 +++++++++++++++++++++++++++++
+ 1 file changed, 109 insertions(+)
+
+diff --git a/drivers/tty/serial/8250/8250_pci.c b/drivers/tty/serial/8250/8250_pci.c
+index a17619db7939..f6732c1ed238 100644
+--- a/drivers/tty/serial/8250/8250_pci.c
++++ b/drivers/tty/serial/8250/8250_pci.c
+@@ -5076,6 +5076,115 @@ static const struct pci_device_id serial_pci_tbl[] = {
+               PCI_ANY_ID, PCI_ANY_ID,
+               0, 0,
+               pbn_b2_4_115200 },
++      /*
++       * Brainboxes PX-101
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4005,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_b0_2_115200 },
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4019,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_2_15625000 },
++      /*
++       * Brainboxes PX-235/246
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4004,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_b0_1_115200 },
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4016,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_1_15625000 },
++      /*
++       * Brainboxes PX-203/PX-257
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4006,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_b0_2_115200 },
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4015,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_4_15625000 },
++      /*
++       * Brainboxes PX-260/PX-701
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x400A,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_4_15625000 },
++      /*
++       * Brainboxes PX-310
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x400E,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_2_15625000 },
++      /*
++       * Brainboxes PX-313
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x400C,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_2_15625000 },
++      /*
++       * Brainboxes PX-320/324/PX-376/PX-387
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x400B,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_1_15625000 },
++      /*
++       * Brainboxes PX-335/346
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x400F,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_4_15625000 },
++      /*
++       * Brainboxes PX-368
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4010,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_4_15625000 },
++      /*
++       * Brainboxes PX-420
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4000,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_b0_4_115200 },
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4011,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_4_15625000 },
++      /*
++       * Brainboxes PX-803
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4009,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_b0_1_115200 },
++      {       PCI_VENDOR_ID_INTASHIELD, 0x401E,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_1_15625000 },
++      /*
++       * Brainboxes PX-846
++       */
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4008,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_b0_1_115200 },
++      {       PCI_VENDOR_ID_INTASHIELD, 0x4017,
++              PCI_ANY_ID, PCI_ANY_ID,
++              0, 0,
++              pbn_oxsemi_1_15625000 },
++
+       /*
+        * Perle PCI-RAS cards
+        */
+-- 
+2.35.1
+
diff --git a/queue-5.19/tty-vt-initialize-unicode-screen-buffer.patch-8483 b/queue-5.19/tty-vt-initialize-unicode-screen-buffer.patch-8483
new file mode 100644 (file)
index 0000000..62812af
--- /dev/null
@@ -0,0 +1,57 @@
+From 4ba55f6cee68a9d823d68a382f70be58049709e0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 19 Jul 2022 14:49:39 +0900
+Subject: tty: vt: initialize unicode screen buffer
+
+From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+
+[ Upstream commit af77c56aa35325daa2bc2bed5c2ebf169be61b86 ]
+
+syzbot reports kernel infoleak at vcs_read() [1], for buffer can be read
+immediately after resize operation. Initialize buffer using kzalloc().
+
+  ----------
+  #include <fcntl.h>
+  #include <unistd.h>
+  #include <sys/ioctl.h>
+  #include <linux/fb.h>
+
+  int main(int argc, char *argv[])
+  {
+    struct fb_var_screeninfo var = { };
+    const int fb_fd = open("/dev/fb0", 3);
+    ioctl(fb_fd, FBIOGET_VSCREENINFO, &var);
+    var.yres = 0x21;
+    ioctl(fb_fd, FBIOPUT_VSCREENINFO, &var);
+    return read(open("/dev/vcsu", O_RDONLY), &var, sizeof(var)) == -1;
+  }
+  ----------
+
+Link: https://syzkaller.appspot.com/bug?extid=31a641689d43387f05d3 [1]
+Cc: stable <stable@vger.kernel.org>
+Reported-by: syzbot <syzbot+31a641689d43387f05d3@syzkaller.appspotmail.com>
+Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
+Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
+Link: https://lore.kernel.org/r/4ef053cf-e796-fb5e-58b7-3ae58242a4ad@I-love.SAKURA.ne.jp
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/vt/vt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c
+index dfc1f4b445f3..6eaf8eb84661 100644
+--- a/drivers/tty/vt/vt.c
++++ b/drivers/tty/vt/vt.c
+@@ -344,7 +344,7 @@ static struct uni_screen *vc_uniscr_alloc(unsigned int cols, unsigned int rows)
+       /* allocate everything in one go */
+       memsize = cols * rows * sizeof(char32_t);
+       memsize += rows * sizeof(char32_t *);
+-      p = vmalloc(memsize);
++      p = vzalloc(memsize);
+       if (!p)
+               return NULL;
+-- 
+2.35.1
+
diff --git a/queue-5.19/um-remove-straying-parenthesis.patch-5379 b/queue-5.19/um-remove-straying-parenthesis.patch-5379
new file mode 100644 (file)
index 0000000..c876bb3
--- /dev/null
@@ -0,0 +1,40 @@
+From a6a0f18473e1a64bfda2f4a192f3692fd833716c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 31 May 2022 11:17:39 +0000
+Subject: um: Remove straying parenthesis
+
+From: Benjamin Beichler <benjamin.beichler@uni-rostock.de>
+
+[ Upstream commit c6496e0a4a90d8149203c16323cff3fa46e422e7 ]
+
+Commit e3a33af812c6 ("um: fix and optimize xor select template for CONFIG64 and timetravel mode")
+caused a build regression when CONFIG_XOR_BLOCKS and CONFIG_UML_TIME_TRAVEL_SUPPORT
+are selected.
+Fix it by removing the straying parenthesis.
+
+Cc: stable@vger.kernel.org
+Fixes: e3a33af812c6 ("um: fix and optimize xor select template for CONFIG64 and timetravel mode")
+Signed-off-by: Benjamin Beichler <benjamin.beichler@uni-rostock.de>
+[rw: Added commit message]
+Signed-off-by: Richard Weinberger <richard@nod.at>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/um/include/asm/xor.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/um/include/asm/xor.h b/arch/um/include/asm/xor.h
+index 22b39de73c24..647fae200c5d 100644
+--- a/arch/um/include/asm/xor.h
++++ b/arch/um/include/asm/xor.h
+@@ -18,7 +18,7 @@
+ #undef XOR_SELECT_TEMPLATE
+ /* pick an arbitrary one - measuring isn't possible with inf-cpu */
+ #define XOR_SELECT_TEMPLATE(x)        \
+-      (time_travel_mode == TT_MODE_INFCPU ? TT_CPU_INF_XOR_DEFAULT : x))
++      (time_travel_mode == TT_MODE_INFCPU ? TT_CPU_INF_XOR_DEFAULT : x)
+ #endif
+ #endif
+-- 
+2.35.1
+
diff --git a/queue-5.19/um-seed-rng-using-host-os-rng.patch-8415 b/queue-5.19/um-seed-rng-using-host-os-rng.patch-8415
new file mode 100644 (file)
index 0000000..2a2fd9a
--- /dev/null
@@ -0,0 +1,163 @@
+From 2e26ddf816692690d73af68c3eb552e320ca3e2b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 13 Jul 2022 01:12:21 +0200
+Subject: um: seed rng using host OS rng
+
+From: Jason A. Donenfeld <Jason@zx2c4.com>
+
+[ Upstream commit 0b9ba6135d7f18b82f3d8bebb55ded725ba88e0e ]
+
+UML generally does not provide access to special CPU instructions like
+RDRAND, and execution tends to be rather deterministic, with no real
+hardware interrupts, making good randomness really very hard, if not
+all together impossible. Not only is this a security eyebrow raiser, but
+it's also quite annoying when trying to do various pieces of UML-based
+automation that takes a long time to boot, if ever.
+
+Fix this by trivially calling getrandom() in the host and using that
+seed as "bootloader randomness", which initializes the rng immediately
+at UML boot.
+
+The old behavior can be restored the same way as on any other arch, by
+way of CONFIG_TRUST_BOOTLOADER_RANDOMNESS=n or
+random.trust_bootloader=0. So seen from that perspective, this just
+makes UML act like other archs, which is positive in its own right.
+
+Additionally, wire up arch_get_random_{int,long}() in the same way, so
+that reseeds can also make use of the host RNG, controllable by
+CONFIG_TRUST_CPU_RANDOMNESS and random.trust_cpu, per usual.
+
+Cc: stable@vger.kernel.org
+Acked-by: Johannes Berg <johannes@sipsolutions.net>
+Acked-By: Anton Ivanov <anton.ivanov@cambridgegreys.com>
+Signed-off-by: Jason A. Donenfeld <Jason@zx2c4.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/um/include/asm/archrandom.h | 30 ++++++++++++++++++++++++++++++
+ arch/um/include/shared/os.h      |  7 +++++++
+ arch/um/kernel/um_arch.c         |  8 ++++++++
+ arch/um/os-Linux/util.c          |  6 ++++++
+ 4 files changed, 51 insertions(+)
+ create mode 100644 arch/um/include/asm/archrandom.h
+
+diff --git a/arch/um/include/asm/archrandom.h b/arch/um/include/asm/archrandom.h
+new file mode 100644
+index 000000000000..2f24cb96391d
+--- /dev/null
++++ b/arch/um/include/asm/archrandom.h
+@@ -0,0 +1,30 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++#ifndef __ASM_UM_ARCHRANDOM_H__
++#define __ASM_UM_ARCHRANDOM_H__
++
++#include <linux/types.h>
++
++/* This is from <os.h>, but better not to #include that in a global header here. */
++ssize_t os_getrandom(void *buf, size_t len, unsigned int flags);
++
++static inline bool __must_check arch_get_random_long(unsigned long *v)
++{
++      return os_getrandom(v, sizeof(*v), 0) == sizeof(*v);
++}
++
++static inline bool __must_check arch_get_random_int(unsigned int *v)
++{
++      return os_getrandom(v, sizeof(*v), 0) == sizeof(*v);
++}
++
++static inline bool __must_check arch_get_random_seed_long(unsigned long *v)
++{
++      return false;
++}
++
++static inline bool __must_check arch_get_random_seed_int(unsigned int *v)
++{
++      return false;
++}
++
++#endif
+diff --git a/arch/um/include/shared/os.h b/arch/um/include/shared/os.h
+index fafde1d5416e..0df646c6651e 100644
+--- a/arch/um/include/shared/os.h
++++ b/arch/um/include/shared/os.h
+@@ -11,6 +11,12 @@
+ #include <irq_user.h>
+ #include <longjmp.h>
+ #include <mm_id.h>
++/* This is to get size_t */
++#ifndef __UM_HOST__
++#include <linux/types.h>
++#else
++#include <sys/types.h>
++#endif
+ #define CATCH_EINTR(expr) while ((errno = 0, ((expr) < 0)) && (errno == EINTR))
+@@ -243,6 +249,7 @@ extern void stack_protections(unsigned long address);
+ extern int raw(int fd);
+ extern void setup_machinename(char *machine_out);
+ extern void setup_hostinfo(char *buf, int len);
++extern ssize_t os_getrandom(void *buf, size_t len, unsigned int flags);
+ extern void os_dump_core(void) __attribute__ ((noreturn));
+ extern void um_early_printk(const char *s, unsigned int n);
+ extern void os_fix_helper_signals(void);
+diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
+index 9838967d0b2f..e0de60e503b9 100644
+--- a/arch/um/kernel/um_arch.c
++++ b/arch/um/kernel/um_arch.c
+@@ -16,6 +16,7 @@
+ #include <linux/sched/task.h>
+ #include <linux/kmsg_dump.h>
+ #include <linux/suspend.h>
++#include <linux/random.h>
+ #include <asm/processor.h>
+ #include <asm/cpufeature.h>
+@@ -406,6 +407,8 @@ int __init __weak read_initrd(void)
+ void __init setup_arch(char **cmdline_p)
+ {
++      u8 rng_seed[32];
++
+       stack_protections((unsigned long) &init_thread_info);
+       setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem);
+       mem_total_pages(physmem_size, iomem_size, highmem);
+@@ -416,6 +419,11 @@ void __init setup_arch(char **cmdline_p)
+       strlcpy(boot_command_line, command_line, COMMAND_LINE_SIZE);
+       *cmdline_p = command_line;
+       setup_hostinfo(host_info, sizeof host_info);
++
++      if (os_getrandom(rng_seed, sizeof(rng_seed), 0) == sizeof(rng_seed)) {
++              add_bootloader_randomness(rng_seed, sizeof(rng_seed));
++              memzero_explicit(rng_seed, sizeof(rng_seed));
++      }
+ }
+ void __init check_bugs(void)
+diff --git a/arch/um/os-Linux/util.c b/arch/um/os-Linux/util.c
+index 41297ec404bf..fc0f2a9dee5a 100644
+--- a/arch/um/os-Linux/util.c
++++ b/arch/um/os-Linux/util.c
+@@ -14,6 +14,7 @@
+ #include <sys/wait.h>
+ #include <sys/mman.h>
+ #include <sys/utsname.h>
++#include <sys/random.h>
+ #include <init.h>
+ #include <os.h>
+@@ -96,6 +97,11 @@ static inline void __attribute__ ((noreturn)) uml_abort(void)
+                       exit(127);
+ }
++ssize_t os_getrandom(void *buf, size_t len, unsigned int flags)
++{
++      return getrandom(buf, len, flags);
++}
++
+ /*
+  * UML helper threads must not handle SIGWINCH/INT/TERM
+  */
+-- 
+2.35.1
+
diff --git a/queue-5.19/usb-dwc3-gadget-fix-high-speed-multiplier-setting.patch-13588 b/queue-5.19/usb-dwc3-gadget-fix-high-speed-multiplier-setting.patch-13588
new file mode 100644 (file)
index 0000000..b93f1a6
--- /dev/null
@@ -0,0 +1,44 @@
+From 25eec0f3ea8c7037878dbfa2801f71808bbe3897 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Jul 2022 16:18:12 +0200
+Subject: usb: dwc3: gadget: fix high speed multiplier setting
+
+From: Michael Grzeschik <m.grzeschik@pengutronix.de>
+
+[ Upstream commit 8affe37c525d800a2628c4ecfaed13b77dc5634a ]
+
+For High-Speed Transfers the prepare_one_trb function is calculating the
+multiplier setting for the trb based on the length parameter of the trb
+currently prepared. This assumption is wrong. For trbs with a sg list,
+the length of the actual request has to be taken instead.
+
+Fixes: 40d829fb2ec6 ("usb: dwc3: gadget: Correct ISOC DATA PIDs for short packets")
+Cc: stable <stable@kernel.org>
+Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
+Link: https://lore.kernel.org/r/20220704141812.1532306-3-m.grzeschik@pengutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/dwc3/gadget.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
+index 1b7d73638969..52d5a7c81362 100644
+--- a/drivers/usb/dwc3/gadget.c
++++ b/drivers/usb/dwc3/gadget.c
+@@ -1264,10 +1264,10 @@ static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
+                               unsigned int mult = 2;
+                               unsigned int maxp = usb_endpoint_maxp(ep->desc);
+-                              if (trb_length <= (2 * maxp))
++                              if (req->request.length <= (2 * maxp))
+                                       mult--;
+-                              if (trb_length <= maxp)
++                              if (req->request.length <= maxp)
+                                       mult--;
+                               trb->size |= DWC3_TRB_SIZE_PCM1(mult);
+-- 
+2.35.1
+
diff --git a/queue-5.19/usb-dwc3-gadget-refactor-dwc3_repare_one_trb.patch-8861 b/queue-5.19/usb-dwc3-gadget-refactor-dwc3_repare_one_trb.patch-8861
new file mode 100644 (file)
index 0000000..24672e2
--- /dev/null
@@ -0,0 +1,151 @@
+From a4c08ba674cd5fa59b05d31b34a23c96236de36f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 4 Jul 2022 16:18:11 +0200
+Subject: usb: dwc3: gadget: refactor dwc3_repare_one_trb
+
+From: Michael Grzeschik <m.grzeschik@pengutronix.de>
+
+[ Upstream commit 23385cec5f354794dadced7f28c31da7ae3eb54c ]
+
+The function __dwc3_prepare_one_trb has many parameters. Since it is
+only used in dwc3_prepare_one_trb there is no point in keeping the
+function. We merge both functions and get rid of the big list of
+parameters.
+
+Fixes: 40d829fb2ec6 ("usb: dwc3: gadget: Correct ISOC DATA PIDs for short packets")
+Cc: stable <stable@kernel.org>
+Signed-off-by: Michael Grzeschik <m.grzeschik@pengutronix.de>
+Link: https://lore.kernel.org/r/20220704141812.1532306-2-m.grzeschik@pengutronix.de
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/dwc3/gadget.c | 92 +++++++++++++++++----------------------
+ 1 file changed, 40 insertions(+), 52 deletions(-)
+
+diff --git a/drivers/usb/dwc3/gadget.c b/drivers/usb/dwc3/gadget.c
+index 0d89dfa6eef5..1b7d73638969 100644
+--- a/drivers/usb/dwc3/gadget.c
++++ b/drivers/usb/dwc3/gadget.c
+@@ -1182,17 +1182,49 @@ static u32 dwc3_calc_trbs_left(struct dwc3_ep *dep)
+       return trbs_left;
+ }
+-static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
+-              dma_addr_t dma, unsigned int length, unsigned int chain,
+-              unsigned int node, unsigned int stream_id,
+-              unsigned int short_not_ok, unsigned int no_interrupt,
+-              unsigned int is_last, bool must_interrupt)
++/**
++ * dwc3_prepare_one_trb - setup one TRB from one request
++ * @dep: endpoint for which this request is prepared
++ * @req: dwc3_request pointer
++ * @trb_length: buffer size of the TRB
++ * @chain: should this TRB be chained to the next?
++ * @node: only for isochronous endpoints. First TRB needs different type.
++ * @use_bounce_buffer: set to use bounce buffer
++ * @must_interrupt: set to interrupt on TRB completion
++ */
++static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
++              struct dwc3_request *req, unsigned int trb_length,
++              unsigned int chain, unsigned int node, bool use_bounce_buffer,
++              bool must_interrupt)
+ {
++      struct dwc3_trb         *trb;
++      dma_addr_t              dma;
++      unsigned int            stream_id = req->request.stream_id;
++      unsigned int            short_not_ok = req->request.short_not_ok;
++      unsigned int            no_interrupt = req->request.no_interrupt;
++      unsigned int            is_last = req->request.is_last;
+       struct dwc3             *dwc = dep->dwc;
+       struct usb_gadget       *gadget = dwc->gadget;
+       enum usb_device_speed   speed = gadget->speed;
+-      trb->size = DWC3_TRB_SIZE_LENGTH(length);
++      if (use_bounce_buffer)
++              dma = dep->dwc->bounce_addr;
++      else if (req->request.num_sgs > 0)
++              dma = sg_dma_address(req->start_sg);
++      else
++              dma = req->request.dma;
++
++      trb = &dep->trb_pool[dep->trb_enqueue];
++
++      if (!req->trb) {
++              dwc3_gadget_move_started_request(req);
++              req->trb = trb;
++              req->trb_dma = dwc3_trb_dma_offset(dep, trb);
++      }
++
++      req->num_trbs++;
++
++      trb->size = DWC3_TRB_SIZE_LENGTH(trb_length);
+       trb->bpl = lower_32_bits(dma);
+       trb->bph = upper_32_bits(dma);
+@@ -1232,10 +1264,10 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
+                               unsigned int mult = 2;
+                               unsigned int maxp = usb_endpoint_maxp(ep->desc);
+-                              if (length <= (2 * maxp))
++                              if (trb_length <= (2 * maxp))
+                                       mult--;
+-                              if (length <= maxp)
++                              if (trb_length <= maxp)
+                                       mult--;
+                               trb->size |= DWC3_TRB_SIZE_PCM1(mult);
+@@ -1309,50 +1341,6 @@ static void __dwc3_prepare_one_trb(struct dwc3_ep *dep, struct dwc3_trb *trb,
+       trace_dwc3_prepare_trb(dep, trb);
+ }
+-/**
+- * dwc3_prepare_one_trb - setup one TRB from one request
+- * @dep: endpoint for which this request is prepared
+- * @req: dwc3_request pointer
+- * @trb_length: buffer size of the TRB
+- * @chain: should this TRB be chained to the next?
+- * @node: only for isochronous endpoints. First TRB needs different type.
+- * @use_bounce_buffer: set to use bounce buffer
+- * @must_interrupt: set to interrupt on TRB completion
+- */
+-static void dwc3_prepare_one_trb(struct dwc3_ep *dep,
+-              struct dwc3_request *req, unsigned int trb_length,
+-              unsigned int chain, unsigned int node, bool use_bounce_buffer,
+-              bool must_interrupt)
+-{
+-      struct dwc3_trb         *trb;
+-      dma_addr_t              dma;
+-      unsigned int            stream_id = req->request.stream_id;
+-      unsigned int            short_not_ok = req->request.short_not_ok;
+-      unsigned int            no_interrupt = req->request.no_interrupt;
+-      unsigned int            is_last = req->request.is_last;
+-
+-      if (use_bounce_buffer)
+-              dma = dep->dwc->bounce_addr;
+-      else if (req->request.num_sgs > 0)
+-              dma = sg_dma_address(req->start_sg);
+-      else
+-              dma = req->request.dma;
+-
+-      trb = &dep->trb_pool[dep->trb_enqueue];
+-
+-      if (!req->trb) {
+-              dwc3_gadget_move_started_request(req);
+-              req->trb = trb;
+-              req->trb_dma = dwc3_trb_dma_offset(dep, trb);
+-      }
+-
+-      req->num_trbs++;
+-
+-      __dwc3_prepare_one_trb(dep, trb, dma, trb_length, chain, node,
+-                      stream_id, short_not_ok, no_interrupt, is_last,
+-                      must_interrupt);
+-}
+-
+ static bool dwc3_needs_extra_trb(struct dwc3_ep *dep, struct dwc3_request *req)
+ {
+       unsigned int maxp = usb_endpoint_maxp(dep->endpoint.desc);
+-- 
+2.35.1
+
diff --git a/queue-5.19/usb-gadget-fix-use-after-free-read-in-usb_udc_uevent.patch-12274 b/queue-5.19/usb-gadget-fix-use-after-free-read-in-usb_udc_uevent.patch-12274
new file mode 100644 (file)
index 0000000..eed8f8a
--- /dev/null
@@ -0,0 +1,78 @@
+From 695e00b305c742f7d25e7a4347d0a6f6f3488047 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 21 Jul 2022 11:07:10 -0400
+Subject: USB: gadget: Fix use-after-free Read in usb_udc_uevent()
+
+From: Alan Stern <stern@rowland.harvard.edu>
+
+[ Upstream commit 2191c00855b03aa59c20e698be713d952d51fc18 ]
+
+The syzbot fuzzer found a race between uevent callbacks and gadget
+driver unregistration that can cause a use-after-free bug:
+
+---------------------------------------------------------------
+BUG: KASAN: use-after-free in usb_udc_uevent+0x11f/0x130
+drivers/usb/gadget/udc/core.c:1732
+Read of size 8 at addr ffff888078ce2050 by task udevd/2968
+
+CPU: 1 PID: 2968 Comm: udevd Not tainted 5.19.0-rc4-next-20220628-syzkaller #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google
+06/29/2022
+Call Trace:
+ <TASK>
+ __dump_stack lib/dump_stack.c:88 [inline]
+ dump_stack_lvl+0xcd/0x134 lib/dump_stack.c:106
+ print_address_description mm/kasan/report.c:317 [inline]
+ print_report.cold+0x2ba/0x719 mm/kasan/report.c:433
+ kasan_report+0xbe/0x1f0 mm/kasan/report.c:495
+ usb_udc_uevent+0x11f/0x130 drivers/usb/gadget/udc/core.c:1732
+ dev_uevent+0x290/0x770 drivers/base/core.c:2424
+---------------------------------------------------------------
+
+The bug occurs because usb_udc_uevent() dereferences udc->driver but
+does so without acquiring the udc_lock mutex, which protects this
+field.  If the gadget driver is unbound from the udc concurrently with
+uevent processing, the driver structure may be accessed after it has
+been deallocated.
+
+To prevent the race, we make sure that the routine holds the mutex
+around the racing accesses.
+
+Link: <https://lore.kernel.org/all/0000000000004de90405a719c951@google.com>
+CC: stable@vger.kernel.org # fc274c1e9973
+Reported-and-tested-by: syzbot+b0de012ceb1e2a97891b@syzkaller.appspotmail.com
+Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
+Link: https://lore.kernel.org/r/YtlrnhHyrHsSky9m@rowland.harvard.edu
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/gadget/udc/core.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/usb/gadget/udc/core.c b/drivers/usb/gadget/udc/core.c
+index 7886497253cc..cafcf260394c 100644
+--- a/drivers/usb/gadget/udc/core.c
++++ b/drivers/usb/gadget/udc/core.c
+@@ -1728,13 +1728,14 @@ static int usb_udc_uevent(struct device *dev, struct kobj_uevent_env *env)
+               return ret;
+       }
+-      if (udc->driver) {
++      mutex_lock(&udc_lock);
++      if (udc->driver)
+               ret = add_uevent_var(env, "USB_UDC_DRIVER=%s",
+                               udc->driver->function);
+-              if (ret) {
+-                      dev_err(dev, "failed to add uevent USB_UDC_DRIVER\n");
+-                      return ret;
+-              }
++      mutex_unlock(&udc_lock);
++      if (ret) {
++              dev_err(dev, "failed to add uevent USB_UDC_DRIVER\n");
++              return ret;
+       }
+       return 0;
+-- 
+2.35.1
+
diff --git a/queue-5.19/usb-hcd-fix-urb-giveback-issue-in-tasklet-function.patch-24136 b/queue-5.19/usb-hcd-fix-urb-giveback-issue-in-tasklet-function.patch-24136
new file mode 100644 (file)
index 0000000..085ee9d
--- /dev/null
@@ -0,0 +1,132 @@
+From bcd8d6752d2fa9b3d117527f7cd8444380afa4a8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 15:49:18 +0800
+Subject: USB: HCD: Fix URB giveback issue in tasklet function
+
+From: Weitao Wang <WeitaoWang-oc@zhaoxin.com>
+
+[ Upstream commit 26c6c2f8a907c9e3a2f24990552a4d77235791e6 ]
+
+Usb core introduce the mechanism of giveback of URB in tasklet context to
+reduce hardware interrupt handling time. On some test situation(such as
+FIO with 4KB block size), when tasklet callback function called to
+giveback URB, interrupt handler add URB node to the bh->head list also.
+If check bh->head list again after finish all URB giveback of local_list,
+then it may introduce a "dynamic balance" between giveback URB and add URB
+to bh->head list. This tasklet callback function may not exit for a long
+time, which will cause other tasklet function calls to be delayed. Some
+real-time applications(such as KB and Mouse) will see noticeable lag.
+
+In order to prevent the tasklet function from occupying the cpu for a long
+time at a time, new URBS will not be added to the local_list even though
+the bh->head list is not empty. But also need to ensure the left URB
+giveback to be processed in time, so add a member high_prio for structure
+giveback_urb_bh to prioritize tasklet and schelule this tasklet again if
+bh->head list is not empty.
+
+At the same time, we are able to prioritize tasklet through structure
+member high_prio. So, replace the local high_prio_bh variable with this
+structure member in usb_hcd_giveback_urb.
+
+Fixes: 94dfd7edfd5c ("USB: HCD: support giveback of URB in tasklet context")
+Cc: stable <stable@kernel.org>
+Reviewed-by: Alan Stern <stern@rowland.harvard.edu>
+Signed-off-by: Weitao Wang <WeitaoWang-oc@zhaoxin.com>
+Link: https://lore.kernel.org/r/20220726074918.5114-1-WeitaoWang-oc@zhaoxin.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/core/hcd.c  | 26 +++++++++++++++-----------
+ include/linux/usb/hcd.h |  1 +
+ 2 files changed, 16 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c
+index 06eea8848ccc..11c8ea0cccc8 100644
+--- a/drivers/usb/core/hcd.c
++++ b/drivers/usb/core/hcd.c
+@@ -1691,7 +1691,6 @@ static void usb_giveback_urb_bh(struct tasklet_struct *t)
+       spin_lock_irq(&bh->lock);
+       bh->running = true;
+- restart:
+       list_replace_init(&bh->head, &local_list);
+       spin_unlock_irq(&bh->lock);
+@@ -1705,10 +1704,17 @@ static void usb_giveback_urb_bh(struct tasklet_struct *t)
+               bh->completing_ep = NULL;
+       }
+-      /* check if there are new URBs to giveback */
++      /*
++       * giveback new URBs next time to prevent this function
++       * from not exiting for a long time.
++       */
+       spin_lock_irq(&bh->lock);
+-      if (!list_empty(&bh->head))
+-              goto restart;
++      if (!list_empty(&bh->head)) {
++              if (bh->high_prio)
++                      tasklet_hi_schedule(&bh->bh);
++              else
++                      tasklet_schedule(&bh->bh);
++      }
+       bh->running = false;
+       spin_unlock_irq(&bh->lock);
+ }
+@@ -1737,7 +1743,7 @@ static void usb_giveback_urb_bh(struct tasklet_struct *t)
+ void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status)
+ {
+       struct giveback_urb_bh *bh;
+-      bool running, high_prio_bh;
++      bool running;
+       /* pass status to tasklet via unlinked */
+       if (likely(!urb->unlinked))
+@@ -1748,13 +1754,10 @@ void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status)
+               return;
+       }
+-      if (usb_pipeisoc(urb->pipe) || usb_pipeint(urb->pipe)) {
++      if (usb_pipeisoc(urb->pipe) || usb_pipeint(urb->pipe))
+               bh = &hcd->high_prio_bh;
+-              high_prio_bh = true;
+-      } else {
++      else
+               bh = &hcd->low_prio_bh;
+-              high_prio_bh = false;
+-      }
+       spin_lock(&bh->lock);
+       list_add_tail(&urb->urb_list, &bh->head);
+@@ -1763,7 +1766,7 @@ void usb_hcd_giveback_urb(struct usb_hcd *hcd, struct urb *urb, int status)
+       if (running)
+               ;
+-      else if (high_prio_bh)
++      else if (bh->high_prio)
+               tasklet_hi_schedule(&bh->bh);
+       else
+               tasklet_schedule(&bh->bh);
+@@ -2959,6 +2962,7 @@ int usb_add_hcd(struct usb_hcd *hcd,
+       /* initialize tasklets */
+       init_giveback_urb_bh(&hcd->high_prio_bh);
++      hcd->high_prio_bh.high_prio = true;
+       init_giveback_urb_bh(&hcd->low_prio_bh);
+       /* enable irqs just before we start the controller,
+diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h
+index 2c1fc9212cf2..98d1921f02b1 100644
+--- a/include/linux/usb/hcd.h
++++ b/include/linux/usb/hcd.h
+@@ -66,6 +66,7 @@
+ struct giveback_urb_bh {
+       bool running;
++      bool high_prio;
+       spinlock_t lock;
+       struct list_head  head;
+       struct tasklet_struct bh;
+-- 
+2.35.1
+
diff --git a/queue-5.19/usb-typec-ucsi-acknowledge-the-get_error_status-comm.patch b/queue-5.19/usb-typec-ucsi-acknowledge-the-get_error_status-comm.patch
new file mode 100644 (file)
index 0000000..cda8ea2
--- /dev/null
@@ -0,0 +1,46 @@
+From d6b81f001c2bae43f67f2e757774341def8894be Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 26 Jul 2022 14:45:49 +0800
+Subject: usb: typec: ucsi: Acknowledge the GET_ERROR_STATUS command completion
+
+From: Linyu Yuan <quic_linyyuan@quicinc.com>
+
+[ Upstream commit a7dc438b5e446afcd1b3b6651da28271400722f2 ]
+
+We found PPM will not send any notification after it report error status
+and OPM issue GET_ERROR_STATUS command to read the details about error.
+
+According UCSI spec, PPM may clear the Error Status Data after the OPM
+has acknowledged the command completion.
+
+This change add operation to acknowledge the command completion from PPM.
+
+Fixes: bdc62f2bae8f (usb: typec: ucsi: Simplified registration and I/O API)
+Cc: <stable@vger.kernel.org> # 5.10
+Signed-off-by: Jack Pham <quic_jackp@quicinc.com>
+Signed-off-by: Linyu Yuan <quic_linyyuan@quicinc.com>
+Link: https://lore.kernel.org/r/1658817949-4632-1-git-send-email-quic_linyyuan@quicinc.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/typec/ucsi/ucsi.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/drivers/usb/typec/ucsi/ucsi.c b/drivers/usb/typec/ucsi/ucsi.c
+index cbd862f9f2a1..1aea46493b85 100644
+--- a/drivers/usb/typec/ucsi/ucsi.c
++++ b/drivers/usb/typec/ucsi/ucsi.c
+@@ -76,6 +76,10 @@ static int ucsi_read_error(struct ucsi *ucsi)
+       if (ret)
+               return ret;
++      ret = ucsi_acknowledge_command(ucsi);
++      if (ret)
++              return ret;
++
+       switch (error) {
+       case UCSI_ERROR_INCOMPATIBLE_PARTNER:
+               return -EOPNOTSUPP;
+-- 
+2.35.1
+
diff --git a/queue-5.19/usbnet-fix-linkwatch-use-after-free-on-disconnect.patch-30140 b/queue-5.19/usbnet-fix-linkwatch-use-after-free-on-disconnect.patch-30140
new file mode 100644 (file)
index 0000000..aaf8f9a
--- /dev/null
@@ -0,0 +1,90 @@
+From e76a4cdb309afe50d707c3a04df9f64540323d98 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 23 Jun 2022 14:50:59 +0200
+Subject: usbnet: Fix linkwatch use-after-free on disconnect
+
+From: Lukas Wunner <lukas@wunner.de>
+
+[ Upstream commit a69e617e533edddf3fa3123149900f36e0a6dc74 ]
+
+usbnet uses the work usbnet_deferred_kevent() to perform tasks which may
+sleep.  On disconnect, completion of the work was originally awaited in
+->ndo_stop().  But in 2003, that was moved to ->disconnect() by historic
+commit "[PATCH] USB: usbnet, prevent exotic rtnl deadlock":
+
+  https://git.kernel.org/tglx/history/c/0f138bbfd83c
+
+The change was made because back then, the kernel's workqueue
+implementation did not allow waiting for a single work.  One had to wait
+for completion of *all* work by calling flush_scheduled_work(), and that
+could deadlock when waiting for usbnet_deferred_kevent() with rtnl_mutex
+held in ->ndo_stop().
+
+The commit solved one problem but created another:  It causes a
+use-after-free in USB Ethernet drivers aqc111.c, asix_devices.c,
+ax88179_178a.c, ch9200.c and smsc75xx.c:
+
+* If the drivers receive a link change interrupt immediately before
+  disconnect, they raise EVENT_LINK_RESET in their (non-sleepable)
+  ->status() callback and schedule usbnet_deferred_kevent().
+* usbnet_deferred_kevent() invokes the driver's ->link_reset() callback,
+  which calls netif_carrier_{on,off}().
+* That in turn schedules the work linkwatch_event().
+
+Because usbnet_deferred_kevent() is awaited after unregister_netdev(),
+netif_carrier_{on,off}() may operate on an unregistered netdev and
+linkwatch_event() may run after free_netdev(), causing a use-after-free.
+
+In 2010, usbnet was changed to only wait for a single instance of
+usbnet_deferred_kevent() instead of *all* work by commit 23f333a2bfaf
+("drivers/net: don't use flush_scheduled_work()").
+
+Unfortunately the commit neglected to move the wait back to
+->ndo_stop().  Rectify that omission at long last.
+
+Reported-by: Jann Horn <jannh@google.com>
+Link: https://lore.kernel.org/netdev/CAG48ez0MHBbENX5gCdHAUXZ7h7s20LnepBF-pa5M=7Bi-jZrEA@mail.gmail.com/
+Reported-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Link: https://lore.kernel.org/netdev/20220315113841.GA22337@pengutronix.de/
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Cc: stable@vger.kernel.org
+Acked-by: Oliver Neukum <oneukum@suse.com>
+Link: https://lore.kernel.org/r/d1c87ebe9fc502bffcd1576e238d685ad08321e4.1655987888.git.lukas@wunner.de
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/usbnet.c | 8 ++------
+ 1 file changed, 2 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/usb/usbnet.c b/drivers/net/usb/usbnet.c
+index 78a92751ce4c..0ed09bb91c44 100644
+--- a/drivers/net/usb/usbnet.c
++++ b/drivers/net/usb/usbnet.c
+@@ -849,13 +849,11 @@ int usbnet_stop (struct net_device *net)
+       mpn = !test_and_clear_bit(EVENT_NO_RUNTIME_PM, &dev->flags);
+-      /* deferred work (task, timer, softirq) must also stop.
+-       * can't flush_scheduled_work() until we drop rtnl (later),
+-       * else workers could deadlock; so make workers a NOP.
+-       */
++      /* deferred work (timer, softirq, task) must also stop */
+       dev->flags = 0;
+       del_timer_sync (&dev->delay);
+       tasklet_kill (&dev->bh);
++      cancel_work_sync(&dev->kevent);
+       if (!pm)
+               usb_autopm_put_interface(dev->intf);
+@@ -1619,8 +1617,6 @@ void usbnet_disconnect (struct usb_interface *intf)
+       net = dev->net;
+       unregister_netdev (net);
+-      cancel_work_sync(&dev->kevent);
+-
+       usb_scuttle_anchored_urbs(&dev->deferred);
+       if (dev->driver_info->unbind)
+-- 
+2.35.1
+
diff --git a/queue-5.19/usbnet-smsc95xx-fix-deadlock-on-runtime-resume.patch-22908 b/queue-5.19/usbnet-smsc95xx-fix-deadlock-on-runtime-resume.patch-22908
new file mode 100644 (file)
index 0000000..e57706b
--- /dev/null
@@ -0,0 +1,193 @@
+From 7660b6340c5f30ea6c98ee7271cefad4f9193d98 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 1 Jul 2022 22:47:51 +0200
+Subject: usbnet: smsc95xx: Fix deadlock on runtime resume
+
+From: Lukas Wunner <lukas@wunner.de>
+
+[ Upstream commit 7b960c967f2aa01ab8f45c5a0bd78e754cffdeee ]
+
+Commit 05b35e7eb9a1 ("smsc95xx: add phylib support") amended
+smsc95xx_resume() to call phy_init_hw().  That function waits for the
+device to runtime resume even though it is placed in the runtime resume
+path, causing a deadlock.
+
+The problem is that phy_init_hw() calls down to smsc95xx_mdiobus_read(),
+which never uses the _nopm variant of usbnet_read_cmd().
+
+Commit b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with
+reset operation") causes a similar deadlock on resume if the device was
+already runtime suspended when entering system sleep:
+
+That's because the commit introduced smsc95xx_reset_resume(), which
+calls down to smsc95xx_reset(), which neglects to use _nopm accessors.
+
+Fix by auto-detecting whether a device access is performed by the
+suspend/resume task_struct and use the _nopm variant if so.  This works
+because the PM core guarantees that suspend/resume callbacks are run in
+task context.
+
+Stacktrace for posterity:
+
+  INFO: task kworker/2:1:49 blocked for more than 122 seconds.
+  Workqueue: usb_hub_wq hub_event
+  schedule
+  rpm_resume
+  __pm_runtime_resume
+  usb_autopm_get_interface
+  usbnet_read_cmd
+  __smsc95xx_read_reg
+  __smsc95xx_phy_wait_not_busy
+  __smsc95xx_mdio_read
+  smsc95xx_mdiobus_read
+  __mdiobus_read
+  mdiobus_read
+  smsc_phy_reset
+  phy_init_hw
+  smsc95xx_resume
+  usb_resume_interface
+  usb_resume_both
+  usb_runtime_resume
+  __rpm_callback
+  rpm_callback
+  rpm_resume
+  __pm_runtime_resume
+  usb_autoresume_device
+  hub_event
+  process_one_work
+
+Fixes: b4df480f68ae ("usbnet: smsc95xx: add reset_resume function with reset operation")
+Signed-off-by: Lukas Wunner <lukas@wunner.de>
+Cc: stable@vger.kernel.org # v3.16+
+Cc: Andre Edich <andre.edich@microchip.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/usb/smsc95xx.c | 26 ++++++++++++++++++++------
+ 1 file changed, 20 insertions(+), 6 deletions(-)
+
+diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c
+index bd03e16f98a1..4dc43929e370 100644
+--- a/drivers/net/usb/smsc95xx.c
++++ b/drivers/net/usb/smsc95xx.c
+@@ -71,6 +71,7 @@ struct smsc95xx_priv {
+       struct fwnode_handle *irqfwnode;
+       struct mii_bus *mdiobus;
+       struct phy_device *phydev;
++      struct task_struct *pm_task;
+ };
+ static bool turbo_mode = true;
+@@ -80,13 +81,14 @@ MODULE_PARM_DESC(turbo_mode, "Enable multiple frames per Rx transaction");
+ static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
+                                           u32 *data, int in_pm)
+ {
++      struct smsc95xx_priv *pdata = dev->driver_priv;
+       u32 buf;
+       int ret;
+       int (*fn)(struct usbnet *, u8, u8, u16, u16, void *, u16);
+       BUG_ON(!dev);
+-      if (!in_pm)
++      if (current != pdata->pm_task)
+               fn = usbnet_read_cmd;
+       else
+               fn = usbnet_read_cmd_nopm;
+@@ -110,13 +112,14 @@ static int __must_check __smsc95xx_read_reg(struct usbnet *dev, u32 index,
+ static int __must_check __smsc95xx_write_reg(struct usbnet *dev, u32 index,
+                                            u32 data, int in_pm)
+ {
++      struct smsc95xx_priv *pdata = dev->driver_priv;
+       u32 buf;
+       int ret;
+       int (*fn)(struct usbnet *, u8, u8, u16, u16, const void *, u16);
+       BUG_ON(!dev);
+-      if (!in_pm)
++      if (current != pdata->pm_task)
+               fn = usbnet_write_cmd;
+       else
+               fn = usbnet_write_cmd_nopm;
+@@ -1490,9 +1493,12 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
+       u32 val, link_up;
+       int ret;
++      pdata->pm_task = current;
++
+       ret = usbnet_suspend(intf, message);
+       if (ret < 0) {
+               netdev_warn(dev->net, "usbnet_suspend error\n");
++              pdata->pm_task = NULL;
+               return ret;
+       }
+@@ -1732,6 +1738,7 @@ static int smsc95xx_suspend(struct usb_interface *intf, pm_message_t message)
+       if (ret && PMSG_IS_AUTO(message))
+               usbnet_resume(intf);
++      pdata->pm_task = NULL;
+       return ret;
+ }
+@@ -1752,29 +1759,31 @@ static int smsc95xx_resume(struct usb_interface *intf)
+       /* do this first to ensure it's cleared even in error case */
+       pdata->suspend_flags = 0;
++      pdata->pm_task = current;
++
+       if (suspend_flags & SUSPEND_ALLMODES) {
+               /* clear wake-up sources */
+               ret = smsc95xx_read_reg_nopm(dev, WUCSR, &val);
+               if (ret < 0)
+-                      return ret;
++                      goto done;
+               val &= ~(WUCSR_WAKE_EN_ | WUCSR_MPEN_);
+               ret = smsc95xx_write_reg_nopm(dev, WUCSR, val);
+               if (ret < 0)
+-                      return ret;
++                      goto done;
+               /* clear wake-up status */
+               ret = smsc95xx_read_reg_nopm(dev, PM_CTRL, &val);
+               if (ret < 0)
+-                      return ret;
++                      goto done;
+               val &= ~PM_CTL_WOL_EN_;
+               val |= PM_CTL_WUPS_;
+               ret = smsc95xx_write_reg_nopm(dev, PM_CTRL, val);
+               if (ret < 0)
+-                      return ret;
++                      goto done;
+       }
+       phy_init_hw(pdata->phydev);
+@@ -1783,15 +1792,20 @@ static int smsc95xx_resume(struct usb_interface *intf)
+       if (ret < 0)
+               netdev_warn(dev->net, "usbnet_resume error\n");
++done:
++      pdata->pm_task = NULL;
+       return ret;
+ }
+ static int smsc95xx_reset_resume(struct usb_interface *intf)
+ {
+       struct usbnet *dev = usb_get_intfdata(intf);
++      struct smsc95xx_priv *pdata = dev->driver_priv;
+       int ret;
++      pdata->pm_task = current;
+       ret = smsc95xx_reset(dev);
++      pdata->pm_task = NULL;
+       if (ret < 0)
+               return ret;
+-- 
+2.35.1
+
diff --git a/queue-5.19/vfs-check-the-truncate-maximum-size-in-inode_newsize.patch b/queue-5.19/vfs-check-the-truncate-maximum-size-in-inode_newsize.patch
new file mode 100644 (file)
index 0000000..79006fe
--- /dev/null
@@ -0,0 +1,73 @@
+From e04fea8cd8ac26f4d38a6c8f8550dc732235ab0c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 8 Aug 2022 09:52:35 +0100
+Subject: vfs: Check the truncate maximum size in inode_newsize_ok()
+
+From: David Howells <dhowells@redhat.com>
+
+[ Upstream commit e2ebff9c57fe4eb104ce4768f6ebcccf76bef849 ]
+
+If something manages to set the maximum file size to MAX_OFFSET+1, this
+can cause the xfs and ext4 filesystems at least to become corrupt.
+
+Ordinarily, the kernel protects against userspace trying this by
+checking the value early in the truncate() and ftruncate() system calls
+calls - but there are at least two places that this check is bypassed:
+
+ (1) Cachefiles will round up the EOF of the backing file to DIO block
+     size so as to allow DIO on the final block - but this might push
+     the offset negative. It then calls notify_change(), but this
+     inadvertently bypasses the checking. This can be triggered if
+     someone puts an 8EiB-1 file on a server for someone else to try and
+     access by, say, nfs.
+
+ (2) ksmbd doesn't check the value it is given in set_end_of_file_info()
+     and then calls vfs_truncate() directly - which also bypasses the
+     check.
+
+In both cases, it is potentially possible for a network filesystem to
+cause a disk filesystem to be corrupted: cachefiles in the client's
+cache filesystem; ksmbd in the server's filesystem.
+
+nfsd is okay as it checks the value, but we can then remove this check
+too.
+
+Fix this by adding a check to inode_newsize_ok(), as called from
+setattr_prepare(), thereby catching the issue as filesystems set up to
+perform the truncate with minimal opportunity for bypassing the new
+check.
+
+Fixes: 1f08c925e7a3 ("cachefiles: Implement backing file wrangling")
+Fixes: f44158485826 ("cifsd: add file operations")
+Signed-off-by: David Howells <dhowells@redhat.com>
+Reported-by: Jeff Layton <jlayton@kernel.org>
+Tested-by: Jeff Layton <jlayton@kernel.org>
+Reviewed-by: Namjae Jeon <linkinjeon@kernel.org>
+Cc: stable@kernel.org
+Acked-by: Alexander Viro <viro@zeniv.linux.org.uk>
+cc: Steve French <sfrench@samba.org>
+cc: Hyunchul Lee <hyc.lee@gmail.com>
+cc: Chuck Lever <chuck.lever@oracle.com>
+cc: Dave Wysochanski <dwysocha@redhat.com>
+Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/attr.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/fs/attr.c b/fs/attr.c
+index dbe996b0dedf..f581c4d00897 100644
+--- a/fs/attr.c
++++ b/fs/attr.c
+@@ -184,6 +184,8 @@ EXPORT_SYMBOL(setattr_prepare);
+  */
+ int inode_newsize_ok(const struct inode *inode, loff_t offset)
+ {
++      if (offset < 0)
++              return -EINVAL;
+       if (inode->i_size < offset) {
+               unsigned long limit;
+-- 
+2.35.1
+
diff --git a/queue-5.19/x86-kprobes-update-kcb-status-flag-after-singlestepp.patch b/queue-5.19/x86-kprobes-update-kcb-status-flag-after-singlestepp.patch
new file mode 100644 (file)
index 0000000..a7472e2
--- /dev/null
@@ -0,0 +1,67 @@
+From d71f841a854f95581d4ab63274cd768d8381a44c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 Aug 2022 15:04:16 +0900
+Subject: x86/kprobes: Update kcb status flag after singlestepping
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+
+[ Upstream commit dec8784c9088b131a1523f582c2194cfc8107dc0 ]
+
+Fix kprobes to update kcb (kprobes control block) status flag to
+KPROBE_HIT_SSDONE even if the kp->post_handler is not set.
+
+This bug may cause a kernel panic if another INT3 user runs right
+after kprobes because kprobe_int3_handler() misunderstands the
+INT3 is kprobe's single stepping INT3.
+
+Fixes: 6256e668b7af ("x86/kprobes: Use int3 instead of debug trap for single-step")
+Reported-by: Daniel Müller <deso@posteo.net>
+Signed-off-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Ingo Molnar <mingo@kernel.org>
+Tested-by: Daniel Müller <deso@posteo.net>
+Cc: stable@vger.kernel.org
+Link: https://lore.kernel.org/all/20220727210136.jjgc3lpqeq42yr3m@muellerd-fedora-PC2BDTX9
+Link: https://lore.kernel.org/r/165942025658.342061.12452378391879093249.stgit@devnote2
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/kernel/kprobes/core.c | 18 +++++++++++-------
+ 1 file changed, 11 insertions(+), 7 deletions(-)
+
+diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
+index 7c4ab8870da4..74167dc5f55e 100644
+--- a/arch/x86/kernel/kprobes/core.c
++++ b/arch/x86/kernel/kprobes/core.c
+@@ -814,16 +814,20 @@ set_current_kprobe(struct kprobe *p, struct pt_regs *regs,
+ static void kprobe_post_process(struct kprobe *cur, struct pt_regs *regs,
+                              struct kprobe_ctlblk *kcb)
+ {
+-      if ((kcb->kprobe_status != KPROBE_REENTER) && cur->post_handler) {
+-              kcb->kprobe_status = KPROBE_HIT_SSDONE;
+-              cur->post_handler(cur, regs, 0);
+-      }
+-
+       /* Restore back the original saved kprobes variables and continue. */
+-      if (kcb->kprobe_status == KPROBE_REENTER)
++      if (kcb->kprobe_status == KPROBE_REENTER) {
++              /* This will restore both kcb and current_kprobe */
+               restore_previous_kprobe(kcb);
+-      else
++      } else {
++              /*
++               * Always update the kcb status because
++               * reset_curent_kprobe() doesn't update kcb.
++               */
++              kcb->kprobe_status = KPROBE_HIT_SSDONE;
++              if (cur->post_handler)
++                      cur->post_handler(cur, regs, 0);
+               reset_current_kprobe();
++      }
+ }
+ NOKPROBE_SYMBOL(kprobe_post_process);
+-- 
+2.35.1
+
diff --git a/queue-5.19/x86-olpc-fix-logical-not-is-only-applied-to-the-left.patch b/queue-5.19/x86-olpc-fix-logical-not-is-only-applied-to-the-left.patch
new file mode 100644 (file)
index 0000000..84467c4
--- /dev/null
@@ -0,0 +1,54 @@
+From 181fac7e1d71b4723e799eb09ab117ee9407108b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 15 Jul 2022 17:15:36 +0200
+Subject: x86/olpc: fix 'logical not is only applied to the left hand side'
+
+From: Alexander Lobakin <alexandr.lobakin@intel.com>
+
+[ Upstream commit 3a2ba42cbd0b669ce3837ba400905f93dd06c79f ]
+
+The bitops compile-time optimization series revealed one more
+problem in olpc-xo1-sci.c:send_ebook_state(), resulted in GCC
+warnings:
+
+arch/x86/platform/olpc/olpc-xo1-sci.c: In function 'send_ebook_state':
+arch/x86/platform/olpc/olpc-xo1-sci.c:83:63: warning: logical not is only applied to the left hand side of comparison [-Wlogical-not-parentheses]
+   83 |         if (!!test_bit(SW_TABLET_MODE, ebook_switch_idev->sw) == state)
+      |                                                               ^~
+arch/x86/platform/olpc/olpc-xo1-sci.c:83:13: note: add parentheses around left hand side expression to silence this warning
+
+Despite this code working as intended, this redundant double
+negation of boolean value, together with comparing to `char`
+with no explicit conversion to bool, makes compilers think
+the author made some unintentional logical mistakes here.
+Make it the other way around and negate the char instead
+to silence the warnings.
+
+Fixes: d2aa37411b8e ("x86/olpc/xo1/sci: Produce wakeup events for buttons and switches")
+Cc: stable@vger.kernel.org # 3.5+
+Reported-by: Guenter Roeck <linux@roeck-us.net>
+Reported-by: kernel test robot <lkp@intel.com>
+Reviewed-and-tested-by: Guenter Roeck <linux@roeck-us.net>
+Signed-off-by: Alexander Lobakin <alexandr.lobakin@intel.com>
+Signed-off-by: Yury Norov <yury.norov@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/x86/platform/olpc/olpc-xo1-sci.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/x86/platform/olpc/olpc-xo1-sci.c b/arch/x86/platform/olpc/olpc-xo1-sci.c
+index f03a6883dcc6..89f25af4b3c3 100644
+--- a/arch/x86/platform/olpc/olpc-xo1-sci.c
++++ b/arch/x86/platform/olpc/olpc-xo1-sci.c
+@@ -80,7 +80,7 @@ static void send_ebook_state(void)
+               return;
+       }
+-      if (!!test_bit(SW_TABLET_MODE, ebook_switch_idev->sw) == state)
++      if (test_bit(SW_TABLET_MODE, ebook_switch_idev->sw) == !!state)
+               return; /* Nothing new to report. */
+       input_report_switch(ebook_switch_idev, SW_TABLET_MODE, state);
+-- 
+2.35.1
+