--- /dev/null
+From 92bf9e7e60ec477f33e9520a2f8ed58c717a4f9b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Jun 2023 20:45:04 +0200
+Subject: ACPI: video: Add backlight=native DMI quirk for Dell Studio 1569
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+[ Upstream commit 23d28cc0444be3f694eb986cd653b6888b78431d ]
+
+The Dell Studio 1569 predates Windows 8, so it defaults to using
+acpi_video# for backlight control, but this is non-functional on
+this model.
+
+Add a DMI quirk to use the native intel_backlight interface which
+does work properly.
+
+Reported-by: raycekarneal <raycekarneal@gmail.com>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/acpi/video_detect.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
+index 4a77e7e6e3fa0..c8dd7f7407da2 100644
+--- a/drivers/acpi/video_detect.c
++++ b/drivers/acpi/video_detect.c
+@@ -526,6 +526,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision 7510"),
+ },
+ },
++ {
++ .callback = video_detect_force_native,
++ /* Dell Studio 1569 */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Studio 1569"),
++ },
++ },
+ {
+ .callback = video_detect_force_native,
+ /* Acer Aspire 3830TG */
+--
+2.39.2
+
--- /dev/null
+From af0f59a65f332284ca2bf7579e4158dff37dc62d Mon Sep 17 00:00:00 2001
+From: Oswald Buddenhagen <oswald.buddenhagen@gmx.de>
+Date: Wed, 10 May 2023 19:39:05 +0200
+Subject: [PATCH AUTOSEL 4.19 02/11] ALSA: emu10k1: roll up loops in DSP setup
+ code for Audigy
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit 8cabf83c7aa54530e699be56249fb44f9505c4f3 ]
+
+There is no apparent reason for the massive code duplication.
+
+Signed-off-by: Oswald Buddenhagen <oswald.buddenhagen@gmx.de>
+Link: https://lore.kernel.org/r/20230510173917.3073107-3-oswald.buddenhagen@gmx.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/emu10k1/emufx.c | 112 +++-----------------------------------
+ 1 file changed, 9 insertions(+), 103 deletions(-)
+
+diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c
+index 1f25e6d029d82..84d98c098b744 100644
+--- a/sound/pci/emu10k1/emufx.c
++++ b/sound/pci/emu10k1/emufx.c
+@@ -1550,14 +1550,8 @@ A_OP(icode, &ptr, iMAC0, A_GPR(var), A_GPR(var), A_GPR(vol), A_EXTIN(input))
+ gpr += 2;
+
+ /* Master volume (will be renamed later) */
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+0+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+0+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+1+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+1+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+2+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+2+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+3+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+3+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+4+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+4+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+5+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+5+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+6+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+6+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+7+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+7+SND_EMU10K1_PLAYBACK_CHANNELS));
++ for (z = 0; z < 8; z++)
++ A_OP(icode, &ptr, iMAC0, A_GPR(playback+z+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+z+SND_EMU10K1_PLAYBACK_CHANNELS));
+ snd_emu10k1_init_mono_control(&controls[nctl++], "Wave Master Playback Volume", gpr, 0);
+ gpr += 2;
+
+@@ -1641,102 +1635,14 @@ A_OP(icode, &ptr, iMAC0, A_GPR(var), A_GPR(var), A_GPR(vol), A_EXTIN(input))
+ dev_dbg(emu->card->dev, "emufx.c: gpr=0x%x, tmp=0x%x\n",
+ gpr, tmp);
+ */
+- /* For the EMU1010: How to get 32bit values from the DSP. High 16bits into L, low 16bits into R. */
+- /* A_P16VIN(0) is delayed by one sample,
+- * so all other A_P16VIN channels will need to also be delayed
+- */
+- /* Left ADC in. 1 of 2 */
+ snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_P16VIN(0x0), A_FXBUS2(0) );
+- /* Right ADC in 1 of 2 */
+- gpr_map[gpr++] = 0x00000000;
+- /* Delaying by one sample: instead of copying the input
+- * value A_P16VIN to output A_FXBUS2 as in the first channel,
+- * we use an auxiliary register, delaying the value by one
+- * sample
+- */
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(2) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x1), A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(4) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x2), A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(6) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x3), A_C_00000000, A_C_00000000);
+- /* For 96kHz mode */
+- /* Left ADC in. 2 of 2 */
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(0x8) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x4), A_C_00000000, A_C_00000000);
+- /* Right ADC in 2 of 2 */
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(0xa) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x5), A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(0xc) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x6), A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(0xe) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x7), A_C_00000000, A_C_00000000);
+- /* Pavel Hofman - we still have voices, A_FXBUS2s, and
+- * A_P16VINs available -
+- * let's add 8 more capture channels - total of 16
+- */
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x10));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x8),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x12));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x9),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x14));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xa),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x16));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xb),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x18));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xc),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x1a));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xd),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x1c));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xe),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x1e));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xf),
+- A_C_00000000, A_C_00000000);
++ /* A_P16VIN(0) is delayed by one sample, so all other A_P16VIN channels
++ * will need to also be delayed; we use an auxiliary register for that. */
++ for (z = 1; z < 0x10; z++) {
++ snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr), A_FXBUS2(z * 2) );
++ A_OP(icode, &ptr, iACC3, A_GPR(gpr), A_P16VIN(z), A_C_00000000, A_C_00000000);
++ gpr_map[gpr++] = 0x00000000;
++ }
+ }
+
+ #if 0
+--
+2.39.2
+
--- /dev/null
+From 3d60fd0a504a6c9938b831d63bf6bc1a74979fdf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jul 2023 09:20:21 +0100
+Subject: ALSA: hda/realtek: Fix generic fixup definition for cs35l41 amp
+
+From: Vitaly Rodionov <vitalyr@opensource.cirrus.com>
+
+[ Upstream commit f7b069cf08816252f494d193b9ecdff172bf9aa1 ]
+
+The generic fixup for CS35L41 amplifiers should not have a vendor-specific
+chained fixup. For the ThinkPad laptops with the LED issue, we can just add
+a dedicated fixup.
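+
+With HDA fixups, ".chained = true" makes the core also apply the fixup
+named by ".chain_id" after the current one, so the generic entry was
+pulling the ThinkPad ACPI handling in for every user of it. Condensed
+from the hunks below, the fixup table ends up with:
+
+  [ALC287_FIXUP_CS35L41_I2C_2] = {
+          .type = HDA_FIXUP_FUNC,
+          .v.func = cs35l41_fixup_i2c_two,        /* no chain any more */
+  },
+  [ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI] = {
+          .type = HDA_FIXUP_FUNC,
+          .v.func = cs35l41_fixup_i2c_two,
+          .chained = true,
+          .chain_id = ALC269_FIXUP_THINKPAD_ACPI, /* keeps the LED fix */
+  },
+
+and the ThinkPad entries in alc269_fixup_tbl[] are switched to the new
+chained variant.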
+
+Fixes: a6ac60b36dade ("ALSA: hda/realtek: Fix mute led issue on thinkpad with cs35l41 s-codec")
+Signed-off-by: Vitaly Rodionov <vitalyr@opensource.cirrus.com>
+Link: https://lore.kernel.org/r/20230720082022.13033-1-vitalyr@opensource.cirrus.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 25 +++++++++++++++----------
+ 1 file changed, 15 insertions(+), 10 deletions(-)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 1a8ca119ffe45..cb34a62075b13 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -7220,6 +7220,7 @@ enum {
+ ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN,
+ ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS,
+ ALC236_FIXUP_DELL_DUAL_CODECS,
++ ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI,
+ };
+
+ /* A special fixup for Lenovo C940 and Yoga Duet 7;
+@@ -9090,8 +9091,6 @@ static const struct hda_fixup alc269_fixups[] = {
+ [ALC287_FIXUP_CS35L41_I2C_2] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = cs35l41_fixup_i2c_two,
+- .chained = true,
+- .chain_id = ALC269_FIXUP_THINKPAD_ACPI,
+ },
+ [ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED] = {
+ .type = HDA_FIXUP_FUNC,
+@@ -9228,6 +9227,12 @@ static const struct hda_fixup alc269_fixups[] = {
+ .chained = true,
+ .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ },
++ [ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = cs35l41_fixup_i2c_two,
++ .chained = true,
++ .chain_id = ALC269_FIXUP_THINKPAD_ACPI,
++ },
+ };
+
+ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+@@ -9750,14 +9755,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x17aa, 0x22be, "Thinkpad X1 Carbon 8th", ALC285_FIXUP_THINKPAD_HEADSET_JACK),
+ SND_PCI_QUIRK(0x17aa, 0x22c1, "Thinkpad P1 Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK),
+ SND_PCI_QUIRK(0x17aa, 0x22c2, "Thinkpad X1 Extreme Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK),
+- SND_PCI_QUIRK(0x17aa, 0x22f1, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2),
+- SND_PCI_QUIRK(0x17aa, 0x22f2, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2),
+- SND_PCI_QUIRK(0x17aa, 0x22f3, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2),
+- SND_PCI_QUIRK(0x17aa, 0x2316, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2),
+- SND_PCI_QUIRK(0x17aa, 0x2317, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2),
+- SND_PCI_QUIRK(0x17aa, 0x2318, "Thinkpad Z13 Gen2", ALC287_FIXUP_CS35L41_I2C_2),
+- SND_PCI_QUIRK(0x17aa, 0x2319, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2),
+- SND_PCI_QUIRK(0x17aa, 0x231a, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2),
++ SND_PCI_QUIRK(0x17aa, 0x22f1, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
++ SND_PCI_QUIRK(0x17aa, 0x22f2, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
++ SND_PCI_QUIRK(0x17aa, 0x22f3, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
++ SND_PCI_QUIRK(0x17aa, 0x2316, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
++ SND_PCI_QUIRK(0x17aa, 0x2317, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
++ SND_PCI_QUIRK(0x17aa, 0x2318, "Thinkpad Z13 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
++ SND_PCI_QUIRK(0x17aa, 0x2319, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
++ SND_PCI_QUIRK(0x17aa, 0x231a, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
+ SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
+ SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
+ SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
+--
+2.39.2
+
--- /dev/null
+From 01fe45bc121655c2ea7d823e3442f3c388fb23b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Jun 2023 16:23:54 +0530
+Subject: ASoC: amd: acp: fix for invalid dai id handling in
+ acp_get_byte_count()
+
+From: Vijendar Mukunda <Vijendar.Mukunda@amd.com>
+
+[ Upstream commit 85aeab362201cf52c34cd429e4f6c75a0b42f9a3 ]
+
+For an invalid dai id, return a byte count of zero from the
+acp_get_byte_count() function instead of -EINVAL.
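+
+Since acp_get_byte_count() returns a u64, a negative errno would in any
+case be seen by callers as an enormous byte count. A hypothetical,
+simplified sketch of the idea (not the driver code; the real cases read
+the high/low position registers):
+
+  static inline u64 get_byte_count(struct acp_dev_data *adata, int dai_id)
+  {
+          u64 byte_count = 0;     /* default for unknown dai ids */
+
+          switch (dai_id) {
+          /* ... real cases fill byte_count from the count registers ... */
+          default:
+                  dev_err(adata->dev, "Invalid dai id %x\n", dai_id);
+                  /* returning -EINVAL here would be read back as
+                   * 0xffffffffffffffea by the u64 caller */
+                  break;
+          }
+          return byte_count;
+  }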
+
+Fixes: 623621a9f9e1 ("ASoC: amd: Add common framework to support I2S on ACP SOC")
+
+Signed-off-by: Vijendar Mukunda <Vijendar.Mukunda@amd.com>
+Link: https://lore.kernel.org/r/20230626105356.2580125-6-Vijendar.Mukunda@amd.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/amd/acp/amd.h | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/sound/soc/amd/acp/amd.h b/sound/soc/amd/acp/amd.h
+index 5f2119f422715..12a176a50fd6e 100644
+--- a/sound/soc/amd/acp/amd.h
++++ b/sound/soc/amd/acp/amd.h
+@@ -173,7 +173,7 @@ int snd_amd_acp_find_config(struct pci_dev *pci);
+
+ static inline u64 acp_get_byte_count(struct acp_dev_data *adata, int dai_id, int direction)
+ {
+- u64 byte_count, low = 0, high = 0;
++ u64 byte_count = 0, low = 0, high = 0;
+
+ if (direction == SNDRV_PCM_STREAM_PLAYBACK) {
+ switch (dai_id) {
+@@ -191,7 +191,7 @@ static inline u64 acp_get_byte_count(struct acp_dev_data *adata, int dai_id, int
+ break;
+ default:
+ dev_err(adata->dev, "Invalid dai id %x\n", dai_id);
+- return -EINVAL;
++ goto POINTER_RETURN_BYTES;
+ }
+ } else {
+ switch (dai_id) {
+@@ -213,12 +213,13 @@ static inline u64 acp_get_byte_count(struct acp_dev_data *adata, int dai_id, int
+ break;
+ default:
+ dev_err(adata->dev, "Invalid dai id %x\n", dai_id);
+- return -EINVAL;
++ goto POINTER_RETURN_BYTES;
+ }
+ }
+ /* Get 64 bit value from two 32 bit registers */
+ byte_count = (high << 32) | low;
+
++POINTER_RETURN_BYTES:
+ return byte_count;
+ }
+
+--
+2.39.2
+
--- /dev/null
+From 8fdb4c209948ee94e6e06e178741f29d84f4e4d5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Jul 2023 13:57:23 +0100
+Subject: ASoC: codecs: wcd938x: fix dB range for HPHL and HPHR
+
+From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+[ Upstream commit c03226ba15fe3c42d13907ec7d8536396602557b ]
+
+The dB range for the HPHL and HPHR gains is from +6dB to -30dB in steps
+of 1.5dB, with register values ranging from 0 to 24.
+
+The current code maps this dB range incorrectly; fix it to allow proper
+volume setting.
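+
+As a sanity check of the new mapping (assuming the usual ALSA TLV
+semantics, where DECLARE_TLV_DB_SCALE() takes the minimum and step in
+0.01 dB units):
+
+  DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, -3000);
+  /* control value 0  -> -3000 + 0  * 150 = -30.0 dB */
+  /* control value 24 -> -3000 + 24 * 150 =  +6.0 dB */
+
+With the invert flag now set in SOC_SINGLE_TLV(), control value 0 maps to
+raw register value 0x18 and control value 24 to raw 0, which presumably
+matches how the hardware encodes -30 dB and +6 dB.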
+
+Fixes: e8ba1e05bdc0 ("ASoC: codecs: wcd938x: add basic controls")
+Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+Link: https://lore.kernel.org/r/20230705125723.40464-1-srinivas.kandagatla@linaro.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/codecs/wcd938x.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c
+index 7715040383840..2316481c2541b 100644
+--- a/sound/soc/codecs/wcd938x.c
++++ b/sound/soc/codecs/wcd938x.c
+@@ -210,7 +210,7 @@ struct wcd938x_priv {
+ };
+
+ static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(ear_pa_gain, 600, -1800);
+-static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(line_gain, 600, -3000);
++static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, -3000);
+ static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(analog_gain, 0, 3000);
+
+ struct wcd938x_mbhc_zdet_param {
+@@ -2662,8 +2662,8 @@ static const struct snd_kcontrol_new wcd938x_snd_controls[] = {
+ wcd938x_get_swr_port, wcd938x_set_swr_port),
+ SOC_SINGLE_EXT("DSD_R Switch", WCD938X_DSD_R, 0, 1, 0,
+ wcd938x_get_swr_port, wcd938x_set_swr_port),
+- SOC_SINGLE_TLV("HPHL Volume", WCD938X_HPH_L_EN, 0, 0x18, 0, line_gain),
+- SOC_SINGLE_TLV("HPHR Volume", WCD938X_HPH_R_EN, 0, 0x18, 0, line_gain),
++ SOC_SINGLE_TLV("HPHL Volume", WCD938X_HPH_L_EN, 0, 0x18, 1, line_gain),
++ SOC_SINGLE_TLV("HPHR Volume", WCD938X_HPH_R_EN, 0, 0x18, 1, line_gain),
+ WCD938X_EAR_PA_GAIN_TLV("EAR_PA Volume", WCD938X_ANA_EAR_COMPANDER_CTL,
+ 2, 0x10, 0, ear_pa_gain),
+ SOC_SINGLE_EXT("ADC1 Switch", WCD938X_ADC1, 1, 1, 0,
+--
+2.39.2
+
--- /dev/null
+From 6837fd2094a0338619e2fbd26039c39ad53d3cf8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Jun 2023 16:27:13 +0200
+Subject: ASoC: codecs: wcd938x: fix mbhc impedance loglevel
+
+From: Johan Hovold <johan+linaro@kernel.org>
+
+[ Upstream commit e5ce198bd5c6923b6a51e1493b1401f84c24b26d ]
+
+Demote the MBHC impedance measurement printk, which is not an error
+message, from error to debug level.
+
+While at it, fix the capitalisation of "ohm" and add the missing space
+before the opening parenthesis.
+
+Fixes: bcee7ed09b8e ("ASoC: codecs: wcd938x: add Multi Button Headset Control support")
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Reviewed-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+Link: https://lore.kernel.org/r/20230630142717.5314-2-johan+linaro@kernel.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/codecs/wcd938x.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c
+index df0b3ac7f1321..7715040383840 100644
+--- a/sound/soc/codecs/wcd938x.c
++++ b/sound/soc/codecs/wcd938x.c
+@@ -2165,8 +2165,8 @@ static inline void wcd938x_mbhc_get_result_params(struct wcd938x_priv *wcd938x,
+ else if (x1 < minCode_param[noff])
+ *zdet = WCD938X_ZDET_FLOATING_IMPEDANCE;
+
+- pr_err("%s: d1=%d, c1=%d, x1=0x%x, z_val=%d(milliOhm)\n",
+- __func__, d1, c1, x1, *zdet);
++ pr_debug("%s: d1=%d, c1=%d, x1=0x%x, z_val=%d (milliohm)\n",
++ __func__, d1, c1, x1, *zdet);
+ ramp_down:
+ i = 0;
+ while (x1) {
+--
+2.39.2
+
--- /dev/null
+From 4b2b48aa8c43caaeef24802e4265e3ba2daa7ba5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Jul 2023 14:18:42 +0100
+Subject: ASoC: qcom: q6apm: do not close GPR port before closing graph
+
+From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+[ Upstream commit c1be62923d4d86e7c06b1224626e27eb8d9ab32e ]
+
+Closing the GPR port before the graph close can result in unhandled
+notifications from the DSP. This leads to a spam of errors from the GPR
+driver, as there is no one left to handle these notifications at that
+point in time.
+
+Fix this by closing the GPR port after the graph close has finished.
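+
+The teardown order after the change is (condensed from the hunk below):
+
+  graph->ar_graph = NULL;       /* data callbacks now ignore stray events */
+  kref_put(&ar_graph->refcount, q6apm_put_audioreach_graph); /* close graph */
+  gpr_free_port(graph->port);   /* only now drop the GPR port */
+  kfree(graph);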
+
+Fixes: 5477518b8a0e ("ASoC: qdsp6: audioreach: add q6apm support")
+Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+Link: https://lore.kernel.org/r/20230705131842.41584-1-srinivas.kandagatla@linaro.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/qcom/qdsp6/q6apm.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/sound/soc/qcom/qdsp6/q6apm.c b/sound/soc/qcom/qdsp6/q6apm.c
+index 794019286c704..16acdf3a99e1c 100644
+--- a/sound/soc/qcom/qdsp6/q6apm.c
++++ b/sound/soc/qcom/qdsp6/q6apm.c
+@@ -515,6 +515,8 @@ static int graph_callback(struct gpr_resp_pkt *data, void *priv, int op)
+
+ switch (hdr->opcode) {
+ case DATA_CMD_RSP_WR_SH_MEM_EP_DATA_BUFFER_DONE_V2:
++ if (!graph->ar_graph)
++ break;
+ client_event = APM_CLIENT_EVENT_DATA_WRITE_DONE;
+ mutex_lock(&graph->lock);
+ token = hdr->token & APM_WRITE_TOKEN_MASK;
+@@ -548,6 +550,8 @@ static int graph_callback(struct gpr_resp_pkt *data, void *priv, int op)
+ wake_up(&graph->cmd_wait);
+ break;
+ case DATA_CMD_RSP_RD_SH_MEM_EP_DATA_BUFFER_V2:
++ if (!graph->ar_graph)
++ break;
+ client_event = APM_CLIENT_EVENT_DATA_READ_DONE;
+ mutex_lock(&graph->lock);
+ rd_done = data->payload;
+@@ -650,8 +654,9 @@ int q6apm_graph_close(struct q6apm_graph *graph)
+ {
+ struct audioreach_graph *ar_graph = graph->ar_graph;
+
+- gpr_free_port(graph->port);
++ graph->ar_graph = NULL;
+ kref_put(&ar_graph->refcount, q6apm_put_audioreach_graph);
++ gpr_free_port(graph->port);
+ kfree(graph);
+
+ return 0;
+--
+2.39.2
+
--- /dev/null
+From f51906ec30b0242c56247bae4862008fd7ae2eeb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Jul 2023 14:25:23 +0300
+Subject: ASoC: SOF: ipc3-dtrace: uninitialized data in
+ dfsentry_trace_filter_write()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+[ Upstream commit 469e2f28c2cbee2430058c1c9bb6d1675d7195fb ]
+
+The code doesn't check how many bytes simple_write_to_buffer() writes to
+the buffer. The only thing we know is that the first byte is initialized
+and the last byte of the buffer is set to NUL; the bytes in between could
+be uninitialized.
+
+There is no need to use simple_write_to_buffer(). This code does not
+support partial writes but instead passes "pos = 0" as the starting
+offset regardless of what the user passed as "*ppos". Just use the
+copy_from_user() function and initialize the whole buffer.
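+
+The root of the problem is that simple_write_to_buffer() may legitimately
+copy fewer than "count" bytes; roughly (a simplified sketch of its
+behaviour, not a verbatim copy of fs/libfs.c):
+
+  res = copy_from_user(to + pos, from, count);
+  if (res == count)
+          return -EFAULT;       /* only fails if nothing was copied */
+  return count - res;           /* may be less than the caller's count */
+
+The old code only checked for a negative return, so after a short copy it
+would go on to parse a buffer whose tail, between the copied bytes and the
+NUL at string[count], was never written.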
+
+Fixes: 671e0b90051e ("ASoC: SOF: Clone the trace code to ipc3-dtrace as fw_tracing implementation")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Link: https://lore.kernel.org/r/74148292-ce4d-4e01-a1a7-921e6767da14@moroto.mountain
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/sof/ipc3-dtrace.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/sound/soc/sof/ipc3-dtrace.c b/sound/soc/sof/ipc3-dtrace.c
+index b815b0244d9e4..8cf421577378c 100644
+--- a/sound/soc/sof/ipc3-dtrace.c
++++ b/sound/soc/sof/ipc3-dtrace.c
+@@ -187,7 +187,6 @@ static ssize_t dfsentry_trace_filter_write(struct file *file, const char __user
+ struct snd_sof_dfsentry *dfse = file->private_data;
+ struct sof_ipc_trace_filter_elem *elems = NULL;
+ struct snd_sof_dev *sdev = dfse->sdev;
+- loff_t pos = 0;
+ int num_elems;
+ char *string;
+ int ret;
+@@ -202,11 +201,11 @@ static ssize_t dfsentry_trace_filter_write(struct file *file, const char __user
+ if (!string)
+ return -ENOMEM;
+
+- /* assert null termination */
+- string[count] = 0;
+- ret = simple_write_to_buffer(string, count, &pos, from, count);
+- if (ret < 0)
++ if (copy_from_user(string, from, count)) {
++ ret = -EFAULT;
+ goto error;
++ }
++ string[count] = '\0';
+
+ ret = trace_filter_parse(sdev, string, &num_elems, &elems);
+ if (ret < 0)
+--
+2.39.2
+
--- /dev/null
+From f56314f8f520be77c9344013ed73653e992d3600 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Jun 2023 01:04:32 +0300
+Subject: Bluetooth: hci_event: call disconnect callback before deleting conn
+
+From: Pauli Virtanen <pav@iki.fi>
+
+[ Upstream commit 7f7cfcb6f0825652973b780f248603e23f16ee90 ]
+
+In hci_cs_disconnect, we do hci_conn_del even if disconnection failed.
+
+ISO, L2CAP and SCO connections refer to the hci_conn without
+hci_conn_get, so disconn_cfm must be called so they can clean up their
+conn, otherwise use-after-free occurs.
+
+ISO:
+==========================================================
+iso_sock_connect:880: sk 00000000eabd6557
+iso_connect_cis:356: 70:1a:b8:98:ff:a2 -> 28:3d:c2:4a:7e:da
+...
+iso_conn_add:140: hcon 000000001696f1fd conn 00000000b6251073
+hci_dev_put:1487: hci0 orig refcnt 17
+__iso_chan_add:214: conn 00000000b6251073
+iso_sock_clear_timer:117: sock 00000000eabd6557 state 3
+...
+hci_rx_work:4085: hci0 Event packet
+hci_event_packet:7601: hci0: event 0x0f
+hci_cmd_status_evt:4346: hci0: opcode 0x0406
+hci_cs_disconnect:2760: hci0: status 0x0c
+hci_sent_cmd_data:3107: hci0 opcode 0x0406
+hci_conn_del:1151: hci0 hcon 000000001696f1fd handle 2560
+hci_conn_unlink:1102: hci0: hcon 000000001696f1fd
+hci_conn_drop:1451: hcon 00000000d8521aaf orig refcnt 2
+hci_chan_list_flush:2780: hcon 000000001696f1fd
+hci_dev_put:1487: hci0 orig refcnt 21
+hci_dev_put:1487: hci0 orig refcnt 20
+hci_req_cmd_complete:3978: opcode 0x0406 status 0x0c
+... <no iso_* activity on sk/conn> ...
+iso_sock_sendmsg:1098: sock 00000000dea5e2e0, sk 00000000eabd6557
+BUG: kernel NULL pointer dereference, address: 0000000000000668
+PGD 0 P4D 0
+Oops: 0000 [#1] PREEMPT SMP PTI
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014
+RIP: 0010:iso_sock_sendmsg (net/bluetooth/iso.c:1112) bluetooth
+==========================================================
+
+L2CAP:
+==================================================================
+hci_cmd_status_evt:4359: hci0: opcode 0x0406
+hci_cs_disconnect:2760: hci0: status 0x0c
+hci_sent_cmd_data:3085: hci0 opcode 0x0406
+hci_conn_del:1151: hci0 hcon ffff88800c999000 handle 3585
+hci_conn_unlink:1102: hci0: hcon ffff88800c999000
+hci_chan_list_flush:2780: hcon ffff88800c999000
+hci_chan_del:2761: hci0 hcon ffff88800c999000 chan ffff888018ddd280
+...
+BUG: KASAN: slab-use-after-free in hci_send_acl+0x2d/0x540 [bluetooth]
+Read of size 8 at addr ffff888018ddd298 by task bluetoothd/1175
+
+CPU: 0 PID: 1175 Comm: bluetoothd Tainted: G E 6.4.0-rc4+ #2
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x5b/0x90
+ print_report+0xcf/0x670
+ ? __virt_addr_valid+0xf8/0x180
+ ? hci_send_acl+0x2d/0x540 [bluetooth]
+ kasan_report+0xa8/0xe0
+ ? hci_send_acl+0x2d/0x540 [bluetooth]
+ hci_send_acl+0x2d/0x540 [bluetooth]
+ ? __pfx___lock_acquire+0x10/0x10
+ l2cap_chan_send+0x1fd/0x1300 [bluetooth]
+ ? l2cap_sock_sendmsg+0xf2/0x170 [bluetooth]
+ ? __pfx_l2cap_chan_send+0x10/0x10 [bluetooth]
+ ? lock_release+0x1d5/0x3c0
+ ? mark_held_locks+0x1a/0x90
+ l2cap_sock_sendmsg+0x100/0x170 [bluetooth]
+ sock_write_iter+0x275/0x280
+ ? __pfx_sock_write_iter+0x10/0x10
+ ? __pfx___lock_acquire+0x10/0x10
+ do_iter_readv_writev+0x176/0x220
+ ? __pfx_do_iter_readv_writev+0x10/0x10
+ ? find_held_lock+0x83/0xa0
+ ? selinux_file_permission+0x13e/0x210
+ do_iter_write+0xda/0x340
+ vfs_writev+0x1b4/0x400
+ ? __pfx_vfs_writev+0x10/0x10
+ ? __seccomp_filter+0x112/0x750
+ ? populate_seccomp_data+0x182/0x220
+ ? __fget_light+0xdf/0x100
+ ? do_writev+0x19d/0x210
+ do_writev+0x19d/0x210
+ ? __pfx_do_writev+0x10/0x10
+ ? mark_held_locks+0x1a/0x90
+ do_syscall_64+0x60/0x90
+ ? lockdep_hardirqs_on_prepare+0x149/0x210
+ ? do_syscall_64+0x6c/0x90
+ ? lockdep_hardirqs_on_prepare+0x149/0x210
+ entry_SYSCALL_64_after_hwframe+0x72/0xdc
+RIP: 0033:0x7ff45cb23e64
+Code: 15 d1 1f 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 80 3d 9d a7 0d 00 00 74 13 b8 14 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 48 83 ec 28 89 54 24 1c 48 89
+RSP: 002b:00007fff21ae09b8 EFLAGS: 00000202 ORIG_RAX: 0000000000000014
+RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007ff45cb23e64
+RDX: 0000000000000001 RSI: 00007fff21ae0aa0 RDI: 0000000000000017
+RBP: 00007fff21ae0aa0 R08: 000000000095a8a0 R09: 0000607000053f40
+R10: 0000000000000001 R11: 0000000000000202 R12: 00007fff21ae0ac0
+R13: 00000fffe435c150 R14: 00007fff21ae0a80 R15: 000060f000000040
+ </TASK>
+
+Allocated by task 771:
+ kasan_save_stack+0x33/0x60
+ kasan_set_track+0x25/0x30
+ __kasan_kmalloc+0xaa/0xb0
+ hci_chan_create+0x67/0x1b0 [bluetooth]
+ l2cap_conn_add.part.0+0x17/0x590 [bluetooth]
+ l2cap_connect_cfm+0x266/0x6b0 [bluetooth]
+ hci_le_remote_feat_complete_evt+0x167/0x310 [bluetooth]
+ hci_event_packet+0x38d/0x800 [bluetooth]
+ hci_rx_work+0x287/0xb20 [bluetooth]
+ process_one_work+0x4f7/0x970
+ worker_thread+0x8f/0x620
+ kthread+0x17f/0x1c0
+ ret_from_fork+0x2c/0x50
+
+Freed by task 771:
+ kasan_save_stack+0x33/0x60
+ kasan_set_track+0x25/0x30
+ kasan_save_free_info+0x2e/0x50
+ ____kasan_slab_free+0x169/0x1c0
+ slab_free_freelist_hook+0x9e/0x1c0
+ __kmem_cache_free+0xc0/0x310
+ hci_chan_list_flush+0x46/0x90 [bluetooth]
+ hci_conn_cleanup+0x7d/0x330 [bluetooth]
+ hci_cs_disconnect+0x35d/0x530 [bluetooth]
+ hci_cmd_status_evt+0xef/0x2b0 [bluetooth]
+ hci_event_packet+0x38d/0x800 [bluetooth]
+ hci_rx_work+0x287/0xb20 [bluetooth]
+ process_one_work+0x4f7/0x970
+ worker_thread+0x8f/0x620
+ kthread+0x17f/0x1c0
+ ret_from_fork+0x2c/0x50
+==================================================================
+
+Fixes: b8d290525e39 ("Bluetooth: clean up connection in hci_cs_disconnect")
+Signed-off-by: Pauli Virtanen <pav@iki.fi>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/hci_event.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
+index ec9b0612f2761..83eaf25ece465 100644
+--- a/net/bluetooth/hci_event.c
++++ b/net/bluetooth/hci_event.c
+@@ -2789,6 +2789,9 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
+ hci_enable_advertising(hdev);
+ }
+
++ /* Inform sockets conn is gone before we delete it */
++ hci_disconn_cfm(conn, HCI_ERROR_UNSPECIFIED);
++
+ goto done;
+ }
+
+--
+2.39.2
+
--- /dev/null
+From 37d8d1ea773870a99ffb70e4fb61facc4b296dfc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Jun 2023 15:33:14 -0700
+Subject: Bluetooth: hci_sync: Avoid use-after-free in dbg for
+ hci_remove_adv_monitor()
+
+From: Douglas Anderson <dianders@chromium.org>
+
+[ Upstream commit de6dfcefd107667ce2dbedf4d9337f5ed557a4a1 ]
+
+KASAN reports that there's a use-after-free in
+hci_remove_adv_monitor(). Trawling through the disassembly, you can
+see that the complaint is from the access in bt_dev_dbg() under the
+HCI_ADV_MONITOR_EXT_MSFT case. The problem case happens because
+msft_remove_monitor() can end up freeing the monitor
+structure. Specifically:
+ hci_remove_adv_monitor() ->
+ msft_remove_monitor() ->
+ msft_remove_monitor_sync() ->
+ msft_le_cancel_monitor_advertisement_cb() ->
+ hci_free_adv_monitor()
+
+Let's fix the problem by just stashing the relevant data when it's
+still valid.
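+
+The change is the usual "read it while it is still valid" pattern
+(condensed from the hunk below):
+
+  handle = monitor->handle;                     /* stash before the call */
+  status = msft_remove_monitor(hdev, monitor);  /* may free "monitor" */
+  bt_dev_dbg(hdev, "%s remove monitor %d msft status %d",
+             hdev->name, handle, status);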
+
+Fixes: 7cf5c2978f23 ("Bluetooth: hci_sync: Refactor remove Adv Monitor")
+Signed-off-by: Douglas Anderson <dianders@chromium.org>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/hci_core.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
+index be0e6865b340f..d034bf2a999e1 100644
+--- a/net/bluetooth/hci_core.c
++++ b/net/bluetooth/hci_core.c
+@@ -1972,6 +1972,7 @@ static int hci_remove_adv_monitor(struct hci_dev *hdev,
+ struct adv_monitor *monitor)
+ {
+ int status = 0;
++ int handle;
+
+ switch (hci_get_adv_monitor_offload_ext(hdev)) {
+ case HCI_ADV_MONITOR_EXT_NONE: /* also goes here when powered off */
+@@ -1980,9 +1981,10 @@ static int hci_remove_adv_monitor(struct hci_dev *hdev,
+ goto free_monitor;
+
+ case HCI_ADV_MONITOR_EXT_MSFT:
++ handle = monitor->handle;
+ status = msft_remove_monitor(hdev, monitor);
+ bt_dev_dbg(hdev, "%s remove monitor %d msft status %d",
+- hdev->name, monitor->handle, status);
++ hdev->name, handle, status);
+ break;
+ }
+
+--
+2.39.2
+
--- /dev/null
+From 1bba473b620234ccdcf3a2b08e021f5b27202ce4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Jun 2023 01:04:33 +0300
+Subject: Bluetooth: ISO: fix iso_conn related locking and validity issues
+
+From: Pauli Virtanen <pav@iki.fi>
+
+[ Upstream commit d40ae85ee62e3666f45bc61864b22121346f88ef ]
+
+sk->sk_state indicates whether iso_pi(sk)->conn is valid. Operations
+that check/update sk_state and access conn should hold lock_sock,
+otherwise they can race.
+
+The order of taking locks is hci_dev_lock > lock_sock > iso_conn_lock,
+which is how it is in connect/disconnect_cfm -> iso_conn_del ->
+iso_chan_del.
+
+Fix locking in iso_connect_cis/bis and sendmsg/recvmsg to take lock_sock
+around updating sk_state and conn.
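+
+For sendmsg, the resulting pattern is roughly (condensed from the hunk
+below):
+
+  lock_sock(sk);
+  if (sk->sk_state != BT_CONNECTED) {
+          release_sock(sk);
+          return -ENOTCONN;
+  }
+  mtu = iso_pi(sk)->conn->hcon->hdev->iso_mtu;  /* conn valid under lock_sock */
+  release_sock(sk);
+
+  skb = bt_skb_sendmsg(sk, msg, len, mtu, HCI_ISO_DATA_HDR_SIZE, 0);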
+
+iso_conn_del must not occur during iso_connect_cis/bis, as it frees the
+iso_conn. Hold hdev->lock longer to prevent that.
+
+This should not reintroduce the issue fixed in commit 241f51931c35
+("Bluetooth: ISO: Avoid circular locking dependency"), since the we
+acquire locks in order. We retain the fix in iso_sock_connect to release
+lock_sock before iso_connect_* acquires hdev->lock.
+
+Similarly for commit 6a5ad251b7cd ("Bluetooth: ISO: Fix possible
+circular locking dependency"). We retain the fix in iso_conn_ready to
+not acquire iso_conn_lock before lock_sock.
+
+iso_conn_add shall return an iso_conn with a valid hcon. Make this so also
+when reusing an old CIS connection waiting for its disconnect timeout (see
+__iso_sock_close where conn->hcon is set to NULL).
+
+Trace with iso_conn_del after iso_chan_add in iso_connect_cis:
+===============================================================
+iso_sock_create:771: sock 00000000be9b69b7
+iso_sock_init:693: sk 000000004dff667e
+iso_sock_bind:827: sk 000000004dff667e 70:1a:b8:98:ff:a2 type 1
+iso_sock_setsockopt:1289: sk 000000004dff667e
+iso_sock_setsockopt:1289: sk 000000004dff667e
+iso_sock_setsockopt:1289: sk 000000004dff667e
+iso_sock_connect:875: sk 000000004dff667e
+iso_connect_cis:353: 70:1a:b8:98:ff:a2 -> 28:3d:c2:4a:7e:da
+hci_get_route:1199: 70:1a:b8:98:ff:a2 -> 28:3d:c2:4a:7e:da
+hci_conn_add:1005: hci0 dst 28:3d:c2:4a:7e:da
+iso_conn_add:140: hcon 000000007b65d182 conn 00000000daf8625e
+__iso_chan_add:214: conn 00000000daf8625e
+iso_connect_cfm:1700: hcon 000000007b65d182 bdaddr 28:3d:c2:4a:7e:da status 12
+iso_conn_del:187: hcon 000000007b65d182 conn 00000000daf8625e, err 16
+iso_sock_clear_timer:117: sock 000000004dff667e state 3
+ <Note: sk_state is BT_BOUND (3), so iso_connect_cis is still
+ running at this point>
+iso_chan_del:153: sk 000000004dff667e, conn 00000000daf8625e, err 16
+hci_conn_del:1151: hci0 hcon 000000007b65d182 handle 65535
+hci_conn_unlink:1102: hci0: hcon 000000007b65d182
+hci_chan_list_flush:2780: hcon 000000007b65d182
+iso_sock_getsockopt:1376: sk 000000004dff667e
+iso_sock_getname:1070: sock 00000000be9b69b7, sk 000000004dff667e
+iso_sock_getname:1070: sock 00000000be9b69b7, sk 000000004dff667e
+iso_sock_getsockopt:1376: sk 000000004dff667e
+iso_sock_getname:1070: sock 00000000be9b69b7, sk 000000004dff667e
+iso_sock_getname:1070: sock 00000000be9b69b7, sk 000000004dff667e
+iso_sock_shutdown:1434: sock 00000000be9b69b7, sk 000000004dff667e, how 1
+__iso_sock_close:632: sk 000000004dff667e state 5 socket 00000000be9b69b7
+ <Note: sk_state is BT_CONNECT (5), even though iso_chan_del sets
+ BT_CLOSED (6). Only iso_connect_cis sets it to BT_CONNECT, so it
+ must be that iso_chan_del occurred between iso_chan_add and end of
+ iso_connect_cis.>
+BUG: kernel NULL pointer dereference, address: 0000000000000000
+PGD 8000000006467067 P4D 8000000006467067 PUD 3f5f067 PMD 0
+Oops: 0000 [#1] PREEMPT SMP PTI
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014
+RIP: 0010:__iso_sock_close (net/bluetooth/iso.c:664) bluetooth
+===============================================================
+
+Trace with iso_conn_del before iso_chan_add in iso_connect_cis:
+===============================================================
+iso_connect_cis:356: 70:1a:b8:98:ff:a2 -> 28:3d:c2:4a:7e:da
+...
+iso_conn_add:140: hcon 0000000093bc551f conn 00000000768ae504
+hci_dev_put:1487: hci0 orig refcnt 21
+hci_event_packet:7607: hci0: event 0x0e
+hci_cmd_complete_evt:4231: hci0: opcode 0x2062
+hci_cc_le_set_cig_params:3846: hci0: status 0x07
+hci_sent_cmd_data:3107: hci0 opcode 0x2062
+iso_connect_cfm:1703: hcon 0000000093bc551f bdaddr 28:3d:c2:4a:7e:da status 7
+iso_conn_del:187: hcon 0000000093bc551f conn 00000000768ae504, err 12
+hci_conn_del:1151: hci0 hcon 0000000093bc551f handle 65535
+hci_conn_unlink:1102: hci0: hcon 0000000093bc551f
+hci_chan_list_flush:2780: hcon 0000000093bc551f
+__iso_chan_add:214: conn 00000000768ae504
+ <Note: this conn was already freed in iso_conn_del above>
+iso_sock_clear_timer:117: sock 0000000098323f95 state 3
+general protection fault, probably for non-canonical address 0x30b29c630930aec8: 0000 [#1] PREEMPT SMP PTI
+CPU: 1 PID: 1920 Comm: bluetoothd Tainted: G E 6.3.0-rc7+ #4
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014
+RIP: 0010:detach_if_pending+0x28/0xd0
+Code: 90 90 0f 1f 44 00 00 48 8b 47 08 48 85 c0 0f 84 ad 00 00 00 55 89 d5 53 48 83 3f 00 48 89 fb 74 7d 66 90 48 8b 03 48 8b 53 08 <>
+RSP: 0018:ffffb90841a67d08 EFLAGS: 00010007
+RAX: 0000000000000000 RBX: ffff9141bd5061b8 RCX: 0000000000000000
+RDX: 30b29c630930aec8 RSI: ffff9141fdd21e80 RDI: ffff9141bd5061b8
+RBP: 0000000000000001 R08: 0000000000000000 R09: ffffb90841a67b88
+R10: 0000000000000003 R11: ffffffff8613f558 R12: ffff9141fdd21e80
+R13: 0000000000000000 R14: ffff9141b5976010 R15: ffff914185755338
+FS: 00007f45768bd840(0000) GS:ffff9141fdd00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000619000424074 CR3: 0000000009f5e005 CR4: 0000000000170ee0
+Call Trace:
+ <TASK>
+ timer_delete+0x48/0x80
+ try_to_grab_pending+0xdf/0x170
+ __cancel_work+0x37/0xb0
+ iso_connect_cis+0x141/0x400 [bluetooth]
+===============================================================
+
+Trace with NULL conn->hcon in state BT_CONNECT:
+===============================================================
+__iso_sock_close:619: sk 00000000f7c71fc5 state 1 socket 00000000d90c5fe5
+...
+__iso_sock_close:619: sk 00000000f7c71fc5 state 8 socket 00000000d90c5fe5
+iso_chan_del:153: sk 00000000f7c71fc5, conn 0000000022c03a7e, err 104
+...
+iso_sock_connect:862: sk 00000000129b56c3
+iso_connect_cis:348: 70:1a:b8:98:ff:a2 -> 28:3d:c2:4a:7d:2a
+hci_get_route:1199: 70:1a:b8:98:ff:a2 -> 28:3d:c2:4a:7d:2a
+hci_dev_hold:1495: hci0 orig refcnt 19
+__iso_chan_add:214: conn 0000000022c03a7e
+ <Note: reusing old conn>
+iso_sock_clear_timer:117: sock 00000000129b56c3 state 3
+...
+iso_sock_ready:1485: sk 00000000129b56c3
+...
+iso_sock_sendmsg:1077: sock 00000000e5013966, sk 00000000129b56c3
+BUG: kernel NULL pointer dereference, address: 00000000000006a8
+PGD 0 P4D 0
+Oops: 0000 [#1] PREEMPT SMP PTI
+CPU: 1 PID: 1403 Comm: wireplumber Tainted: G E 6.3.0-rc7+ #4
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014
+RIP: 0010:iso_sock_sendmsg+0x63/0x2a0 [bluetooth]
+===============================================================
+
+Fixes: 241f51931c35 ("Bluetooth: ISO: Avoid circular locking dependency")
+Fixes: 6a5ad251b7cd ("Bluetooth: ISO: Fix possible circular locking dependency")
+Signed-off-by: Pauli Virtanen <pav@iki.fi>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/iso.c | 53 ++++++++++++++++++++++++++-------------------
+ 1 file changed, 31 insertions(+), 22 deletions(-)
+
+diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
+index cb959e8eac185..699e4f400df29 100644
+--- a/net/bluetooth/iso.c
++++ b/net/bluetooth/iso.c
+@@ -116,8 +116,11 @@ static struct iso_conn *iso_conn_add(struct hci_conn *hcon)
+ {
+ struct iso_conn *conn = hcon->iso_data;
+
+- if (conn)
++ if (conn) {
++ if (!conn->hcon)
++ conn->hcon = hcon;
+ return conn;
++ }
+
+ conn = kzalloc(sizeof(*conn), GFP_KERNEL);
+ if (!conn)
+@@ -285,14 +288,13 @@ static int iso_connect_bis(struct sock *sk)
+ goto unlock;
+ }
+
+- hci_dev_unlock(hdev);
+- hci_dev_put(hdev);
++ lock_sock(sk);
+
+ err = iso_chan_add(conn, sk, NULL);
+- if (err)
+- return err;
+-
+- lock_sock(sk);
++ if (err) {
++ release_sock(sk);
++ goto unlock;
++ }
+
+ /* Update source addr of the socket */
+ bacpy(&iso_pi(sk)->src, &hcon->src);
+@@ -306,7 +308,6 @@ static int iso_connect_bis(struct sock *sk)
+ }
+
+ release_sock(sk);
+- return err;
+
+ unlock:
+ hci_dev_unlock(hdev);
+@@ -367,14 +368,13 @@ static int iso_connect_cis(struct sock *sk)
+ goto unlock;
+ }
+
+- hci_dev_unlock(hdev);
+- hci_dev_put(hdev);
++ lock_sock(sk);
+
+ err = iso_chan_add(conn, sk, NULL);
+- if (err)
+- return err;
+-
+- lock_sock(sk);
++ if (err) {
++ release_sock(sk);
++ goto unlock;
++ }
+
+ /* Update source addr of the socket */
+ bacpy(&iso_pi(sk)->src, &hcon->src);
+@@ -391,7 +391,6 @@ static int iso_connect_cis(struct sock *sk)
+ }
+
+ release_sock(sk);
+- return err;
+
+ unlock:
+ hci_dev_unlock(hdev);
+@@ -1036,8 +1035,8 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t len)
+ {
+ struct sock *sk = sock->sk;
+- struct iso_conn *conn = iso_pi(sk)->conn;
+ struct sk_buff *skb, **frag;
++ size_t mtu;
+ int err;
+
+ BT_DBG("sock %p, sk %p", sock, sk);
+@@ -1049,11 +1048,18 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ if (msg->msg_flags & MSG_OOB)
+ return -EOPNOTSUPP;
+
+- if (sk->sk_state != BT_CONNECTED)
++ lock_sock(sk);
++
++ if (sk->sk_state != BT_CONNECTED) {
++ release_sock(sk);
+ return -ENOTCONN;
++ }
++
++ mtu = iso_pi(sk)->conn->hcon->hdev->iso_mtu;
++
++ release_sock(sk);
+
+- skb = bt_skb_sendmsg(sk, msg, len, conn->hcon->hdev->iso_mtu,
+- HCI_ISO_DATA_HDR_SIZE, 0);
++ skb = bt_skb_sendmsg(sk, msg, len, mtu, HCI_ISO_DATA_HDR_SIZE, 0);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+@@ -1066,8 +1072,7 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ while (len) {
+ struct sk_buff *tmp;
+
+- tmp = bt_skb_sendmsg(sk, msg, len, conn->hcon->hdev->iso_mtu,
+- 0, 0);
++ tmp = bt_skb_sendmsg(sk, msg, len, mtu, 0, 0);
+ if (IS_ERR(tmp)) {
+ kfree_skb(skb);
+ return PTR_ERR(tmp);
+@@ -1122,15 +1127,19 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
+ BT_DBG("sk %p", sk);
+
+ if (test_and_clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
++ lock_sock(sk);
+ switch (sk->sk_state) {
+ case BT_CONNECT2:
+- lock_sock(sk);
+ iso_conn_defer_accept(pi->conn->hcon);
+ sk->sk_state = BT_CONFIG;
+ release_sock(sk);
+ return 0;
+ case BT_CONNECT:
++ release_sock(sk);
+ return iso_connect_cis(sk);
++ default:
++ release_sock(sk);
++ break;
+ }
+ }
+
+--
+2.39.2
+
--- /dev/null
+From 6fa1ac47040a970b9823dd880eeff4a1f5d2c7a1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Jun 2023 01:04:31 +0300
+Subject: Bluetooth: use RCU for hci_conn_params and iterate safely in hci_sync
+
+From: Pauli Virtanen <pav@iki.fi>
+
+[ Upstream commit 195ef75e19287b4bc413da3e3e3722b030ac881e ]
+
+hci_update_accept_list_sync iterates over hdev->pend_le_conns and
+hdev->pend_le_reports, and waits for controller events in the loop body,
+without holding hdev lock.
+
+Meanwhile, these lists and the items in them may be modified, e.g. by
+le_scan_cleanup. This can invalidate the list cursor or any other item
+in the list, resulting in invalid behavior (e.g. use-after-free).
+
+Use RCU for the hci_conn_params action lists. Since the loop bodies in
+hci_sync block and we cannot use RCU or hdev->lock for the whole loop,
+copy list items first and then iterate on the copy. Only the flags field
+is written from elsewhere, so READ_ONCE/WRITE_ONCE should guarantee we
+read valid values.
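+
+A condensed sketch of the copy-then-iterate scheme (generic illustration;
+the real code is conn_params_copy() in the hunks below, and snapshot_of()
+and add_to_accept_list() here only stand in for copying the plain fields
+and for the blocking per-entry work):
+
+  rcu_read_lock();
+  list_for_each_entry_rcu(p, list, action)
+          n++;                            /* count current entries */
+  rcu_read_unlock();
+
+  copy = kvcalloc(n, sizeof(*copy), GFP_KERNEL);
+
+  rcu_read_lock();
+  i = 0;
+  list_for_each_entry_rcu(p, list, action) {
+          if (i >= n)
+                  break;                  /* racing adds: next scan update */
+          copy[i++] = snapshot_of(p);     /* plain data only, no pointers */
+  }
+  rcu_read_unlock();
+
+  for (i = 0; i < n; i++)
+          add_to_accept_list(&copy[i]);   /* may block and wait for events */
+
+  kvfree(copy);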
+
+Free params everywhere with hci_conn_params_free so the cleanup is
+guaranteed to be done properly.
+
+This fixes the following, which can be triggered e.g. by the new BlueZ
+mgmt-tester case "Add + Remove Device Nowait - Success", or by changing
+hci_le_set_cig_params to always return false and running iso-tester:
+
+==================================================================
+BUG: KASAN: slab-use-after-free in hci_update_passive_scan_sync (net/bluetooth/hci_sync.c:2536 net/bluetooth/hci_sync.c:2723 net/bluetooth/hci_sync.c:2841)
+Read of size 8 at addr ffff888001265018 by task kworker/u3:0/32
+
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014
+Workqueue: hci0 hci_cmd_sync_work
+Call Trace:
+<TASK>
+dump_stack_lvl (./arch/x86/include/asm/irqflags.h:134 lib/dump_stack.c:107)
+print_report (mm/kasan/report.c:320 mm/kasan/report.c:430)
+? __virt_addr_valid (./include/linux/mmzone.h:1915 ./include/linux/mmzone.h:2011 arch/x86/mm/physaddr.c:65)
+? hci_update_passive_scan_sync (net/bluetooth/hci_sync.c:2536 net/bluetooth/hci_sync.c:2723 net/bluetooth/hci_sync.c:2841)
+kasan_report (mm/kasan/report.c:538)
+? hci_update_passive_scan_sync (net/bluetooth/hci_sync.c:2536 net/bluetooth/hci_sync.c:2723 net/bluetooth/hci_sync.c:2841)
+hci_update_passive_scan_sync (net/bluetooth/hci_sync.c:2536 net/bluetooth/hci_sync.c:2723 net/bluetooth/hci_sync.c:2841)
+? __pfx_hci_update_passive_scan_sync (net/bluetooth/hci_sync.c:2780)
+? mutex_lock (kernel/locking/mutex.c:282)
+? __pfx_mutex_lock (kernel/locking/mutex.c:282)
+? __pfx_mutex_unlock (kernel/locking/mutex.c:538)
+? __pfx_update_passive_scan_sync (net/bluetooth/hci_sync.c:2861)
+hci_cmd_sync_work (net/bluetooth/hci_sync.c:306)
+process_one_work (./arch/x86/include/asm/preempt.h:27 kernel/workqueue.c:2399)
+worker_thread (./include/linux/list.h:292 kernel/workqueue.c:2538)
+? __pfx_worker_thread (kernel/workqueue.c:2480)
+kthread (kernel/kthread.c:376)
+? __pfx_kthread (kernel/kthread.c:331)
+ret_from_fork (arch/x86/entry/entry_64.S:314)
+</TASK>
+
+Allocated by task 31:
+kasan_save_stack (mm/kasan/common.c:46)
+kasan_set_track (mm/kasan/common.c:52)
+__kasan_kmalloc (mm/kasan/common.c:374 mm/kasan/common.c:383)
+hci_conn_params_add (./include/linux/slab.h:580 ./include/linux/slab.h:720 net/bluetooth/hci_core.c:2277)
+hci_connect_le_scan (net/bluetooth/hci_conn.c:1419 net/bluetooth/hci_conn.c:1589)
+hci_connect_cis (net/bluetooth/hci_conn.c:2266)
+iso_connect_cis (net/bluetooth/iso.c:390)
+iso_sock_connect (net/bluetooth/iso.c:899)
+__sys_connect (net/socket.c:2003 net/socket.c:2020)
+__x64_sys_connect (net/socket.c:2027)
+do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
+entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
+
+Freed by task 15:
+kasan_save_stack (mm/kasan/common.c:46)
+kasan_set_track (mm/kasan/common.c:52)
+kasan_save_free_info (mm/kasan/generic.c:523)
+__kasan_slab_free (mm/kasan/common.c:238 mm/kasan/common.c:200 mm/kasan/common.c:244)
+__kmem_cache_free (mm/slub.c:1807 mm/slub.c:3787 mm/slub.c:3800)
+hci_conn_params_del (net/bluetooth/hci_core.c:2323)
+le_scan_cleanup (net/bluetooth/hci_conn.c:202)
+process_one_work (./arch/x86/include/asm/preempt.h:27 kernel/workqueue.c:2399)
+worker_thread (./include/linux/list.h:292 kernel/workqueue.c:2538)
+kthread (kernel/kthread.c:376)
+ret_from_fork (arch/x86/entry/entry_64.S:314)
+==================================================================
+
+Fixes: e8907f76544f ("Bluetooth: hci_sync: Make use of hci_cmd_sync_queue set 3")
+Signed-off-by: Pauli Virtanen <pav@iki.fi>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/bluetooth/hci_core.h | 5 ++
+ net/bluetooth/hci_conn.c | 10 +--
+ net/bluetooth/hci_core.c | 38 ++++++++--
+ net/bluetooth/hci_event.c | 12 ++--
+ net/bluetooth/hci_sync.c | 117 ++++++++++++++++++++++++++++---
+ net/bluetooth/mgmt.c | 26 +++----
+ 6 files changed, 164 insertions(+), 44 deletions(-)
+
+diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
+index 84c5ce57eab69..ddbcbf9ccb2ce 100644
+--- a/include/net/bluetooth/hci_core.h
++++ b/include/net/bluetooth/hci_core.h
+@@ -807,6 +807,7 @@ struct hci_conn_params {
+
+ struct hci_conn *conn;
+ bool explicit_connect;
++ /* Accessed without hdev->lock: */
+ hci_conn_flags_t flags;
+ u8 privacy_mode;
+ };
+@@ -1536,7 +1537,11 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
+ bdaddr_t *addr, u8 addr_type);
+ void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type);
+ void hci_conn_params_clear_disabled(struct hci_dev *hdev);
++void hci_conn_params_free(struct hci_conn_params *param);
+
++void hci_pend_le_list_del_init(struct hci_conn_params *param);
++void hci_pend_le_list_add(struct hci_conn_params *param,
++ struct list_head *list);
+ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
+ bdaddr_t *addr,
+ u8 addr_type);
+diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
+index fef09d2121384..61059571c8779 100644
+--- a/net/bluetooth/hci_conn.c
++++ b/net/bluetooth/hci_conn.c
+@@ -117,7 +117,7 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status)
+ */
+ params->explicit_connect = false;
+
+- list_del_init(¶ms->action);
++ hci_pend_le_list_del_init(params);
+
+ switch (params->auto_connect) {
+ case HCI_AUTO_CONN_EXPLICIT:
+@@ -126,10 +126,10 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status)
+ return;
+ case HCI_AUTO_CONN_DIRECT:
+ case HCI_AUTO_CONN_ALWAYS:
+- list_add(¶ms->action, &hdev->pend_le_conns);
++ hci_pend_le_list_add(params, &hdev->pend_le_conns);
+ break;
+ case HCI_AUTO_CONN_REPORT:
+- list_add(¶ms->action, &hdev->pend_le_reports);
++ hci_pend_le_list_add(params, &hdev->pend_le_reports);
+ break;
+ default:
+ break;
+@@ -1398,8 +1398,8 @@ static int hci_explicit_conn_params_set(struct hci_dev *hdev,
+ if (params->auto_connect == HCI_AUTO_CONN_DISABLED ||
+ params->auto_connect == HCI_AUTO_CONN_REPORT ||
+ params->auto_connect == HCI_AUTO_CONN_EXPLICIT) {
+- list_del_init(¶ms->action);
+- list_add(¶ms->action, &hdev->pend_le_conns);
++ hci_pend_le_list_del_init(params);
++ hci_pend_le_list_add(params, &hdev->pend_le_conns);
+ }
+
+ params->explicit_connect = true;
+diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
+index ca42129f8f91a..be0e6865b340f 100644
+--- a/net/bluetooth/hci_core.c
++++ b/net/bluetooth/hci_core.c
+@@ -2249,21 +2249,45 @@ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev,
+ return NULL;
+ }
+
+-/* This function requires the caller holds hdev->lock */
++/* This function requires the caller holds hdev->lock or rcu_read_lock */
+ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
+ bdaddr_t *addr, u8 addr_type)
+ {
+ struct hci_conn_params *param;
+
+- list_for_each_entry(param, list, action) {
++ rcu_read_lock();
++
++ list_for_each_entry_rcu(param, list, action) {
+ if (bacmp(¶m->addr, addr) == 0 &&
+- param->addr_type == addr_type)
++ param->addr_type == addr_type) {
++ rcu_read_unlock();
+ return param;
++ }
+ }
+
++ rcu_read_unlock();
++
+ return NULL;
+ }
+
++/* This function requires the caller holds hdev->lock */
++void hci_pend_le_list_del_init(struct hci_conn_params *param)
++{
++ if (list_empty(¶m->action))
++ return;
++
++ list_del_rcu(¶m->action);
++ synchronize_rcu();
++ INIT_LIST_HEAD(¶m->action);
++}
++
++/* This function requires the caller holds hdev->lock */
++void hci_pend_le_list_add(struct hci_conn_params *param,
++ struct list_head *list)
++{
++ list_add_rcu(¶m->action, list);
++}
++
+ /* This function requires the caller holds hdev->lock */
+ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
+ bdaddr_t *addr, u8 addr_type)
+@@ -2297,14 +2321,15 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
+ return params;
+ }
+
+-static void hci_conn_params_free(struct hci_conn_params *params)
++void hci_conn_params_free(struct hci_conn_params *params)
+ {
++ hci_pend_le_list_del_init(params);
++
+ if (params->conn) {
+ hci_conn_drop(params->conn);
+ hci_conn_put(params->conn);
+ }
+
+- list_del(¶ms->action);
+ list_del(¶ms->list);
+ kfree(params);
+ }
+@@ -2342,8 +2367,7 @@ void hci_conn_params_clear_disabled(struct hci_dev *hdev)
+ continue;
+ }
+
+- list_del(¶ms->list);
+- kfree(params);
++ hci_conn_params_free(params);
+ }
+
+ BT_DBG("All LE disabled connection parameters were removed");
+diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
+index b272cc1f36481..ec9b0612f2761 100644
+--- a/net/bluetooth/hci_event.c
++++ b/net/bluetooth/hci_event.c
+@@ -1558,7 +1558,7 @@ static u8 hci_cc_le_set_privacy_mode(struct hci_dev *hdev, void *data,
+
+ params = hci_conn_params_lookup(hdev, &cp->bdaddr, cp->bdaddr_type);
+ if (params)
+- params->privacy_mode = cp->mode;
++ WRITE_ONCE(params->privacy_mode, cp->mode);
+
+ hci_dev_unlock(hdev);
+
+@@ -2809,8 +2809,8 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
+
+ case HCI_AUTO_CONN_DIRECT:
+ case HCI_AUTO_CONN_ALWAYS:
+- list_del_init(¶ms->action);
+- list_add(¶ms->action, &hdev->pend_le_conns);
++ hci_pend_le_list_del_init(params);
++ hci_pend_le_list_add(params, &hdev->pend_le_conns);
+ break;
+
+ default:
+@@ -3428,8 +3428,8 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data,
+
+ case HCI_AUTO_CONN_DIRECT:
+ case HCI_AUTO_CONN_ALWAYS:
+- list_del_init(¶ms->action);
+- list_add(¶ms->action, &hdev->pend_le_conns);
++ hci_pend_le_list_del_init(params);
++ hci_pend_le_list_add(params, &hdev->pend_le_conns);
+ hci_update_passive_scan(hdev);
+ break;
+
+@@ -5952,7 +5952,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
+ params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst,
+ conn->dst_type);
+ if (params) {
+- list_del_init(¶ms->action);
++ hci_pend_le_list_del_init(params);
+ if (params->conn) {
+ hci_conn_drop(params->conn);
+ hci_conn_put(params->conn);
+diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
+index 37131a36700a1..2ae038dfc39f7 100644
+--- a/net/bluetooth/hci_sync.c
++++ b/net/bluetooth/hci_sync.c
+@@ -2139,15 +2139,23 @@ static int hci_le_del_accept_list_sync(struct hci_dev *hdev,
+ return 0;
+ }
+
++struct conn_params {
++ bdaddr_t addr;
++ u8 addr_type;
++ hci_conn_flags_t flags;
++ u8 privacy_mode;
++};
++
+ /* Adds connection to resolve list if needed.
+ * Setting params to NULL programs local hdev->irk
+ */
+ static int hci_le_add_resolve_list_sync(struct hci_dev *hdev,
+- struct hci_conn_params *params)
++ struct conn_params *params)
+ {
+ struct hci_cp_le_add_to_resolv_list cp;
+ struct smp_irk *irk;
+ struct bdaddr_list_with_irk *entry;
++ struct hci_conn_params *p;
+
+ if (!use_ll_privacy(hdev))
+ return 0;
+@@ -2182,6 +2190,16 @@ static int hci_le_add_resolve_list_sync(struct hci_dev *hdev,
+ /* Default privacy mode is always Network */
+ params->privacy_mode = HCI_NETWORK_PRIVACY;
+
++ rcu_read_lock();
++ p = hci_pend_le_action_lookup(&hdev->pend_le_conns,
++ ¶ms->addr, params->addr_type);
++ if (!p)
++ p = hci_pend_le_action_lookup(&hdev->pend_le_reports,
++ ¶ms->addr, params->addr_type);
++ if (p)
++ WRITE_ONCE(p->privacy_mode, HCI_NETWORK_PRIVACY);
++ rcu_read_unlock();
++
+ done:
+ if (hci_dev_test_flag(hdev, HCI_PRIVACY))
+ memcpy(cp.local_irk, hdev->irk, 16);
+@@ -2194,7 +2212,7 @@ static int hci_le_add_resolve_list_sync(struct hci_dev *hdev,
+
+ /* Set Device Privacy Mode. */
+ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
+- struct hci_conn_params *params)
++ struct conn_params *params)
+ {
+ struct hci_cp_le_set_privacy_mode cp;
+ struct smp_irk *irk;
+@@ -2219,6 +2237,8 @@ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
+ bacpy(&cp.bdaddr, &irk->bdaddr);
+ cp.mode = HCI_DEVICE_PRIVACY;
+
++ /* Note: params->privacy_mode is not updated since it is a copy */
++
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PRIVACY_MODE,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+ }
+@@ -2228,7 +2248,7 @@ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
+ * properly set the privacy mode.
+ */
+ static int hci_le_add_accept_list_sync(struct hci_dev *hdev,
+- struct hci_conn_params *params,
++ struct conn_params *params,
+ u8 *num_entries)
+ {
+ struct hci_cp_le_add_to_accept_list cp;
+@@ -2426,6 +2446,52 @@ struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev,
+ return __hci_cmd_sync_sk(hdev, opcode, 0, NULL, 0, HCI_CMD_TIMEOUT, sk);
+ }
+
++static struct conn_params *conn_params_copy(struct list_head *list, size_t *n)
++{
++ struct hci_conn_params *params;
++ struct conn_params *p;
++ size_t i;
++
++ rcu_read_lock();
++
++ i = 0;
++ list_for_each_entry_rcu(params, list, action)
++ ++i;
++ *n = i;
++
++ rcu_read_unlock();
++
++ p = kvcalloc(*n, sizeof(struct conn_params), GFP_KERNEL);
++ if (!p)
++ return NULL;
++
++ rcu_read_lock();
++
++ i = 0;
++ list_for_each_entry_rcu(params, list, action) {
++ /* Racing adds are handled in next scan update */
++ if (i >= *n)
++ break;
++
++ /* No hdev->lock, but: addr, addr_type are immutable.
++ * privacy_mode is only written by us or in
++ * hci_cc_le_set_privacy_mode that we wait for.
++ * We should be idempotent so MGMT updating flags
++ * while we are processing is OK.
++ */
++ bacpy(&p[i].addr, &params->addr);
++ p[i].addr_type = params->addr_type;
++ p[i].flags = READ_ONCE(params->flags);
++ p[i].privacy_mode = READ_ONCE(params->privacy_mode);
++ ++i;
++ }
++
++ rcu_read_unlock();
++
++ *n = i;
++ return p;
++}
++
+ /* Device must not be scanning when updating the accept list.
+ *
+ * Update is done using the following sequence:
+@@ -2445,11 +2511,12 @@ struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev,
+ */
+ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
+ {
+- struct hci_conn_params *params;
++ struct conn_params *params;
+ struct bdaddr_list *b, *t;
+ u8 num_entries = 0;
+ bool pend_conn, pend_report;
+ u8 filter_policy;
++ size_t i, n;
+ int err;
+
+ /* Pause advertising if resolving list can be used as controllers
+@@ -2483,6 +2550,7 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
+ if (hci_conn_hash_lookup_le(hdev, &b->bdaddr, b->bdaddr_type))
+ continue;
+
++ /* Pointers not dereferenced, no locks needed */
+ pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns,
+ &b->bdaddr,
+ b->bdaddr_type);
+@@ -2511,23 +2579,50 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
+ * available accept list entries in the controller, then
+ * just abort and return filer policy value to not use the
+ * accept list.
++ *
++ * The list and params may be mutated while we wait for events,
++ * so make a copy and iterate it.
+ */
+- list_for_each_entry(params, &hdev->pend_le_conns, action) {
+- err = hci_le_add_accept_list_sync(hdev, params, &num_entries);
+- if (err)
++
++ params = conn_params_copy(&hdev->pend_le_conns, &n);
++ if (!params) {
++ err = -ENOMEM;
++ goto done;
++ }
++
++ for (i = 0; i < n; ++i) {
++ err = hci_le_add_accept_list_sync(hdev, &params[i],
++ &num_entries);
++ if (err) {
++ kvfree(params);
+ goto done;
++ }
+ }
+
++ kvfree(params);
++
+ /* After adding all new pending connections, walk through
+ * the list of pending reports and also add these to the
+ * accept list if there is still space. Abort if space runs out.
+ */
+- list_for_each_entry(params, &hdev->pend_le_reports, action) {
+- err = hci_le_add_accept_list_sync(hdev, params, &num_entries);
+- if (err)
++
++ params = conn_params_copy(&hdev->pend_le_reports, &n);
++ if (!params) {
++ err = -ENOMEM;
++ goto done;
++ }
++
++ for (i = 0; i < n; ++i) {
++ err = hci_le_add_accept_list_sync(hdev, &params[i],
++ &num_entries);
++ if (err) {
++ kvfree(params);
+ goto done;
++ }
+ }
+
++ kvfree(params);
++
+ /* Use the allowlist unless the following conditions are all true:
+ * - We are not currently suspending
+ * - There are 1 or more ADV monitors registered and it's not offloaded
+@@ -4778,12 +4873,12 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev)
+ struct hci_conn_params *p;
+
+ list_for_each_entry(p, &hdev->le_conn_params, list) {
++ hci_pend_le_list_del_init(p);
+ if (p->conn) {
+ hci_conn_drop(p->conn);
+ hci_conn_put(p->conn);
+ p->conn = NULL;
+ }
+- list_del_init(&p->action);
+ }
+
+ BT_DBG("All LE pending actions cleared");
+diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
+index 815f2abe918ef..89c94f3e96bc3 100644
+--- a/net/bluetooth/mgmt.c
++++ b/net/bluetooth/mgmt.c
+@@ -1297,15 +1297,15 @@ static void restart_le_actions(struct hci_dev *hdev)
+ /* Needed for AUTO_OFF case where might not "really"
+ * have been powered off.
+ */
+- list_del_init(&p->action);
++ hci_pend_le_list_del_init(p);
+
+ switch (p->auto_connect) {
+ case HCI_AUTO_CONN_DIRECT:
+ case HCI_AUTO_CONN_ALWAYS:
+- list_add(&p->action, &hdev->pend_le_conns);
++ hci_pend_le_list_add(p, &hdev->pend_le_conns);
+ break;
+ case HCI_AUTO_CONN_REPORT:
+- list_add(&p->action, &hdev->pend_le_reports);
++ hci_pend_le_list_add(p, &hdev->pend_le_reports);
+ break;
+ default:
+ break;
+@@ -5161,7 +5161,7 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
+ goto unlock;
+ }
+
+- params->flags = current_flags;
++ WRITE_ONCE(params->flags, current_flags);
+ status = MGMT_STATUS_SUCCESS;
+
+ /* Update passive scan if HCI_CONN_FLAG_DEVICE_PRIVACY
+@@ -7573,7 +7573,7 @@ static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr,
+ if (params->auto_connect == auto_connect)
+ return 0;
+
+- list_del_init(&params->action);
++ hci_pend_le_list_del_init(params);
+
+ switch (auto_connect) {
+ case HCI_AUTO_CONN_DISABLED:
+@@ -7582,18 +7582,18 @@ static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr,
+ * connect to device, keep connecting.
+ */
+ if (params->explicit_connect)
+- list_add(&params->action, &hdev->pend_le_conns);
++ hci_pend_le_list_add(params, &hdev->pend_le_conns);
+ break;
+ case HCI_AUTO_CONN_REPORT:
+ if (params->explicit_connect)
+- list_add(&params->action, &hdev->pend_le_conns);
++ hci_pend_le_list_add(params, &hdev->pend_le_conns);
+ else
+- list_add(&params->action, &hdev->pend_le_reports);
++ hci_pend_le_list_add(params, &hdev->pend_le_reports);
+ break;
+ case HCI_AUTO_CONN_DIRECT:
+ case HCI_AUTO_CONN_ALWAYS:
+ if (!is_connected(hdev, addr, addr_type))
+- list_add(&params->action, &hdev->pend_le_conns);
++ hci_pend_le_list_add(params, &hdev->pend_le_conns);
+ break;
+ }
+
+@@ -7816,9 +7816,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
+ goto unlock;
+ }
+
+- list_del(&params->action);
+- list_del(&params->list);
+- kfree(params);
++ hci_conn_params_free(params);
+
+ device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type);
+ } else {
+@@ -7849,9 +7847,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
+ p->auto_connect = HCI_AUTO_CONN_EXPLICIT;
+ continue;
+ }
+- list_del(&p->action);
+- list_del(&p->list);
+- kfree(p);
++ hci_conn_params_free(p);
+ }
+
+ bt_dev_dbg(hdev, "All LE connection parameters were removed");
+--
+2.39.2
+
--- /dev/null
+From ccf4979c64a589eed4428fcc3fc6a92a8627c659 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 May 2023 21:37:48 -0700
+Subject: bpf: Address KCSAN report on bpf_lru_list
+
+From: Martin KaFai Lau <martin.lau@kernel.org>
+
+[ Upstream commit ee9fd0ac3017c4313be91a220a9ac4c99dde7ad4 ]
+
+KCSAN reported a data-race when accessing node->ref.
+Although node->ref does not have to be accurate,
+take this chance to use a more common READ_ONCE() and WRITE_ONCE()
+pattern instead of data_race().
+
+There is an existing bpf_lru_node_is_ref() and bpf_lru_node_set_ref().
+This patch also adds bpf_lru_node_clear_ref() to do the
+WRITE_ONCE(node->ref, 0) also.
+
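+The accessor pattern boils down to the following userspace sketch (the
+struct and helper names are illustrative stand-ins, not the kernel code):
+
+```
+#include <stdbool.h>
+#include <stdio.h>
+
+/* Minimal stand-ins for the kernel's READ_ONCE()/WRITE_ONCE() macros. */
+#define READ_ONCE(x)		(*(const volatile typeof(x) *)&(x))
+#define WRITE_ONCE(x, val)	(*(volatile typeof(x) *)&(x) = (val))
+
+struct lru_node {		/* hypothetical stand-in for bpf_lru_node */
+	unsigned char ref;	/* approximate "recently used" hint */
+};
+
+static bool lru_node_is_ref(struct lru_node *node)
+{
+	return READ_ONCE(node->ref);
+}
+
+static void lru_node_clear_ref(struct lru_node *node)
+{
+	WRITE_ONCE(node->ref, 0);
+}
+
+static void lru_node_set_ref(struct lru_node *node)
+{
+	/* The value may race with other CPUs; accuracy is not required,
+	 * but marking the accesses tells KCSAN the race is intentional.
+	 */
+	if (!READ_ONCE(node->ref))
+		WRITE_ONCE(node->ref, 1);
+}
+
+int main(void)
+{
+	struct lru_node n = { .ref = 0 };
+
+	lru_node_set_ref(&n);
+	printf("ref set: %d\n", lru_node_is_ref(&n));
+	lru_node_clear_ref(&n);
+	printf("ref cleared: %d\n", lru_node_is_ref(&n));
+	return 0;
+}
+```
+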
+==================================================================
+BUG: KCSAN: data-race in __bpf_lru_list_rotate / __htab_lru_percpu_map_update_elem
+
+write to 0xffff888137038deb of 1 bytes by task 11240 on cpu 1:
+__bpf_lru_node_move kernel/bpf/bpf_lru_list.c:113 [inline]
+__bpf_lru_list_rotate_active kernel/bpf/bpf_lru_list.c:149 [inline]
+__bpf_lru_list_rotate+0x1bf/0x750 kernel/bpf/bpf_lru_list.c:240
+bpf_lru_list_pop_free_to_local kernel/bpf/bpf_lru_list.c:329 [inline]
+bpf_common_lru_pop_free kernel/bpf/bpf_lru_list.c:447 [inline]
+bpf_lru_pop_free+0x638/0xe20 kernel/bpf/bpf_lru_list.c:499
+prealloc_lru_pop kernel/bpf/hashtab.c:290 [inline]
+__htab_lru_percpu_map_update_elem+0xe7/0x820 kernel/bpf/hashtab.c:1316
+bpf_percpu_hash_update+0x5e/0x90 kernel/bpf/hashtab.c:2313
+bpf_map_update_value+0x2a9/0x370 kernel/bpf/syscall.c:200
+generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1687
+bpf_map_do_batch+0x2d9/0x3d0 kernel/bpf/syscall.c:4534
+__sys_bpf+0x338/0x810
+__do_sys_bpf kernel/bpf/syscall.c:5096 [inline]
+__se_sys_bpf kernel/bpf/syscall.c:5094 [inline]
+__x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5094
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+read to 0xffff888137038deb of 1 bytes by task 11241 on cpu 0:
+bpf_lru_node_set_ref kernel/bpf/bpf_lru_list.h:70 [inline]
+__htab_lru_percpu_map_update_elem+0x2f1/0x820 kernel/bpf/hashtab.c:1332
+bpf_percpu_hash_update+0x5e/0x90 kernel/bpf/hashtab.c:2313
+bpf_map_update_value+0x2a9/0x370 kernel/bpf/syscall.c:200
+generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1687
+bpf_map_do_batch+0x2d9/0x3d0 kernel/bpf/syscall.c:4534
+__sys_bpf+0x338/0x810
+__do_sys_bpf kernel/bpf/syscall.c:5096 [inline]
+__se_sys_bpf kernel/bpf/syscall.c:5094 [inline]
+__x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5094
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+value changed: 0x01 -> 0x00
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 11241 Comm: syz-executor.3 Not tainted 6.3.0-rc7-syzkaller-00136-g6a66fdd29ea1 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/30/2023
+==================================================================
+
+Reported-by: syzbot+ebe648a84e8784763f82@syzkaller.appspotmail.com
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Acked-by: Yonghong Song <yhs@fb.com>
+Link: https://lore.kernel.org/r/20230511043748.1384166-1-martin.lau@linux.dev
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/bpf_lru_list.c | 21 +++++++++++++--------
+ kernel/bpf/bpf_lru_list.h | 7 ++-----
+ 2 files changed, 15 insertions(+), 13 deletions(-)
+
+diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
+index d99e89f113c43..3dabdd137d102 100644
+--- a/kernel/bpf/bpf_lru_list.c
++++ b/kernel/bpf/bpf_lru_list.c
+@@ -41,7 +41,12 @@ static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l)
+ /* bpf_lru_node helpers */
+ static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node)
+ {
+- return node->ref;
++ return READ_ONCE(node->ref);
++}
++
++static void bpf_lru_node_clear_ref(struct bpf_lru_node *node)
++{
++ WRITE_ONCE(node->ref, 0);
+ }
+
+ static void bpf_lru_list_count_inc(struct bpf_lru_list *l,
+@@ -89,7 +94,7 @@ static void __bpf_lru_node_move_in(struct bpf_lru_list *l,
+
+ bpf_lru_list_count_inc(l, tgt_type);
+ node->type = tgt_type;
+- node->ref = 0;
++ bpf_lru_node_clear_ref(node);
+ list_move(&node->list, &l->lists[tgt_type]);
+ }
+
+@@ -110,7 +115,7 @@ static void __bpf_lru_node_move(struct bpf_lru_list *l,
+ bpf_lru_list_count_inc(l, tgt_type);
+ node->type = tgt_type;
+ }
+- node->ref = 0;
++ bpf_lru_node_clear_ref(node);
+
+ /* If the moving node is the next_inactive_rotation candidate,
+ * move the next_inactive_rotation pointer also.
+@@ -353,7 +358,7 @@ static void __local_list_add_pending(struct bpf_lru *lru,
+ *(u32 *)((void *)node + lru->hash_offset) = hash;
+ node->cpu = cpu;
+ node->type = BPF_LRU_LOCAL_LIST_T_PENDING;
+- node->ref = 0;
++ bpf_lru_node_clear_ref(node);
+ list_add(&node->list, local_pending_list(loc_l));
+ }
+
+@@ -419,7 +424,7 @@ static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru,
+ if (!list_empty(free_list)) {
+ node = list_first_entry(free_list, struct bpf_lru_node, list);
+ *(u32 *)((void *)node + lru->hash_offset) = hash;
+- node->ref = 0;
++ bpf_lru_node_clear_ref(node);
+ __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE);
+ }
+
+@@ -522,7 +527,7 @@ static void bpf_common_lru_push_free(struct bpf_lru *lru,
+ }
+
+ node->type = BPF_LRU_LOCAL_LIST_T_FREE;
+- node->ref = 0;
++ bpf_lru_node_clear_ref(node);
+ list_move(&node->list, local_free_list(loc_l));
+
+ raw_spin_unlock_irqrestore(&loc_l->lock, flags);
+@@ -568,7 +573,7 @@ static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf,
+
+ node = (struct bpf_lru_node *)(buf + node_offset);
+ node->type = BPF_LRU_LIST_T_FREE;
+- node->ref = 0;
++ bpf_lru_node_clear_ref(node);
+ list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
+ buf += elem_size;
+ }
+@@ -594,7 +599,7 @@ static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf,
+ node = (struct bpf_lru_node *)(buf + node_offset);
+ node->cpu = cpu;
+ node->type = BPF_LRU_LIST_T_FREE;
+- node->ref = 0;
++ bpf_lru_node_clear_ref(node);
+ list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
+ i++;
+ buf += elem_size;
+diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h
+index 4ea227c9c1ade..8f3c8b2b4490e 100644
+--- a/kernel/bpf/bpf_lru_list.h
++++ b/kernel/bpf/bpf_lru_list.h
+@@ -64,11 +64,8 @@ struct bpf_lru {
+
+ static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node)
+ {
+- /* ref is an approximation on access frequency. It does not
+- * have to be very accurate. Hence, no protection is used.
+- */
+- if (!node->ref)
+- node->ref = 1;
++ if (!READ_ONCE(node->ref))
++ WRITE_ONCE(node->ref, 1);
+ }
+
+ int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
+--
+2.39.2
+
--- /dev/null
+From 0a9f7c72db338d808de8b35708d487940038ce8f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 09:49:31 -0700
+Subject: bpf, arm64: Fix BTI type used for freplace attached functions
+
+From: Alexander Duyck <alexanderduyck@fb.com>
+
+[ Upstream commit a3f25d614bc73b45e8f02adc6769876dfd16ca84 ]
+
+When running an freplace attached bpf program on an arm64 system we were
+seeing the following issue:
+ Unhandled 64-bit el1h sync exception on CPU47, ESR 0x0000000036000003 -- BTI
+
+After a bit of work to track it down I determined that what appeared to be
+happening is that the 'bti c' at the start of the program was somehow being
+reached after a 'br' instruction. Further digging pointed me toward the
+fact that the function was attached via freplace. This in turn led me to
+build_plt which I believe is invoking the long jump which is triggering
+this error.
+
+To resolve it we can replace the 'bti c' with 'bti jc' and add a comment
+explaining why this has to be modified as such.
+
+Fixes: b2ad54e1533e ("bpf, arm64: Implement bpf_arch_text_poke() for arm64")
+Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
+Acked-by: Xu Kuohai <xukuohai@huawei.com>
+Link: https://lore.kernel.org/r/168926677665.316237.9953845318337455525.stgit@ahduyck-xeon-server.home.arpa
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/net/bpf_jit_comp.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
+index 8f16217c111c8..14134fd34ff79 100644
+--- a/arch/arm64/net/bpf_jit_comp.c
++++ b/arch/arm64/net/bpf_jit_comp.c
+@@ -322,7 +322,13 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
+ *
+ */
+
+- emit_bti(A64_BTI_C, ctx);
++ /* bpf function may be invoked by 3 instruction types:
++ * 1. bl, attached via freplace to bpf prog via short jump
++ * 2. br, attached via freplace to bpf prog via long jump
++ * 3. blr, working as a function pointer, used by emit_call.
++ * So BTI_JC should used here to support both br and blr.
++ */
++ emit_bti(A64_BTI_JC, ctx);
+
+ emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
+ emit(A64_NOP, ctx);
+--
+2.39.2
+
--- /dev/null
+From 6136de53109de1a3979843917ce4f9c78823e3e1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 21:45:28 +0530
+Subject: bpf: Fix subprog idx logic in check_max_stack_depth
+
+From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+
+[ Upstream commit ba7b3e7d5f9014be65879ede8fd599cb222901c9 ]
+
+The assignment to idx in check_max_stack_depth happens once we see a
+bpf_pseudo_call or bpf_pseudo_func. This is not an issue as the rest of
+the code performs a few checks and then pushes the frame to the frame
+stack, except in the case of async callbacks. If the async callback case
+causes the loop iteration to be skipped, the idx assignment will be
+incorrect on the next iteration of the loop. The value stored in the
+frame stack (as the subprogno of the current subprog) will be incorrect.
+
+This leads to incorrect checks and incorrect tail_call_reachable
+marking. Save the target subprog in a new variable and only assign to
+idx once we are done with the is_async_cb check which may skip pushing
+of frame to the frame stack and subsequent stack depth checks and tail
+call markings.
+
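+The failure mode is easier to see in a toy model of the loop (a rough
+userspace sketch with invented names, not the verifier code):
+
+```
+#include <stdio.h>
+
+struct item { int callee; int is_async; };	/* toy "instruction" */
+
+/* Walk the items of the current frame 'cur'.  Assigning to 'cur' before
+ * the async check (the buggy variant) leaves it pointing at the async
+ * callee for the rest of the loop; committing only after the check
+ * keeps it correct.
+ */
+static int walk(const struct item *items, int n, int cur, int buggy)
+{
+	for (int i = 0; i < n; i++) {
+		int next;
+
+		if (items[i].callee < 0)
+			continue;
+		next = items[i].callee;
+		if (buggy)
+			cur = next;		/* too early */
+		if (items[i].is_async)
+			continue;		/* frame is not entered */
+		cur = next;			/* correct place to commit */
+		/* ... descend into 'cur' here ... */
+	}
+	return cur;
+}
+
+int main(void)
+{
+	const struct item prog[] = {
+		{ .callee = 7, .is_async = 1 },	/* async callback, skipped */
+		{ .callee = -1, .is_async = 0 },
+	};
+
+	printf("buggy: %d, fixed: %d\n",
+	       walk(prog, 2, 0, 1), walk(prog, 2, 0, 0));
+	return 0;
+}
+```
+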
+Fixes: 7ddc80a476c2 ("bpf: Teach stack depth check about async callbacks.")
+Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+Link: https://lore.kernel.org/r/20230717161530.1238-2-memxor@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 8c3ededef3172..fdba4086881b3 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -4336,7 +4336,7 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
+ continue_func:
+ subprog_end = subprog[idx + 1].start;
+ for (; i < subprog_end; i++) {
+- int next_insn;
++ int next_insn, sidx;
+
+ if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
+ continue;
+@@ -4346,14 +4346,14 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
+
+ /* find the callee */
+ next_insn = i + insn[i].imm + 1;
+- idx = find_subprog(env, next_insn);
+- if (idx < 0) {
++ sidx = find_subprog(env, next_insn);
++ if (sidx < 0) {
+ WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
+ next_insn);
+ return -EFAULT;
+ }
+- if (subprog[idx].is_async_cb) {
+- if (subprog[idx].has_tail_call) {
++ if (subprog[sidx].is_async_cb) {
++ if (subprog[sidx].has_tail_call) {
+ verbose(env, "verifier bug. subprog has tail_call and async cb\n");
+ return -EFAULT;
+ }
+@@ -4362,6 +4362,7 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
+ continue;
+ }
+ i = next_insn;
++ idx = sidx;
+
+ if (subprog[idx].has_tail_call)
+ tail_call_reachable = true;
+--
+2.39.2
+
--- /dev/null
+From cb24f938e033cedcefaf283a9d5f44beb406005c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 May 2023 11:14:18 -0700
+Subject: bpf: Print a warning only if writing to unprivileged_bpf_disabled.
+
+From: Kui-Feng Lee <thinker.li@gmail.com>
+
+[ Upstream commit fedf99200ab086c42a572fca1d7266b06cdc3e3f ]
+
+Only print the warning message if you are writing to
+"/proc/sys/kernel/unprivileged_bpf_disabled".
+
+The kernel may print an annoying warning when you read
+"/proc/sys/kernel/unprivileged_bpf_disabled" saying
+
+ WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible
+ via Spectre v2 BHB attacks!
+
+However, this message is only meaningful when the feature is
+disabled or enabled.
+
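+The intent, reduced to a userspace sketch (names are illustrative; the
+real handler is bpf_unpriv_handler operating on a sysctl table):
+
+```
+#include <stdbool.h>
+#include <stdio.h>
+
+static int unpriv_bpf_disabled = 1;	/* stand-in for the sysctl value */
+
+static void unpriv_ebpf_notify(int new_value)
+{
+	/* stands in for the hook that may print the Spectre warning */
+	fprintf(stderr, "unprivileged_bpf_disabled is now %d\n", new_value);
+}
+
+/* Toy sysctl handler: 'write' distinguishes writes from reads of the
+ * /proc file.  Only a write can change the setting, so only a write
+ * should trigger the notification/warning.
+ */
+static int unpriv_handler(bool write, int new_value)
+{
+	if (write) {
+		unpriv_bpf_disabled = new_value;
+		unpriv_ebpf_notify(new_value);
+	}
+	return unpriv_bpf_disabled;
+}
+
+int main(void)
+{
+	printf("read: %d\n", unpriv_handler(false, 0));	/* no warning */
+	printf("write: %d\n", unpriv_handler(true, 0));	/* may warn */
+	return 0;
+}
+```
+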
+Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Acked-by: Yonghong Song <yhs@fb.com>
+Link: https://lore.kernel.org/bpf/20230502181418.308479-1-kuifeng@meta.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/syscall.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
+index 8633ec4f92df3..0c44a716f0a24 100644
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -5289,7 +5289,8 @@ static int bpf_unpriv_handler(struct ctl_table *table, int write,
+ *(int *)table->data = unpriv_enable;
+ }
+
+- unpriv_ebpf_notify(unpriv_enable);
++ if (write)
++ unpriv_ebpf_notify(unpriv_enable);
+
+ return ret;
+ }
+--
+2.39.2
+
--- /dev/null
+From 765e8a472e267495e5ef26af7754684c76f6627f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 21:45:29 +0530
+Subject: bpf: Repeat check_max_stack_depth for async callbacks
+
+From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+
+[ Upstream commit b5e9ad522c4ccd32d322877515cff8d47ed731b9 ]
+
+While the check_max_stack_depth function explores call chains emanating
+from the main prog, which is typically enough to cover all possible call
+chains, it doesn't explore those rooted at async callbacks unless the
+async callback will have been directly called, since unlike non-async
+callbacks it skips their instruction exploration as they don't
+contribute to stack depth.
+
+It could be the case that the async callback leads to a callchain which
+exceeds the stack depth, but this is never reachable while only
+exploring the entry point from main subprog. Hence, repeat the check for
+the main subprog *and* all async callbacks marked by the symbolic
+execution pass of the verifier, as execution of the program may begin at
+any of them.
+
+Consider functions with following stack depths:
+main: 256
+async: 256
+foo: 256
+
+main:
+ rX = async
+ bpf_timer_set_callback(...)
+
+async:
+ foo()
+
+Here, async is not descended as it does not contribute to stack depth of
+main (since it is referenced using bpf_pseudo_func and not
+bpf_pseudo_call). However, when async is invoked asynchronously, it will
+end up breaching the MAX_BPF_STACK limit by calling foo.
+
+Hence, in addition to main, we also need to explore call chains
+beginning at all async callback subprogs in a program.
+
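+The shape of the fix, as a rough userspace sketch (all names and types
+are illustrative, not the verifier code): run the existing per-entry-point
+walk once for the main subprog and once for every async callback.
+
+```
+#include <stdbool.h>
+#include <stdio.h>
+
+struct subprog_info { bool is_async_cb; };
+
+/* stands in for the existing walk over call chains rooted at 'idx' */
+static int check_stack_depth_from(int idx)
+{
+	printf("walking call chains rooted at subprog %d\n", idx);
+	return 0;	/* 0 = within the stack limit */
+}
+
+static int check_all_entry_points(const struct subprog_info *si, int cnt)
+{
+	for (int i = 0; i < cnt; i++) {
+		/* the main subprog (i == 0) and every async callback can
+		 * be an entry point of execution
+		 */
+		if (i == 0 || si[i].is_async_cb) {
+			int ret = check_stack_depth_from(i);
+
+			if (ret < 0)
+				return ret;
+		}
+	}
+	return 0;
+}
+
+int main(void)
+{
+	const struct subprog_info progs[] = { { false }, { true }, { false } };
+
+	return check_all_entry_points(progs, 3);
+}
+```
+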
+Fixes: 7ddc80a476c2 ("bpf: Teach stack depth check about async callbacks.")
+Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+Link: https://lore.kernel.org/r/20230717161530.1238-3-memxor@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 21 +++++++++++++++++++--
+ 1 file changed, 19 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index fdba4086881b3..f25ce959fae64 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -4288,16 +4288,17 @@ static int update_stack_depth(struct bpf_verifier_env *env,
+ * Since recursion is prevented by check_cfg() this algorithm
+ * only needs a local stack of MAX_CALL_FRAMES to remember callsites
+ */
+-static int check_max_stack_depth(struct bpf_verifier_env *env)
++static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
+ {
+- int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
+ struct bpf_subprog_info *subprog = env->subprog_info;
+ struct bpf_insn *insn = env->prog->insnsi;
++ int depth = 0, frame = 0, i, subprog_end;
+ bool tail_call_reachable = false;
+ int ret_insn[MAX_CALL_FRAMES];
+ int ret_prog[MAX_CALL_FRAMES];
+ int j;
+
++ i = subprog[idx].start;
+ process_func:
+ /* protect against potential stack overflow that might happen when
+ * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
+@@ -4398,6 +4399,22 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
+ goto continue_func;
+ }
+
++static int check_max_stack_depth(struct bpf_verifier_env *env)
++{
++ struct bpf_subprog_info *si = env->subprog_info;
++ int ret;
++
++ for (int i = 0; i < env->subprog_cnt; i++) {
++ if (!i || si[i].is_async_cb) {
++ ret = check_max_stack_depth_subprog(env, i);
++ if (ret < 0)
++ return ret;
++ }
++ continue;
++ }
++ return 0;
++}
++
+ #ifndef CONFIG_BPF_JIT_ALWAYS_ON
+ static int get_callee_stack_depth(struct bpf_verifier_env *env,
+ const struct bpf_insn *insn, int idx)
+--
+2.39.2
+
--- /dev/null
+From 76b79c254cf2d798a26a7e99c73226b2df0ff1bb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 May 2023 22:51:49 +0000
+Subject: bpf: tcp: Avoid taking fast sock lock in iterator
+
+From: Aditi Ghag <aditi.ghag@isovalent.com>
+
+[ Upstream commit 9378096e8a656fb5c4099b26b1370c56f056eab9 ]
+
+This is a preparatory commit to replace `lock_sock_fast` with
+`lock_sock`, and to allow BPF programs executed from the TCP sockets
+iterator to destroy TCP sockets using the bpf_sock_destroy kfunc
+(implemented in follow-up commits).
+
+Previously, the BPF TCP iterator was acquiring the sock lock with BH
+disabled. This led to scenarios where the sockets hash table bucket lock
+can be acquired with BH enabled in some paths versus disabled in others.
+In such a situation, the kernel issues a warning since it thinks that in
+the BH-enabled path the same bucket lock *might* be acquired again in
+softirq context (BH disabled), which would lead to a potential deadlock.
+Since bpf_sock_destroy also happens in process context, the potential
+deadlock warning is likely a false alarm.
+
+Here is a snippet of annotated stack trace that motivated this change:
+
+```
+
+Possible interrupt unsafe locking scenario:
+
+ CPU0 CPU1
+ ---- ----
+ lock(&h->lhash2[i].lock);
+ local_bh_disable();
+ lock(&h->lhash2[i].lock);
+kernel imagined possible scenario:
+ local_bh_disable(); /* Possible softirq */
+ lock(&h->lhash2[i].lock);
+*** Potential Deadlock ***
+
+process context:
+
+lock_acquire+0xcd/0x330
+_raw_spin_lock+0x33/0x40
+------> Acquire (bucket) lhash2.lock with BH enabled
+__inet_hash+0x4b/0x210
+inet_csk_listen_start+0xe6/0x100
+inet_listen+0x95/0x1d0
+__sys_listen+0x69/0xb0
+__x64_sys_listen+0x14/0x20
+do_syscall_64+0x3c/0x90
+entry_SYSCALL_64_after_hwframe+0x72/0xdc
+
+bpf_sock_destroy run from iterator:
+
+lock_acquire+0xcd/0x330
+_raw_spin_lock+0x33/0x40
+------> Acquire (bucket) lhash2.lock with BH disabled
+inet_unhash+0x9a/0x110
+tcp_set_state+0x6a/0x210
+tcp_abort+0x10d/0x200
+bpf_prog_6793c5ca50c43c0d_iter_tcp6_server+0xa4/0xa9
+bpf_iter_run_prog+0x1ff/0x340
+------> lock_sock_fast that acquires sock lock with BH disabled
+bpf_iter_tcp_seq_show+0xca/0x190
+bpf_seq_read+0x177/0x450
+
+```
+
+Also, Yonghong reported a deadlock for non-listening TCP sockets that
+this change resolves. Previously, `lock_sock_fast` held the sock spin
+lock with BH which was again being acquired in `tcp_abort`:
+
+```
+watchdog: BUG: soft lockup - CPU#0 stuck for 86s! [test_progs:2331]
+RIP: 0010:queued_spin_lock_slowpath+0xd8/0x500
+Call Trace:
+ <TASK>
+ _raw_spin_lock+0x84/0x90
+ tcp_abort+0x13c/0x1f0
+ bpf_prog_88539c5453a9dd47_iter_tcp6_client+0x82/0x89
+ bpf_iter_run_prog+0x1aa/0x2c0
+ ? preempt_count_sub+0x1c/0xd0
+ ? from_kuid_munged+0x1c8/0x210
+ bpf_iter_tcp_seq_show+0x14e/0x1b0
+ bpf_seq_read+0x36c/0x6a0
+
+bpf_iter_tcp_seq_show
+ lock_sock_fast
+ __lock_sock_fast
+ spin_lock_bh(&sk->sk_lock.slock);
+ /* * Fast path return with bottom halves disabled and * sock::sk_lock.slock held.* */
+
+ ...
+ tcp_abort
+ local_bh_disable();
+ spin_lock(&((sk)->sk_lock.slock)); // from bh_lock_sock(sk)
+
+```
+
+With the switch to `lock_sock`, it calls `spin_unlock_bh` before returning:
+
+```
+lock_sock
+ lock_sock_nested
+ spin_lock_bh(&sk->sk_lock.slock);
+ :
+ spin_unlock_bh(&sk->sk_lock.slock);
+```
+
+Acked-by: Yonghong Song <yhs@meta.com>
+Acked-by: Stanislav Fomichev <sdf@google.com>
+Signed-off-by: Aditi Ghag <aditi.ghag@isovalent.com>
+Link: https://lore.kernel.org/r/20230519225157.760788-2-aditi.ghag@isovalent.com
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_ipv4.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index b37c1bcb15097..a7de5ba74e7f7 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -2911,7 +2911,6 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+ struct sock *sk = v;
+- bool slow;
+ uid_t uid;
+ int ret;
+
+@@ -2919,7 +2918,7 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
+ return 0;
+
+ if (sk_fullsock(sk))
+- slow = lock_sock_fast(sk);
++ lock_sock(sk);
+
+ if (unlikely(sk_unhashed(sk))) {
+ ret = SEQ_SKIP;
+@@ -2943,7 +2942,7 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
+
+ unlock:
+ if (sk_fullsock(sk))
+- unlock_sock_fast(sk, slow);
++ release_sock(sk);
+ return ret;
+
+ }
+--
+2.39.2
+
--- /dev/null
+From 5841124edbf8b166987956c008ec9eafe491d36b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 12 Jul 2023 08:44:49 -0700
+Subject: bridge: Add extack warning when enabling STP in netns.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 56a16035bb6effb37177867cea94c13a8382f745 ]
+
+When we create an L2 loop on a bridge in netns, we will see a packet storm
+even if STP is enabled.
+
+ # unshare -n
+ # ip link add br0 type bridge
+ # ip link add veth0 type veth peer name veth1
+ # ip link set veth0 master br0 up
+ # ip link set veth1 master br0 up
+ # ip link set br0 type bridge stp_state 1
+ # ip link set br0 up
+ # sleep 30
+ # ip -s link show br0
+ 2: br0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default qlen 1000
+ link/ether b6:61:98:1c:1c:b5 brd ff:ff:ff:ff:ff:ff
+ RX: bytes packets errors dropped missed mcast
+ 956553768 12861249 0 0 0 12861249 <-. Keep
+ TX: bytes packets errors dropped carrier collsns | increasing
+ 1027834 11951 0 0 0 0 <-' rapidly
+
+This is because llc_rcv() drops all packets in non-root netns and BPDU
+is dropped.
+
+Let's add extack warning when enabling STP in netns.
+
+ # unshare -n
+ # ip link add br0 type bridge
+ # ip link set br0 type bridge stp_state 1
+ Warning: bridge: STP does not work in non-root netns.
+
+Note this commit will be reverted later when we namespacify the whole LLC
+infra.
+
+Fixes: e730c15519d0 ("[NET]: Make packet reception network namespace safe")
+Suggested-by: Harry Coin <hcoin@quietfountain.com>
+Link: https://lore.kernel.org/netdev/0f531295-e289-022d-5add-5ceffa0df9bc@quietfountain.com/
+Suggested-by: Ido Schimmel <idosch@idosch.org>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bridge/br_stp_if.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
+index 75204d36d7f90..b65962682771f 100644
+--- a/net/bridge/br_stp_if.c
++++ b/net/bridge/br_stp_if.c
+@@ -201,6 +201,9 @@ int br_stp_set_enabled(struct net_bridge *br, unsigned long val,
+ {
+ ASSERT_RTNL();
+
++ if (!net_eq(dev_net(br->dev), &init_net))
++ NL_SET_ERR_MSG_MOD(extack, "STP does not work in non-root netns");
++
+ if (br_mrp_enabled(br)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "STP can't be enabled if MRP is already enabled");
+--
+2.39.2
+
--- /dev/null
+From 34038040cc781e64ecfa341e776b1d3ca1839d8a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Jun 2023 08:13:23 +0200
+Subject: btrfs: be a bit more careful when setting mirror_num_ret in
+ btrfs_map_block
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 4e7de35eb7d1a1d4f2dda15f39fbedd4798a0b8d ]
+
+The mirror_num_ret is allowed to be NULL, although it has to be set when
+smap is set. Unfortunately that is not an invariant that static type
+checkers can express well enough, so add a NULL check to make sure they
+are fine.
+
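+The invariant is the common optional-out-parameter pattern; a minimal
+sketch (invented names, not the btrfs code):
+
+```
+#include <stdio.h>
+
+/* 'mirror_ret' may be NULL when the caller does not care about the
+ * mirror number, so every store through it must be guarded.
+ */
+static int map_block(int stripe_index, int *mirror_ret)
+{
+	if (mirror_ret)
+		*mirror_ret = stripe_index + 1;
+	return 0;
+}
+
+int main(void)
+{
+	int mirror;
+
+	map_block(2, &mirror);		/* caller wants the value */
+	printf("mirror %d\n", mirror);
+	map_block(2, NULL);		/* caller does not; must not crash */
+	return 0;
+}
+```
+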
+Fixes: 03793cbbc80f ("btrfs: add fast path for single device io in __btrfs_map_block")
+Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/volumes.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
+index 7433ae929fdcb..2e0832d70406c 100644
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -6595,11 +6595,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
+ if (patch_the_first_stripe_for_dev_replace) {
+ smap->dev = dev_replace->tgtdev;
+ smap->physical = physical_to_patch_in_first_stripe;
+- *mirror_num_ret = map->num_stripes + 1;
++ if (mirror_num_ret)
++ *mirror_num_ret = map->num_stripes + 1;
+ } else {
+ set_io_stripe(smap, map, stripe_index, stripe_offset,
+ stripe_nr);
+- *mirror_num_ret = mirror_num;
++ if (mirror_num_ret)
++ *mirror_num_ret = mirror_num;
+ }
+ *bioc_ret = NULL;
+ ret = 0;
+--
+2.39.2
+
--- /dev/null
+From 7a8eaa17077746c57f6fa160701348e82e480ae9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Jul 2023 08:56:33 +0000
+Subject: cifs: fix mid leak during reconnection after timeout threshold
+
+From: Shyam Prasad N <nspmangalore@gmail.com>
+
+[ Upstream commit 69cba9d3c1284e0838ae408830a02c4a063104bc ]
+
+When the number of responses with status of STATUS_IO_TIMEOUT
+exceeds a specified threshold (NUM_STATUS_IO_TIMEOUT), we reconnect
+the connection. But we do not return the mid, or the credits
+returned for the mid, or reduce the number of in-flight requests.
+
+This bug could cause the server->in_flight count to go bad,
+and also cause a leak in the mids.
+
+This change moves the check to a few lines below, after the
+response is decrypted, even if the response is read from the
+transform header. This way, the code for returning the mids
+can be reused.
+
+Also, the cifs_reconnect was reconnecting just the transport
+connection before. In case of multi-channel, this may not be
+what we want to do after several timeouts. Changed that to
+reconnect the session and the tree too.
+
+Also renamed NUM_STATUS_IO_TIMEOUT to a more appropriate name
+MAX_STATUS_IO_TIMEOUT.
+
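+A reduced model of the control-flow change (a userspace sketch of the
+demultiplex loop with invented helpers, not the cifs code):
+
+```
+#include <stdbool.h>
+#include <stdio.h>
+
+#define MAX_STATUS_IO_TIMEOUT 5
+
+static void handle_mid(int i)	{ printf("mid %d returned\n", i); }
+static void reconnect(void)	{ puts("reconnecting session and tree"); }
+
+int main(void)
+{
+	bool pending_reconnect = false;
+	int num_io_timeout = 0;
+
+	for (int i = 0; i < 8; i++) {
+		bool status_io_timeout = true;	/* pretend every reply timed out */
+
+		if (status_io_timeout &&
+		    ++num_io_timeout > MAX_STATUS_IO_TIMEOUT) {
+			/* previously: reconnect here and skip the rest of
+			 * the iteration, leaking the mid and its credits
+			 */
+			pending_reconnect = true;
+			num_io_timeout = 0;
+		}
+
+		handle_mid(i);	/* mid and credits are always returned */
+	}
+
+	if (pending_reconnect)	/* reconnect only after the mids are handled */
+		reconnect();
+	return 0;
+}
+```
+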
+Fixes: 8e670f77c4a5 ("Handle STATUS_IO_TIMEOUT gracefully")
+Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/smb/client/connect.c | 19 +++++++++++++++----
+ 1 file changed, 15 insertions(+), 4 deletions(-)
+
+diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
+index 935fe198a4baf..cbe08948baf4a 100644
+--- a/fs/smb/client/connect.c
++++ b/fs/smb/client/connect.c
+@@ -59,7 +59,7 @@ extern bool disable_legacy_dialects;
+ #define TLINK_IDLE_EXPIRE (600 * HZ)
+
+ /* Drop the connection to not overload the server */
+-#define NUM_STATUS_IO_TIMEOUT 5
++#define MAX_STATUS_IO_TIMEOUT 5
+
+ struct mount_ctx {
+ struct cifs_sb_info *cifs_sb;
+@@ -1162,6 +1162,7 @@ cifs_demultiplex_thread(void *p)
+ struct mid_q_entry *mids[MAX_COMPOUND];
+ char *bufs[MAX_COMPOUND];
+ unsigned int noreclaim_flag, num_io_timeout = 0;
++ bool pending_reconnect = false;
+
+ noreclaim_flag = memalloc_noreclaim_save();
+ cifs_dbg(FYI, "Demultiplex PID: %d\n", task_pid_nr(current));
+@@ -1201,6 +1202,8 @@ cifs_demultiplex_thread(void *p)
+ cifs_dbg(FYI, "RFC1002 header 0x%x\n", pdu_length);
+ if (!is_smb_response(server, buf[0]))
+ continue;
++
++ pending_reconnect = false;
+ next_pdu:
+ server->pdu_size = pdu_length;
+
+@@ -1258,10 +1261,13 @@ cifs_demultiplex_thread(void *p)
+ if (server->ops->is_status_io_timeout &&
+ server->ops->is_status_io_timeout(buf)) {
+ num_io_timeout++;
+- if (num_io_timeout > NUM_STATUS_IO_TIMEOUT) {
+- cifs_reconnect(server, false);
++ if (num_io_timeout > MAX_STATUS_IO_TIMEOUT) {
++ cifs_server_dbg(VFS,
++ "Number of request timeouts exceeded %d. Reconnecting",
++ MAX_STATUS_IO_TIMEOUT);
++
++ pending_reconnect = true;
+ num_io_timeout = 0;
+- continue;
+ }
+ }
+
+@@ -1308,6 +1314,11 @@ cifs_demultiplex_thread(void *p)
+ buf = server->smallbuf;
+ goto next_pdu;
+ }
++
++ /* do this reconnect at the very end after processing all MIDs */
++ if (pending_reconnect)
++ cifs_reconnect(server, true);
++
+ } /* end while !EXITING */
+
+ /* buffer usually freed in free_mid - need to free it here on exit */
+--
+2.39.2
+
--- /dev/null
+From 4aca3a9686777cc7cbeeafbea29e9349e546bc92 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Jun 2023 11:54:47 +0200
+Subject: devlink: report devlink_port_type_warn source device
+
+From: Petr Oros <poros@redhat.com>
+
+[ Upstream commit a52305a81d6bb74b90b400dfa56455d37872fe4b ]
+
+devlink_port_type_warn is scheduled for a devlink port and warns
+when the port type is not set. But from this warning it is not easy
+to find out which device (driver) has not set the devlink port type.
+
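+The fix recovers the port from the queued work item so the warning can
+name the device; a userspace sketch of that container_of() idiom
+(simplified types, not the devlink code):
+
+```
+#include <stddef.h>
+#include <stdio.h>
+
+#define container_of(ptr, type, member) \
+	((type *)((char *)(ptr) - offsetof(type, member)))
+
+struct work_struct { int pending; };
+
+struct port {				/* simplified devlink_port stand-in */
+	const char *dev_name;
+	struct work_struct type_warn_work;
+};
+
+static void port_type_warn(struct work_struct *work)
+{
+	/* recover the enclosing port from the embedded work item */
+	struct port *port = container_of(work, struct port, type_warn_work);
+
+	fprintf(stderr, "%s: Type was not set for devlink port.\n",
+		port->dev_name);
+}
+
+int main(void)
+{
+	struct port p = { .dev_name = "0000:41:00.0" };
+
+	port_type_warn(&p.type_warn_work);
+	return 0;
+}
+```
+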
+[ 3709.975552] Type was not set for devlink port.
+[ 3709.975579] WARNING: CPU: 1 PID: 13092 at net/devlink/leftover.c:6775 devlink_port_type_warn+0x11/0x20
+[ 3709.993967] Modules linked in: openvswitch nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nfnetlink bluetooth rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs vhost_net vhost vhost_iotlb tap tun bridge stp llc qrtr intel_rapl_msr intel_rapl_common i10nm_edac nfit libnvdimm x86_pkg_temp_thermal mlx5_ib intel_powerclamp coretemp dell_wmi ledtrig_audio sparse_keymap ipmi_ssif kvm_intel ib_uverbs rfkill ib_core video kvm iTCO_wdt acpi_ipmi intel_vsec irqbypass ipmi_si iTCO_vendor_support dcdbas ipmi_devintf mei_me ipmi_msghandler rapl mei intel_cstate isst_if_mmio isst_if_mbox_pci dell_smbios intel_uncore isst_if_common i2c_i801 dell_wmi_descriptor wmi_bmof i2c_smbus intel_pch_thermal pcspkr acpi_power_meter xfs libcrc32c sd_mod sg nvme_tcp mgag200 i2c_algo_bit nvme_fabrics drm_shmem_helper drm_kms_helper nvme syscopyarea ahci sysfillrect sysimgblt nvme_core fb_sys_fops crct10dif_pclmul libahci mlx5_core sfc crc32_pclmul nvme_common drm
+[ 3709.994030] crc32c_intel mtd t10_pi mlxfw libata tg3 mdio megaraid_sas psample ghash_clmulni_intel pci_hyperv_intf wmi dm_multipath sunrpc dm_mirror dm_region_hash dm_log dm_mod be2iscsi bnx2i cnic uio cxgb4i cxgb4 tls libcxgbi libcxgb qla4xxx iscsi_boot_sysfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi fuse
+[ 3710.108431] CPU: 1 PID: 13092 Comm: kworker/1:1 Kdump: loaded Not tainted 5.14.0-319.el9.x86_64 #1
+[ 3710.108435] Hardware name: Dell Inc. PowerEdge R750/0PJ80M, BIOS 1.8.2 09/14/2022
+[ 3710.108437] Workqueue: events devlink_port_type_warn
+[ 3710.108440] RIP: 0010:devlink_port_type_warn+0x11/0x20
+[ 3710.108443] Code: 84 76 fe ff ff 48 c7 03 20 0e 1a ad 31 c0 e9 96 fd ff ff 66 0f 1f 44 00 00 0f 1f 44 00 00 48 c7 c7 18 24 4e ad e8 ef 71 62 ff <0f> 0b c3 cc cc cc cc 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 f6 87
+[ 3710.108445] RSP: 0018:ff3b6d2e8b3c7e90 EFLAGS: 00010282
+[ 3710.108447] RAX: 0000000000000000 RBX: ff366d6580127080 RCX: 0000000000000027
+[ 3710.108448] RDX: 0000000000000027 RSI: 00000000ffff86de RDI: ff366d753f41f8c8
+[ 3710.108449] RBP: ff366d658ff5a0c0 R08: ff366d753f41f8c0 R09: ff3b6d2e8b3c7e18
+[ 3710.108450] R10: 0000000000000001 R11: 0000000000000023 R12: ff366d753f430600
+[ 3710.108451] R13: ff366d753f436900 R14: 0000000000000000 R15: ff366d753f436905
+[ 3710.108452] FS: 0000000000000000(0000) GS:ff366d753f400000(0000) knlGS:0000000000000000
+[ 3710.108453] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 3710.108454] CR2: 00007f1c57bc74e0 CR3: 000000111d26a001 CR4: 0000000000773ee0
+[ 3710.108456] PKRU: 55555554
+[ 3710.108457] Call Trace:
+[ 3710.108458] <TASK>
+[ 3710.108459] process_one_work+0x1e2/0x3b0
+[ 3710.108466] ? rescuer_thread+0x390/0x390
+[ 3710.108468] worker_thread+0x50/0x3a0
+[ 3710.108471] ? rescuer_thread+0x390/0x390
+[ 3710.108473] kthread+0xdd/0x100
+[ 3710.108477] ? kthread_complete_and_exit+0x20/0x20
+[ 3710.108479] ret_from_fork+0x1f/0x30
+[ 3710.108485] </TASK>
+[ 3710.108486] ---[ end trace 1b4b23cd0c65d6a0 ]---
+
+After patch:
+[ 402.473064] ice 0000:41:00.0: Type was not set for devlink port.
+[ 402.473064] ice 0000:41:00.1: Type was not set for devlink port.
+
+Signed-off-by: Petr Oros <poros@redhat.com>
+Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
+Reviewed-by: Jakub Kicinski <kuba@kernel.org>
+Link: https://lore.kernel.org/r/20230615095447.8259-1-poros@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/devlink.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/net/core/devlink.c b/net/core/devlink.c
+index 2aa77d4b80d0a..5a4a4b34ac15c 100644
+--- a/net/core/devlink.c
++++ b/net/core/devlink.c
+@@ -9826,7 +9826,10 @@ EXPORT_SYMBOL_GPL(devlink_free);
+
+ static void devlink_port_type_warn(struct work_struct *work)
+ {
+- WARN(true, "Type was not set for devlink port.");
++ struct devlink_port *port = container_of(to_delayed_work(work),
++ struct devlink_port,
++ type_warn_dw);
++ dev_warn(port->devlink->dev, "Type was not set for devlink port.");
+ }
+
+ static bool devlink_port_type_should_warn(struct devlink_port *devlink_port)
+--
+2.39.2
+
--- /dev/null
+From 20d5e3268aeb5cd2827f61521d33a0203f680509 Mon Sep 17 00:00:00 2001
+From: hackyzh002 <hackyzh002@gmail.com>
+Date: Wed, 19 Apr 2023 20:20:58 +0800
+Subject: [PATCH AUTOSEL 4.19 01/11] drm/radeon: Fix integer overflow in
+ radeon_cs_parser_init
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit f828b681d0cd566f86351c0b913e6cb6ed8c7b9c ]
+
+The type of size is unsigned. If size is 0x40000000, there will be an
+integer overflow: size will be zero after size *= sizeof(uint32_t),
+which will cause uninitialized memory to be referenced later.
+
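+The overflow is easy to reproduce in isolation (a standalone sketch, not
+the radeon code):
+
+```
+#include <stdint.h>
+#include <stdio.h>
+
+int main(void)
+{
+	unsigned int size = 0x40000000;	/* userspace-controlled chunk count */
+	uint64_t size64 = size;
+
+	size *= sizeof(uint32_t);	/* 0x100000000 truncated to 0 */
+	size64 *= sizeof(uint32_t);	/* cannot wrap for 32-bit inputs */
+
+	printf("unsigned: %u, u64: %llu\n",
+	       size, (unsigned long long)size64);
+	return 0;
+}
+```
+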
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: hackyzh002 <hackyzh002@gmail.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/radeon/radeon_cs.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
+index 1ae31dbc61c64..5e61abb3dce5c 100644
+--- a/drivers/gpu/drm/radeon/radeon_cs.c
++++ b/drivers/gpu/drm/radeon/radeon_cs.c
+@@ -265,7 +265,8 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
+ {
+ struct drm_radeon_cs *cs = data;
+ uint64_t *chunk_array_ptr;
+- unsigned size, i;
++ u64 size;
++ unsigned i;
+ u32 ring = RADEON_CS_RING_GFX;
+ s32 priority = 0;
+
+--
+2.39.2
+
--- /dev/null
+From 9bbaa84ecaeca40ae4d2d1cd4ab363546113da7a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 00:34:05 +0200
+Subject: dsa: mv88e6xxx: Do a final check before timing out
+
+From: Linus Walleij <linus.walleij@linaro.org>
+
+[ Upstream commit 95ce158b6c93b28842b54b42ad1cb221b9844062 ]
+
+I get sporadic timeouts from the driver when using the
+MV88E6352. Reading the status again after the loop fixes the
+problem: the operation is successful but goes undetected.
+
+Some added prints show things like this:
+
+[ 58.356209] mv88e6085 mdio_mux-0.1:00: Timeout while waiting
+ for switch, addr 1b reg 0b, mask 8000, val 0000, data c000
+[ 58.367487] mv88e6085 mdio_mux-0.1:00: Timeout waiting for
+ ATU op 4000, fid 0001
+(...)
+[ 61.826293] mv88e6085 mdio_mux-0.1:00: Timeout while waiting
+ for switch, addr 1c reg 18, mask 8000, val 0000, data 9860
+[ 61.837560] mv88e6085 mdio_mux-0.1:00: Timeout waiting
+ for PHY command 1860 to complete
+
+The reason is probably not the commands: I think those are
+mostly fine with the 50+50ms timeout, but the problem
+appears when OpenWrt brings up several interfaces in
+parallel on a system with 7 populated ports: if one of
+them takes more than 50 ms and waits, one or more of the
+others can get stuck on the mutex for the switch and then
+this can easily multiply.
+
+As we sleep and wait, the function needs a final check
+after exiting the loop to see if we were successful.
+
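+The pattern is a sleeping poll that can oversleep past the deadline; a
+generic userspace sketch of the "check once more after the loop" fix
+(illustrative names only, not the driver code):
+
+```
+#include <stdbool.h>
+#include <stdio.h>
+
+static int polls_left = 1;
+
+/* stand-in for reading the busy bit; the hardware finishes eventually */
+static bool hw_ready(void)
+{
+	return polls_left-- <= 0;
+}
+
+static int wait_ready(int max_polls)
+{
+	for (int i = 0; i < max_polls; i++) {
+		if (hw_ready())
+			return 0;
+		/* sleep here; the hardware may finish while we sleep */
+	}
+
+	/* final check: the last sleep may have hidden a success */
+	if (hw_ready())
+		return 0;
+
+	fprintf(stderr, "timeout while waiting for switch\n");
+	return -1;
+}
+
+int main(void)
+{
+	return wait_ready(1) ? 1 : 0;
+}
+```
+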
+Suggested-by: Andrew Lunn <andrew@lunn.ch>
+Cc: Tobias Waldekranz <tobias@waldekranz.com>
+Fixes: 35da1dfd9484 ("net: dsa: mv88e6xxx: Improve performance of busy bit polling")
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://lore.kernel.org/r/20230712223405.861899-1-linus.walleij@linaro.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/mv88e6xxx/chip.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
+index 4db1652015d1d..b69bd44ada1f2 100644
+--- a/drivers/net/dsa/mv88e6xxx/chip.c
++++ b/drivers/net/dsa/mv88e6xxx/chip.c
+@@ -109,6 +109,13 @@ int mv88e6xxx_wait_mask(struct mv88e6xxx_chip *chip, int addr, int reg,
+ usleep_range(1000, 2000);
+ }
+
++ err = mv88e6xxx_read(chip, addr, reg, &data);
++ if (err)
++ return err;
++
++ if ((data & mask) == val)
++ return 0;
++
+ dev_err(chip->dev, "Timeout while waiting for switch\n");
+ return -ETIMEDOUT;
+ }
+--
+2.39.2
+
--- /dev/null
+From 3f351b5e8558e6d06eb00f3a0b3ce2ac4d1bd613 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 15 Jul 2023 16:16:56 +0800
+Subject: fbdev: au1200fb: Fix missing IRQ check in au1200fb_drv_probe
+
+From: Zhang Shurong <zhang_shurong@foxmail.com>
+
+[ Upstream commit 4e88761f5f8c7869f15a2046b1a1116f4fab4ac8 ]
+
+This function misses checking the return value of platform_get_irq() and
+may pass a negative error code to request_irq(), which takes an unsigned
+IRQ number, causing it to fail with -EINVAL and overriding the original
+error code.
+
+Fix this by not calling request_irq() with invalid IRQ numbers.
+
+Fixes: 1630d85a8312 ("au1200fb: fix hardcoded IRQ")
+Signed-off-by: Zhang Shurong <zhang_shurong@foxmail.com>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/au1200fb.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c
+index b6b22fa4a8a01..fd3ff398d234a 100644
+--- a/drivers/video/fbdev/au1200fb.c
++++ b/drivers/video/fbdev/au1200fb.c
+@@ -1732,6 +1732,9 @@ static int au1200fb_drv_probe(struct platform_device *dev)
+
+ /* Now hook interrupt too */
+ irq = platform_get_irq(dev, 0);
++ if (irq < 0)
++ return irq;
++
+ ret = request_irq(irq, au1200fb_handle_irq,
+ IRQF_SHARED, "lcd", (void *)dev);
+ if (ret) {
+--
+2.39.2
+
--- /dev/null
+From 37392063869cec1e0f260e3d3edc86270b958c95 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Jul 2023 21:19:58 +0800
+Subject: fbdev: imxfb: Removed unneeded release_mem_region
+
+From: Yangtao Li <frank.li@vivo.com>
+
+[ Upstream commit 45fcc058a75bf5d65cf4c32da44a252fbe873cd4 ]
+
+Remove the unnecessary release_mem_region() from the error path to
+prevent the mem region from being released twice, avoiding a resource
+leak or other unexpected issues.
+
+Fixes: b083c22d5114 ("video: fbdev: imxfb: Convert request_mem_region + ioremap to devm_ioremap_resource")
+Signed-off-by: Yangtao Li <frank.li@vivo.com>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/imxfb.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c
+index 61731921011d5..36ada87b49a49 100644
+--- a/drivers/video/fbdev/imxfb.c
++++ b/drivers/video/fbdev/imxfb.c
+@@ -1043,7 +1043,6 @@ static int imxfb_probe(struct platform_device *pdev)
+ failed_map:
+ failed_ioremap:
+ failed_getclock:
+- release_mem_region(res->start, resource_size(res));
+ failed_of_parse:
+ kfree(info->pseudo_palette);
+ failed_init:
+--
+2.39.2
+
--- /dev/null
+From c6e2909b7334117823ea14b1738ea3584813e756 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Jun 2023 15:24:37 +0200
+Subject: fbdev: imxfb: warn about invalid left/right margin
+
+From: Martin Kaiser <martin@kaiser.cx>
+
+[ Upstream commit 4e47382fbca916d7db95cbf9e2d7ca2e9d1ca3fe ]
+
+Warn about invalid var->left_margin or var->right_margin. Their values
+are read from the device tree.
+
+We store var->left_margin-3 and var->right_margin-1 in register
+fields. These fields should be >= 0.
+
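+Since the register fields hold left_margin - 3 and right_margin - 1, the
+margins must be at least 3 and 1; a tiny sketch of the added validation
+(illustrative, not the driver code):
+
+```
+#include <stdio.h>
+
+static void check_margins(unsigned int left, unsigned int right)
+{
+	/* the hardware field stores left - 3, which must be >= 0 */
+	if (left < 3 || left > 255)
+		fprintf(stderr, "invalid left_margin %u\n", left);
+	/* the hardware field stores right - 1, which must be >= 0 */
+	if (right < 1 || right > 255)
+		fprintf(stderr, "invalid right_margin %u\n", right);
+}
+
+int main(void)
+{
+	check_margins(2, 0);	/* both warned about */
+	check_margins(3, 1);	/* both accepted */
+	return 0;
+}
+```
+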
+Fixes: 7e8549bcee00 ("imxfb: Fix margin settings")
+Signed-off-by: Martin Kaiser <martin@kaiser.cx>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/imxfb.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c
+index 51fde1b2a7938..61731921011d5 100644
+--- a/drivers/video/fbdev/imxfb.c
++++ b/drivers/video/fbdev/imxfb.c
+@@ -613,10 +613,10 @@ static int imxfb_activate_var(struct fb_var_screeninfo *var, struct fb_info *inf
+ if (var->hsync_len < 1 || var->hsync_len > 64)
+ printk(KERN_ERR "%s: invalid hsync_len %d\n",
+ info->fix.id, var->hsync_len);
+- if (var->left_margin > 255)
++ if (var->left_margin < 3 || var->left_margin > 255)
+ printk(KERN_ERR "%s: invalid left_margin %d\n",
+ info->fix.id, var->left_margin);
+- if (var->right_margin > 255)
++ if (var->right_margin < 1 || var->right_margin > 255)
+ printk(KERN_ERR "%s: invalid right_margin %d\n",
+ info->fix.id, var->right_margin);
+ if (var->yres < 1 || var->yres > ymax_mask)
+--
+2.39.2
+
--- /dev/null
+From ffb509c36e5b36da98c9fb1f8f539f0cbf606665 Mon Sep 17 00:00:00 2001
+From: Immad Mir <mirimmad17@gmail.com>
+Date: Fri, 23 Jun 2023 19:17:08 +0530
+Subject: [PATCH AUTOSEL 4.19 11/11] FS: JFS: Check for read-only mounted
+ filesystem in txBegin
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit 95e2b352c03b0a86c5717ba1d24ea20969abcacc ]
+
+ This patch adds a check for a read-only mounted filesystem
+ in txBegin before starting a transaction, potentially saving
+ us from a NULL pointer deref.
+
+Signed-off-by: Immad Mir <mirimmad17@gmail.com>
+Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/jfs/jfs_txnmgr.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
+index 78789c5ed36b0..e10db01f253b8 100644
+--- a/fs/jfs/jfs_txnmgr.c
++++ b/fs/jfs/jfs_txnmgr.c
+@@ -367,6 +367,11 @@ tid_t txBegin(struct super_block *sb, int flag)
+ jfs_info("txBegin: flag = 0x%x", flag);
+ log = JFS_SBI(sb)->log;
+
++ if (!log) {
++ jfs_error(sb, "read-only filesystem\n");
++ return 0;
++ }
++
+ TXN_LOCK();
+
+ INCREMENT(TxStat.txBegin);
+--
+2.39.2
+
--- /dev/null
+From ced92b3b30ff868a14d5763842e5299bdad70edb Mon Sep 17 00:00:00 2001
+From: Immad Mir <mirimmad17@gmail.com>
+Date: Fri, 23 Jun 2023 19:14:01 +0530
+Subject: [PATCH AUTOSEL 4.19 10/11] FS: JFS: Fix null-ptr-deref Read in
+ txBegin
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit 47cfdc338d674d38f4b2f22b7612cc6a2763ba27 ]
+
+ Syzkaller reported an issue where txBegin may be called
+ on a superblock in a read-only mounted filesystem, which leads
+ to a NULL pointer deref. This could be solved by checking if
+ the filesystem is read-only before calling txBegin, and returning
+ with an appropriate error code.
+
+Reported-By: syzbot+f1faa20eec55e0c8644c@syzkaller.appspotmail.com
+Link: https://syzkaller.appspot.com/bug?id=be7e52c50c5182cc09a09ea6fc456446b2039de3
+
+Signed-off-by: Immad Mir <mirimmad17@gmail.com>
+Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/jfs/namei.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
+index 14528c0ffe635..c2c439acbb780 100644
+--- a/fs/jfs/namei.c
++++ b/fs/jfs/namei.c
+@@ -811,6 +811,11 @@ static int jfs_link(struct dentry *old_dentry,
+ if (rc)
+ goto out;
+
++ if (isReadOnly(ip)) {
++ jfs_error(ip->i_sb, "read-only filesystem\n");
++ return -EROFS;
++ }
++
+ tid = txBegin(ip->i_sb, 0);
+
+ mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT);
+--
+2.39.2
+
--- /dev/null
+From 35a29fcb694a5f3ee27d66f57f19795b367fd883 Mon Sep 17 00:00:00 2001
+From: Yogesh <yogi.kernel@gmail.com>
+Date: Thu, 22 Jun 2023 00:07:03 +0530
+Subject: [PATCH AUTOSEL 4.19 08/11] fs: jfs: Fix UBSAN:
+ array-index-out-of-bounds in dbAllocDmapLev
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit 4e302336d5ca1767a06beee7596a72d3bdc8d983 ]
+
+Syzkaller reported the following issue:
+
+UBSAN: array-index-out-of-bounds in fs/jfs/jfs_dmap.c:1965:6
+index -84 is out of range for type 's8[341]' (aka 'signed char[341]')
+CPU: 1 PID: 4995 Comm: syz-executor146 Not tainted 6.4.0-rc6-syzkaller-00037-gb6dad5178cea #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/27/2023
+Call Trace:
+ <TASK>
+ __dump_stack lib/dump_stack.c:88 [inline]
+ dump_stack_lvl+0x1e7/0x2d0 lib/dump_stack.c:106
+ ubsan_epilogue lib/ubsan.c:217 [inline]
+ __ubsan_handle_out_of_bounds+0x11c/0x150 lib/ubsan.c:348
+ dbAllocDmapLev+0x3e5/0x430 fs/jfs/jfs_dmap.c:1965
+ dbAllocCtl+0x113/0x920 fs/jfs/jfs_dmap.c:1809
+ dbAllocAG+0x28f/0x10b0 fs/jfs/jfs_dmap.c:1350
+ dbAlloc+0x658/0xca0 fs/jfs/jfs_dmap.c:874
+ dtSplitUp fs/jfs/jfs_dtree.c:974 [inline]
+ dtInsert+0xda7/0x6b00 fs/jfs/jfs_dtree.c:863
+ jfs_create+0x7b6/0xbb0 fs/jfs/namei.c:137
+ lookup_open fs/namei.c:3492 [inline]
+ open_last_lookups fs/namei.c:3560 [inline]
+ path_openat+0x13df/0x3170 fs/namei.c:3788
+ do_filp_open+0x234/0x490 fs/namei.c:3818
+ do_sys_openat2+0x13f/0x500 fs/open.c:1356
+ do_sys_open fs/open.c:1372 [inline]
+ __do_sys_openat fs/open.c:1388 [inline]
+ __se_sys_openat fs/open.c:1383 [inline]
+ __x64_sys_openat+0x247/0x290 fs/open.c:1383
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+RIP: 0033:0x7f1f4e33f7e9
+Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 51 14 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007ffc21129578 EFLAGS: 00000246 ORIG_RAX: 0000000000000101
+RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f1f4e33f7e9
+RDX: 000000000000275a RSI: 0000000020000040 RDI: 00000000ffffff9c
+RBP: 00007f1f4e2ff080 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 00007f1f4e2ff110
+R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
+ </TASK>
+
+The bug occurs when the dbAllocDmapLev() function attempts to access
+dp->tree.stree[leafidx + LEAFIND] while the leafidx value is negative.
+
+To rectify this, the patch introduces a safeguard within the
+dbAllocDmapLev() function. A check has been added to verify if leafidx is
+negative. If it is, the function immediately returns an I/O error, preventing
+any further execution that could potentially cause harm.
+
+Tested via syzbot.
+
+Reported-by: syzbot+853a6f4dfa3cf37d3aea@syzkaller.appspotmail.com
+Link: https://syzkaller.appspot.com/bug?extid=ae2f5a27a07ae44b0f17
+Signed-off-by: Yogesh <yogi.kernel@gmail.com>
+Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/jfs/jfs_dmap.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
+index 3ad0a33e0443b..6fdf18caf241d 100644
+--- a/fs/jfs/jfs_dmap.c
++++ b/fs/jfs/jfs_dmap.c
+@@ -2034,6 +2034,9 @@ dbAllocDmapLev(struct bmap * bmp,
+ if (dbFindLeaf((dmtree_t *) & dp->tree, l2nb, &leafidx))
+ return -ENOSPC;
+
++ if (leafidx < 0)
++ return -EIO;
++
+ /* determine the block number within the file system corresponding
+ * to the leaf at which free space was found.
+ */
+--
+2.39.2
+
--- /dev/null
+From dc3ca84683c4bb50761998adaf575f383748ba73 Mon Sep 17 00:00:00 2001
+From: Marco Morandini <marco.morandini@polimi.it>
+Date: Tue, 30 May 2023 15:40:08 +0200
+Subject: [PATCH AUTOSEL 4.19 05/11] HID: add quirk for 03f0:464a HP Elite
+ Presenter Mouse
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit 0db117359e47750d8bd310d19f13e1c4ef7fc26a ]
+
+The HP Elite Presenter Mouse HID Report Descriptor shows
+two mice (Report IDs 0x1 and 0x2), one keypad (Report ID 0x5),
+and two Consumer Controls (Report IDs 0x6 and 0x3).
+Prior to this commit it registered one mouse, one keypad
+and one Consumer Control, and it was usable only as a
+digital laser pointer (one of the two mice). This patch defines
+the 464a USB device ID and enables the HID_QUIRK_MULTI_INPUT
+quirk for it, allowing the device to be used both as a mouse
+and a digital laser pointer.
+
+Signed-off-by: Marco Morandini <marco.morandini@polimi.it>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hid/hid-ids.h | 1 +
+ drivers/hid/hid-quirks.c | 1 +
+ 2 files changed, 2 insertions(+)
+
+diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
+index c0ba8d6f4978f..a9d6f8acf70b5 100644
+--- a/drivers/hid/hid-ids.h
++++ b/drivers/hid/hid-ids.h
+@@ -571,6 +571,7 @@
+ #define USB_DEVICE_ID_UGCI_FIGHTING 0x0030
+
+ #define USB_VENDOR_ID_HP 0x03f0
++#define USB_PRODUCT_ID_HP_ELITE_PRESENTER_MOUSE_464A 0x464a
+ #define USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0A4A 0x0a4a
+ #define USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A 0x0b4a
+ #define USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE 0x134a
+diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
+index 8de294aa3184a..a2ab338166e61 100644
+--- a/drivers/hid/hid-quirks.c
++++ b/drivers/hid/hid-quirks.c
+@@ -98,6 +98,7 @@ static const struct hid_device_id hid_quirks[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD_A096), HID_QUIRK_NO_INIT_REPORTS },
+ { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD_A293), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0A4A), HID_QUIRK_ALWAYS_POLL },
++ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_ELITE_PRESENTER_MOUSE_464A), HID_QUIRK_MULTI_INPUT },
+ { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_094A), HID_QUIRK_ALWAYS_POLL },
+--
+2.39.2
+
--- /dev/null
+From d67f7140ec52c786fa3e1e17d5a41330d5965e52 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 5 Jun 2023 10:52:25 -0400
+Subject: iavf: fix a deadlock caused by rtnl and driver's lock circular
+ dependencies
+
+From: Ahmed Zaki <ahmed.zaki@intel.com>
+
+[ Upstream commit d1639a17319ba78a018280cd2df6577a7e5d9fab ]
+
+A driver's lock (crit_lock) is used to serialize all the driver's tasks.
+Lockdep, however, shows a circular dependency between rtnl and
+crit_lock. This happens when an ndo that already holds the rtnl requests
+the driver to reset, since the reset task (in some paths) tries to grab
+rtnl to either change the real number of queues or update netdev features.
+
+ [566.241851] ======================================================
+ [566.241893] WARNING: possible circular locking dependency detected
+ [566.241936] 6.2.14-100.fc36.x86_64+debug #1 Tainted: G OE
+ [566.241984] ------------------------------------------------------
+ [566.242025] repro.sh/2604 is trying to acquire lock:
+ [566.242061] ffff9280fc5ceee8 (&adapter->crit_lock){+.+.}-{3:3}, at: iavf_close+0x3c/0x240 [iavf]
+ [566.242167]
+ but task is already holding lock:
+ [566.242209] ffffffff9976d350 (rtnl_mutex){+.+.}-{3:3}, at: iavf_remove+0x6b5/0x730 [iavf]
+ [566.242300]
+ which lock already depends on the new lock.
+
+ [566.242353]
+ the existing dependency chain (in reverse order) is:
+ [566.242401]
+ -> #1 (rtnl_mutex){+.+.}-{3:3}:
+ [566.242451] __mutex_lock+0xc1/0xbb0
+ [566.242489] iavf_init_interrupt_scheme+0x179/0x440 [iavf]
+ [566.242560] iavf_watchdog_task+0x80b/0x1400 [iavf]
+ [566.242627] process_one_work+0x2b3/0x560
+ [566.242663] worker_thread+0x4f/0x3a0
+ [566.242696] kthread+0xf2/0x120
+ [566.242730] ret_from_fork+0x29/0x50
+ [566.242763]
+ -> #0 (&adapter->crit_lock){+.+.}-{3:3}:
+ [566.242815] __lock_acquire+0x15ff/0x22b0
+ [566.242869] lock_acquire+0xd2/0x2c0
+ [566.242901] __mutex_lock+0xc1/0xbb0
+ [566.242934] iavf_close+0x3c/0x240 [iavf]
+ [566.242997] __dev_close_many+0xac/0x120
+ [566.243036] dev_close_many+0x8b/0x140
+ [566.243071] unregister_netdevice_many_notify+0x165/0x7c0
+ [566.243116] unregister_netdevice_queue+0xd3/0x110
+ [566.243157] iavf_remove+0x6c1/0x730 [iavf]
+ [566.243217] pci_device_remove+0x33/0xa0
+ [566.243257] device_release_driver_internal+0x1bc/0x240
+ [566.243299] pci_stop_bus_device+0x6c/0x90
+ [566.243338] pci_stop_and_remove_bus_device+0xe/0x20
+ [566.243380] pci_iov_remove_virtfn+0xd1/0x130
+ [566.243417] sriov_disable+0x34/0xe0
+ [566.243448] ice_free_vfs+0x2da/0x330 [ice]
+ [566.244383] ice_sriov_configure+0x88/0xad0 [ice]
+ [566.245353] sriov_numvfs_store+0xde/0x1d0
+ [566.246156] kernfs_fop_write_iter+0x15e/0x210
+ [566.246921] vfs_write+0x288/0x530
+ [566.247671] ksys_write+0x74/0xf0
+ [566.248408] do_syscall_64+0x58/0x80
+ [566.249145] entry_SYSCALL_64_after_hwframe+0x72/0xdc
+ [566.249886]
+ other info that might help us debug this:
+
+ [566.252014] Possible unsafe locking scenario:
+
+ [566.253432] CPU0 CPU1
+ [566.254118] ---- ----
+ [566.254800] lock(rtnl_mutex);
+ [566.255514] lock(&adapter->crit_lock);
+ [566.256233] lock(rtnl_mutex);
+ [566.256897] lock(&adapter->crit_lock);
+ [566.257388]
+ *** DEADLOCK ***
+
+The deadlock can be triggered by a script that is continuously resetting
+the VF adapter while doing other operations requiring RTNL, e.g:
+
+ while :; do
+ ip link set $VF up
+ ethtool --set-channels $VF combined 2
+ ip link set $VF down
+ ip link set $VF up
+ ethtool --set-channels $VF combined 4
+ ip link set $VF down
+ done
+
+Any operation that triggers a reset can substitute "ethtool --set-channels".
+
+As a fix, add a new task "finish_config" that does all the work which
+needs the rtnl lock. With the exception of iavf_remove(), all work that
+requires rtnl should be done from this task.
+
+As for iavf_remove(), at the point where we need to call
+unregister_netdevice() (and grab rtnl_lock), we make sure the finish_config
+task is not running (cancel_work_sync()) to safely grab rtnl. Subsequent
+finish_config work cannot restart after that since the task is guarded
+by the __IAVF_IN_REMOVE_TASK bit in iavf_schedule_finish_config().
+
+Fixes: 5ac49f3c2702 ("iavf: use mutexes for locking of critical sections")
+Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
+Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf.h | 2 +
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 114 +++++++++++++-----
+ .../net/ethernet/intel/iavf/iavf_virtchnl.c | 1 +
+ 3 files changed, 85 insertions(+), 32 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
+index 2fe44e865d0a2..305675042fe55 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf.h
++++ b/drivers/net/ethernet/intel/iavf/iavf.h
+@@ -255,6 +255,7 @@ struct iavf_adapter {
+ struct workqueue_struct *wq;
+ struct work_struct reset_task;
+ struct work_struct adminq_task;
++ struct work_struct finish_config;
+ struct delayed_work client_task;
+ wait_queue_head_t down_waitqueue;
+ wait_queue_head_t reset_waitqueue;
+@@ -521,6 +522,7 @@ int iavf_process_config(struct iavf_adapter *adapter);
+ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter);
+ void iavf_schedule_reset(struct iavf_adapter *adapter);
+ void iavf_schedule_request_stats(struct iavf_adapter *adapter);
++void iavf_schedule_finish_config(struct iavf_adapter *adapter);
+ void iavf_reset(struct iavf_adapter *adapter);
+ void iavf_set_ethtool_ops(struct net_device *netdev);
+ void iavf_update_stats(struct iavf_adapter *adapter);
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index c2739071149de..0e201d690f0dd 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -1702,10 +1702,10 @@ static int iavf_set_interrupt_capability(struct iavf_adapter *adapter)
+ adapter->msix_entries[vector].entry = vector;
+
+ err = iavf_acquire_msix_vectors(adapter, v_budget);
++ if (!err)
++ iavf_schedule_finish_config(adapter);
+
+ out:
+- netif_set_real_num_rx_queues(adapter->netdev, pairs);
+- netif_set_real_num_tx_queues(adapter->netdev, pairs);
+ return err;
+ }
+
+@@ -1925,9 +1925,7 @@ static int iavf_init_interrupt_scheme(struct iavf_adapter *adapter)
+ goto err_alloc_queues;
+ }
+
+- rtnl_lock();
+ err = iavf_set_interrupt_capability(adapter);
+- rtnl_unlock();
+ if (err) {
+ dev_err(&adapter->pdev->dev,
+ "Unable to setup interrupt capabilities\n");
+@@ -2013,6 +2011,78 @@ static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter, bool runni
+ return err;
+ }
+
++/**
++ * iavf_finish_config - do all netdev work that needs RTNL
++ * @work: our work_struct
++ *
++ * Do work that needs both RTNL and crit_lock.
++ **/
++static void iavf_finish_config(struct work_struct *work)
++{
++ struct iavf_adapter *adapter;
++ int pairs, err;
++
++ adapter = container_of(work, struct iavf_adapter, finish_config);
++
++ /* Always take RTNL first to prevent circular lock dependency */
++ rtnl_lock();
++ mutex_lock(&adapter->crit_lock);
++
++ if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) &&
++ adapter->netdev_registered &&
++ !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) {
++ netdev_update_features(adapter->netdev);
++ adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES;
++ }
++
++ switch (adapter->state) {
++ case __IAVF_DOWN:
++ if (!adapter->netdev_registered) {
++ err = register_netdevice(adapter->netdev);
++ if (err) {
++ dev_err(&adapter->pdev->dev, "Unable to register netdev (%d)\n",
++ err);
++
++ /* go back and try again.*/
++ iavf_free_rss(adapter);
++ iavf_free_misc_irq(adapter);
++ iavf_reset_interrupt_capability(adapter);
++ iavf_change_state(adapter,
++ __IAVF_INIT_CONFIG_ADAPTER);
++ goto out;
++ }
++ adapter->netdev_registered = true;
++ }
++
++ /* Set the real number of queues when reset occurs while
++ * state == __IAVF_DOWN
++ */
++ fallthrough;
++ case __IAVF_RUNNING:
++ pairs = adapter->num_active_queues;
++ netif_set_real_num_rx_queues(adapter->netdev, pairs);
++ netif_set_real_num_tx_queues(adapter->netdev, pairs);
++ break;
++
++ default:
++ break;
++ }
++
++out:
++ mutex_unlock(&adapter->crit_lock);
++ rtnl_unlock();
++}
++
++/**
++ * iavf_schedule_finish_config - Set the flags and schedule a reset event
++ * @adapter: board private structure
++ **/
++void iavf_schedule_finish_config(struct iavf_adapter *adapter)
++{
++ if (!test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
++ queue_work(adapter->wq, &adapter->finish_config);
++}
++
+ /**
+ * iavf_process_aq_command - process aq_required flags
+ * and sends aq command
+@@ -2650,22 +2720,8 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter)
+
+ netif_carrier_off(netdev);
+ adapter->link_up = false;
+-
+- /* set the semaphore to prevent any callbacks after device registration
+- * up to time when state of driver will be set to __IAVF_DOWN
+- */
+- rtnl_lock();
+- if (!adapter->netdev_registered) {
+- err = register_netdevice(netdev);
+- if (err) {
+- rtnl_unlock();
+- goto err_register;
+- }
+- }
+-
+- adapter->netdev_registered = true;
+-
+ netif_tx_stop_all_queues(netdev);
++
+ if (CLIENT_ALLOWED(adapter)) {
+ err = iavf_lan_add_device(adapter);
+ if (err)
+@@ -2678,7 +2734,6 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter)
+
+ iavf_change_state(adapter, __IAVF_DOWN);
+ set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
+- rtnl_unlock();
+
+ iavf_misc_irq_enable(adapter);
+ wake_up(&adapter->down_waitqueue);
+@@ -2698,10 +2753,11 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter)
+ /* request initial VLAN offload settings */
+ iavf_set_vlan_offload_features(adapter, 0, netdev->features);
+
++ iavf_schedule_finish_config(adapter);
+ return;
++
+ err_mem:
+ iavf_free_rss(adapter);
+-err_register:
+ iavf_free_misc_irq(adapter);
+ err_sw_init:
+ iavf_reset_interrupt_capability(adapter);
+@@ -2728,15 +2784,6 @@ static void iavf_watchdog_task(struct work_struct *work)
+ goto restart_watchdog;
+ }
+
+- if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) &&
+- adapter->netdev_registered &&
+- !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) &&
+- rtnl_trylock()) {
+- netdev_update_features(adapter->netdev);
+- rtnl_unlock();
+- adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES;
+- }
+-
+ if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
+ iavf_change_state(adapter, __IAVF_COMM_FAILED);
+
+@@ -4980,6 +5027,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+
+ INIT_WORK(&adapter->reset_task, iavf_reset_task);
+ INIT_WORK(&adapter->adminq_task, iavf_adminq_task);
++ INIT_WORK(&adapter->finish_config, iavf_finish_config);
+ INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task);
+ INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+@@ -5123,13 +5171,15 @@ static void iavf_remove(struct pci_dev *pdev)
+ usleep_range(500, 1000);
+ }
+ cancel_delayed_work_sync(&adapter->watchdog_task);
++ cancel_work_sync(&adapter->finish_config);
+
++ rtnl_lock();
+ if (adapter->netdev_registered) {
+- rtnl_lock();
+ unregister_netdevice(netdev);
+ adapter->netdev_registered = false;
+- rtnl_unlock();
+ }
++ rtnl_unlock();
++
+ if (CLIENT_ALLOWED(adapter)) {
+ err = iavf_lan_del_device(adapter);
+ if (err)
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+index eec7ac3b7f6ee..35419673b6987 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+@@ -2237,6 +2237,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+
+ iavf_process_config(adapter);
+ adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES;
++ iavf_schedule_finish_config(adapter);
+
+ iavf_set_queue_vlan_tag_loc(adapter);
+
+--
+2.39.2
+
--- /dev/null
+From cc55115bcb0aa7ee5bb38c780a6de7795ff2f2b5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 May 2023 19:11:48 +0800
+Subject: iavf: Fix out-of-bounds when setting channels on remove
+
+From: Ding Hui <dinghui@sangfor.com.cn>
+
+[ Upstream commit 7c4bced3caa749ce468b0c5de711c98476b23a52 ]
+
+If we set a larger channel count while iavf_remove() is in progress and
+waiting for the reset to complete times out, iavf_set_channels() returns
+an error but has already changed num_active_queues directly. That leads
+to out-of-bounds accesses like in the logs below, because
+num_active_queues is then greater than the number of tx/rx_rings[]
+actually allocated.
+
+Reproducer:
+
+ [root@host ~]# cat repro.sh
+ #!/bin/bash
+
+ pf_dbsf="0000:41:00.0"
+ vf0_dbsf="0000:41:02.0"
+ g_pids=()
+
+ function do_set_numvf()
+ {
+ echo 2 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs
+ sleep $((RANDOM%3+1))
+ echo 0 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs
+ sleep $((RANDOM%3+1))
+ }
+
+ function do_set_channel()
+ {
+ local nic=$(ls -1 --indicator-style=none /sys/bus/pci/devices/${vf0_dbsf}/net/)
+ [ -z "$nic" ] && { sleep $((RANDOM%3)) ; return 1; }
+ ifconfig $nic 192.168.18.5 netmask 255.255.255.0
+ ifconfig $nic up
+ ethtool -L $nic combined 1
+ ethtool -L $nic combined 4
+ sleep $((RANDOM%3))
+ }
+
+ function on_exit()
+ {
+ local pid
+ for pid in "${g_pids[@]}"; do
+ kill -0 "$pid" &>/dev/null && kill "$pid" &>/dev/null
+ done
+ g_pids=()
+ }
+
+ trap "on_exit; exit" EXIT
+
+ while :; do do_set_numvf ; done &
+ g_pids+=($!)
+ while :; do do_set_channel ; done &
+ g_pids+=($!)
+
+ wait
+
+Result:
+
+[ 3506.152887] iavf 0000:41:02.0: Removing device
+[ 3510.400799] ==================================================================
+[ 3510.400820] BUG: KASAN: slab-out-of-bounds in iavf_free_all_tx_resources+0x156/0x160 [iavf]
+[ 3510.400823] Read of size 8 at addr ffff88b6f9311008 by task repro.sh/55536
+[ 3510.400823]
+[ 3510.400830] CPU: 101 PID: 55536 Comm: repro.sh Kdump: loaded Tainted: G O --------- -t - 4.18.0 #1
+[ 3510.400832] Hardware name: Powerleader PR2008AL/H12DSi-N6, BIOS 2.0 04/09/2021
+[ 3510.400835] Call Trace:
+[ 3510.400851] dump_stack+0x71/0xab
+[ 3510.400860] print_address_description+0x6b/0x290
+[ 3510.400865] ? iavf_free_all_tx_resources+0x156/0x160 [iavf]
+[ 3510.400868] kasan_report+0x14a/0x2b0
+[ 3510.400873] iavf_free_all_tx_resources+0x156/0x160 [iavf]
+[ 3510.400880] iavf_remove+0x2b6/0xc70 [iavf]
+[ 3510.400884] ? iavf_free_all_rx_resources+0x160/0x160 [iavf]
+[ 3510.400891] ? wait_woken+0x1d0/0x1d0
+[ 3510.400895] ? notifier_call_chain+0xc1/0x130
+[ 3510.400903] pci_device_remove+0xa8/0x1f0
+[ 3510.400910] device_release_driver_internal+0x1c6/0x460
+[ 3510.400916] pci_stop_bus_device+0x101/0x150
+[ 3510.400919] pci_stop_and_remove_bus_device+0xe/0x20
+[ 3510.400924] pci_iov_remove_virtfn+0x187/0x420
+[ 3510.400927] ? pci_iov_add_virtfn+0xe10/0xe10
+[ 3510.400929] ? pci_get_subsys+0x90/0x90
+[ 3510.400932] sriov_disable+0xed/0x3e0
+[ 3510.400936] ? bus_find_device+0x12d/0x1a0
+[ 3510.400953] i40e_free_vfs+0x754/0x1210 [i40e]
+[ 3510.400966] ? i40e_reset_all_vfs+0x880/0x880 [i40e]
+[ 3510.400968] ? pci_get_device+0x7c/0x90
+[ 3510.400970] ? pci_get_subsys+0x90/0x90
+[ 3510.400982] ? pci_vfs_assigned.part.7+0x144/0x210
+[ 3510.400987] ? __mutex_lock_slowpath+0x10/0x10
+[ 3510.400996] i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e]
+[ 3510.401001] sriov_numvfs_store+0x214/0x290
+[ 3510.401005] ? sriov_totalvfs_show+0x30/0x30
+[ 3510.401007] ? __mutex_lock_slowpath+0x10/0x10
+[ 3510.401011] ? __check_object_size+0x15a/0x350
+[ 3510.401018] kernfs_fop_write+0x280/0x3f0
+[ 3510.401022] vfs_write+0x145/0x440
+[ 3510.401025] ksys_write+0xab/0x160
+[ 3510.401028] ? __ia32_sys_read+0xb0/0xb0
+[ 3510.401031] ? fput_many+0x1a/0x120
+[ 3510.401032] ? filp_close+0xf0/0x130
+[ 3510.401038] do_syscall_64+0xa0/0x370
+[ 3510.401041] ? page_fault+0x8/0x30
+[ 3510.401043] entry_SYSCALL_64_after_hwframe+0x65/0xca
+[ 3510.401073] RIP: 0033:0x7f3a9bb842c0
+[ 3510.401079] Code: 73 01 c3 48 8b 0d d8 cb 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 89 24 2d 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 fe dd 01 00 48 89 04 24
+[ 3510.401080] RSP: 002b:00007ffc05f1fe18 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
+[ 3510.401083] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f3a9bb842c0
+[ 3510.401085] RDX: 0000000000000002 RSI: 0000000002327408 RDI: 0000000000000001
+[ 3510.401086] RBP: 0000000002327408 R08: 00007f3a9be53780 R09: 00007f3a9c8a4700
+[ 3510.401086] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000002
+[ 3510.401087] R13: 0000000000000001 R14: 00007f3a9be52620 R15: 0000000000000001
+[ 3510.401090]
+[ 3510.401093] Allocated by task 76795:
+[ 3510.401098] kasan_kmalloc+0xa6/0xd0
+[ 3510.401099] __kmalloc+0xfb/0x200
+[ 3510.401104] iavf_init_interrupt_scheme+0x26f/0x1310 [iavf]
+[ 3510.401108] iavf_watchdog_task+0x1d58/0x4050 [iavf]
+[ 3510.401114] process_one_work+0x56a/0x11f0
+[ 3510.401115] worker_thread+0x8f/0xf40
+[ 3510.401117] kthread+0x2a0/0x390
+[ 3510.401119] ret_from_fork+0x1f/0x40
+[ 3510.401122] 0xffffffffffffffff
+[ 3510.401123]
+
+In timeout handling, we should keep the original num_active_queues
+and reset num_req_queues to 0.
+
+Fixes: 4e5e6b5d9d13 ("iavf: Fix return of set the new channel count")
+Signed-off-by: Ding Hui <dinghui@sangfor.com.cn>
+Cc: Donglin Peng <pengdonglin@sangfor.com.cn>
+Cc: Huang Cun <huangcun@sangfor.com.cn>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+index 83cfc54a47062..4746ee517c75a 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+@@ -1863,7 +1863,7 @@ static int iavf_set_channels(struct net_device *netdev,
+ }
+ if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) {
+ adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+- adapter->num_active_queues = num_req;
++ adapter->num_req_queues = 0;
+ return -EOPNOTSUPP;
+ }
+
+--
+2.39.2
+
--- /dev/null
+From 045d5f68bcd8b2284e19c86bfd77bc8ae236d467 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 5 Jun 2023 10:52:26 -0400
+Subject: iavf: fix reset task race with iavf_remove()
+
+From: Ahmed Zaki <ahmed.zaki@intel.com>
+
+[ Upstream commit c34743daca0eb1dc855831a5210f0800a850088e ]
+
+The reset task is currently scheduled from the watchdog or adminq tasks.
+First, all direct calls to schedule the reset task are replaced with
+iavf_schedule_reset(), which is modified to accept a flag indicating the
+type of reset.
+
+To prevent the reset task from starting once iavf_remove() starts, we need
+to check the __IAVF_IN_REMOVE_TASK bit before we schedule it. This is now
+easily added to iavf_schedule_reset().
+
+Finally, remove the check for IAVF_FLAG_RESET_NEEDED in the watchdog task.
+It is redundant since all callers that set the flag immediately schedule
+the reset task.
+
+Fixes: 3ccd54ef44eb ("iavf: Fix init state closure on remove")
+Fixes: 14756b2ae265 ("iavf: Fix __IAVF_RESETTING state usage")
+Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
+Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf.h | 2 +-
+ .../net/ethernet/intel/iavf/iavf_ethtool.c | 8 ++---
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 32 +++++++------------
+ .../net/ethernet/intel/iavf/iavf_virtchnl.c | 3 +-
+ 4 files changed, 16 insertions(+), 29 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
+index 305675042fe55..543931c06bb17 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf.h
++++ b/drivers/net/ethernet/intel/iavf/iavf.h
+@@ -520,7 +520,7 @@ int iavf_up(struct iavf_adapter *adapter);
+ void iavf_down(struct iavf_adapter *adapter);
+ int iavf_process_config(struct iavf_adapter *adapter);
+ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter);
+-void iavf_schedule_reset(struct iavf_adapter *adapter);
++void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags);
+ void iavf_schedule_request_stats(struct iavf_adapter *adapter);
+ void iavf_schedule_finish_config(struct iavf_adapter *adapter);
+ void iavf_reset(struct iavf_adapter *adapter);
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+index 73219c5069290..fd6d6f6263f66 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+@@ -532,8 +532,7 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags)
+ /* issue a reset to force legacy-rx change to take effect */
+ if (changed_flags & IAVF_FLAG_LEGACY_RX) {
+ if (netif_running(netdev)) {
+- adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+- queue_work(adapter->wq, &adapter->reset_task);
++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+ ret = iavf_wait_for_reset(adapter);
+ if (ret)
+ netdev_warn(netdev, "Changing private flags timeout or interrupted waiting for reset");
+@@ -676,8 +675,7 @@ static int iavf_set_ringparam(struct net_device *netdev,
+ }
+
+ if (netif_running(netdev)) {
+- adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+- queue_work(adapter->wq, &adapter->reset_task);
++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+ ret = iavf_wait_for_reset(adapter);
+ if (ret)
+ netdev_warn(netdev, "Changing ring parameters timeout or interrupted waiting for reset");
+@@ -1860,7 +1858,7 @@ static int iavf_set_channels(struct net_device *netdev,
+
+ adapter->num_req_queues = num_req;
+ adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
+- iavf_schedule_reset(adapter);
++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+
+ ret = iavf_wait_for_reset(adapter);
+ if (ret)
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index 0e201d690f0dd..c1f91c55e1ca7 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -309,12 +309,14 @@ static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs)
+ /**
+ * iavf_schedule_reset - Set the flags and schedule a reset event
+ * @adapter: board private structure
++ * @flags: IAVF_FLAG_RESET_PENDING or IAVF_FLAG_RESET_NEEDED
+ **/
+-void iavf_schedule_reset(struct iavf_adapter *adapter)
++void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags)
+ {
+- if (!(adapter->flags &
+- (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) {
+- adapter->flags |= IAVF_FLAG_RESET_NEEDED;
++ if (!test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) &&
++ !(adapter->flags &
++ (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) {
++ adapter->flags |= flags;
+ queue_work(adapter->wq, &adapter->reset_task);
+ }
+ }
+@@ -342,7 +344,7 @@ static void iavf_tx_timeout(struct net_device *netdev, unsigned int txqueue)
+ struct iavf_adapter *adapter = netdev_priv(netdev);
+
+ adapter->tx_timeout_count++;
+- iavf_schedule_reset(adapter);
++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+ }
+
+ /**
+@@ -2490,7 +2492,7 @@ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter)
+ adapter->vsi_res->num_queue_pairs);
+ adapter->flags |= IAVF_FLAG_REINIT_MSIX_NEEDED;
+ adapter->num_req_queues = adapter->vsi_res->num_queue_pairs;
+- iavf_schedule_reset(adapter);
++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+
+ return -EAGAIN;
+ }
+@@ -2787,14 +2789,6 @@ static void iavf_watchdog_task(struct work_struct *work)
+ if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
+ iavf_change_state(adapter, __IAVF_COMM_FAILED);
+
+- if (adapter->flags & IAVF_FLAG_RESET_NEEDED) {
+- adapter->aq_required = 0;
+- adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+- mutex_unlock(&adapter->crit_lock);
+- queue_work(adapter->wq, &adapter->reset_task);
+- return;
+- }
+-
+ switch (adapter->state) {
+ case __IAVF_STARTUP:
+ iavf_startup(adapter);
+@@ -2922,11 +2916,10 @@ static void iavf_watchdog_task(struct work_struct *work)
+ /* check for hw reset */
+ reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK;
+ if (!reg_val) {
+- adapter->flags |= IAVF_FLAG_RESET_PENDING;
+ adapter->aq_required = 0;
+ adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+ dev_err(&adapter->pdev->dev, "Hardware reset detected\n");
+- queue_work(adapter->wq, &adapter->reset_task);
++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING);
+ mutex_unlock(&adapter->crit_lock);
+ queue_delayed_work(adapter->wq,
+ &adapter->watchdog_task, HZ * 2);
+@@ -3324,9 +3317,7 @@ static void iavf_adminq_task(struct work_struct *work)
+ } while (pending);
+ mutex_unlock(&adapter->crit_lock);
+
+- if ((adapter->flags &
+- (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) ||
+- adapter->state == __IAVF_RESETTING)
++ if (iavf_is_reset_in_progress(adapter))
+ goto freedom;
+
+ /* check for error indications */
+@@ -4423,8 +4414,7 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu)
+ }
+
+ if (netif_running(netdev)) {
+- adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+- queue_work(adapter->wq, &adapter->reset_task);
++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+ ret = iavf_wait_for_reset(adapter);
+ if (ret < 0)
+ netdev_warn(netdev, "MTU change interrupted waiting for reset");
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+index 35419673b6987..2fc8e60ef6afb 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+@@ -1961,9 +1961,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+ case VIRTCHNL_EVENT_RESET_IMPENDING:
+ dev_info(&adapter->pdev->dev, "Reset indication received from the PF\n");
+ if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) {
+- adapter->flags |= IAVF_FLAG_RESET_PENDING;
+ dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
+- queue_work(adapter->wq, &adapter->reset_task);
++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING);
+ }
+ break;
+ default:
+--
+2.39.2
+
--- /dev/null
+From 65df986e4dd0e7534d9caca118a4603cfb45336b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 May 2023 19:11:47 +0800
+Subject: iavf: Fix use-after-free in free_netdev
+
+From: Ding Hui <dinghui@sangfor.com.cn>
+
+[ Upstream commit 5f4fa1672d98fe99d2297b03add35346f1685d6b ]
+
+We do netif_napi_add() for all allocated q_vectors[], but potentially
+do netif_napi_del() for only part of them, then kfree q_vectors and
+leave invalid pointers on dev->napi_list.
+
+Reproducer:
+
+ [root@host ~]# cat repro.sh
+ #!/bin/bash
+
+ pf_dbsf="0000:41:00.0"
+ vf0_dbsf="0000:41:02.0"
+ g_pids=()
+
+ function do_set_numvf()
+ {
+ echo 2 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs
+ sleep $((RANDOM%3+1))
+ echo 0 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs
+ sleep $((RANDOM%3+1))
+ }
+
+ function do_set_channel()
+ {
+ local nic=$(ls -1 --indicator-style=none /sys/bus/pci/devices/${vf0_dbsf}/net/)
+ [ -z "$nic" ] && { sleep $((RANDOM%3)) ; return 1; }
+ ifconfig $nic 192.168.18.5 netmask 255.255.255.0
+ ifconfig $nic up
+ ethtool -L $nic combined 1
+ ethtool -L $nic combined 4
+ sleep $((RANDOM%3))
+ }
+
+ function on_exit()
+ {
+ local pid
+ for pid in "${g_pids[@]}"; do
+ kill -0 "$pid" &>/dev/null && kill "$pid" &>/dev/null
+ done
+ g_pids=()
+ }
+
+ trap "on_exit; exit" EXIT
+
+ while :; do do_set_numvf ; done &
+ g_pids+=($!)
+ while :; do do_set_channel ; done &
+ g_pids+=($!)
+
+ wait
+
+Result:
+
+[ 4093.900222] ==================================================================
+[ 4093.900230] BUG: KASAN: use-after-free in free_netdev+0x308/0x390
+[ 4093.900232] Read of size 8 at addr ffff88b4dc145640 by task repro.sh/6699
+[ 4093.900233]
+[ 4093.900236] CPU: 10 PID: 6699 Comm: repro.sh Kdump: loaded Tainted: G O --------- -t - 4.18.0 #1
+[ 4093.900238] Hardware name: Powerleader PR2008AL/H12DSi-N6, BIOS 2.0 04/09/2021
+[ 4093.900239] Call Trace:
+[ 4093.900244] dump_stack+0x71/0xab
+[ 4093.900249] print_address_description+0x6b/0x290
+[ 4093.900251] ? free_netdev+0x308/0x390
+[ 4093.900252] kasan_report+0x14a/0x2b0
+[ 4093.900254] free_netdev+0x308/0x390
+[ 4093.900261] iavf_remove+0x825/0xd20 [iavf]
+[ 4093.900265] pci_device_remove+0xa8/0x1f0
+[ 4093.900268] device_release_driver_internal+0x1c6/0x460
+[ 4093.900271] pci_stop_bus_device+0x101/0x150
+[ 4093.900273] pci_stop_and_remove_bus_device+0xe/0x20
+[ 4093.900275] pci_iov_remove_virtfn+0x187/0x420
+[ 4093.900277] ? pci_iov_add_virtfn+0xe10/0xe10
+[ 4093.900278] ? pci_get_subsys+0x90/0x90
+[ 4093.900280] sriov_disable+0xed/0x3e0
+[ 4093.900282] ? bus_find_device+0x12d/0x1a0
+[ 4093.900290] i40e_free_vfs+0x754/0x1210 [i40e]
+[ 4093.900298] ? i40e_reset_all_vfs+0x880/0x880 [i40e]
+[ 4093.900299] ? pci_get_device+0x7c/0x90
+[ 4093.900300] ? pci_get_subsys+0x90/0x90
+[ 4093.900306] ? pci_vfs_assigned.part.7+0x144/0x210
+[ 4093.900309] ? __mutex_lock_slowpath+0x10/0x10
+[ 4093.900315] i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e]
+[ 4093.900318] sriov_numvfs_store+0x214/0x290
+[ 4093.900320] ? sriov_totalvfs_show+0x30/0x30
+[ 4093.900321] ? __mutex_lock_slowpath+0x10/0x10
+[ 4093.900323] ? __check_object_size+0x15a/0x350
+[ 4093.900326] kernfs_fop_write+0x280/0x3f0
+[ 4093.900329] vfs_write+0x145/0x440
+[ 4093.900330] ksys_write+0xab/0x160
+[ 4093.900332] ? __ia32_sys_read+0xb0/0xb0
+[ 4093.900334] ? fput_many+0x1a/0x120
+[ 4093.900335] ? filp_close+0xf0/0x130
+[ 4093.900338] do_syscall_64+0xa0/0x370
+[ 4093.900339] ? page_fault+0x8/0x30
+[ 4093.900341] entry_SYSCALL_64_after_hwframe+0x65/0xca
+[ 4093.900357] RIP: 0033:0x7f16ad4d22c0
+[ 4093.900359] Code: 73 01 c3 48 8b 0d d8 cb 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 89 24 2d 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 fe dd 01 00 48 89 04 24
+[ 4093.900360] RSP: 002b:00007ffd6491b7f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
+[ 4093.900362] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f16ad4d22c0
+[ 4093.900363] RDX: 0000000000000002 RSI: 0000000001a41408 RDI: 0000000000000001
+[ 4093.900364] RBP: 0000000001a41408 R08: 00007f16ad7a1780 R09: 00007f16ae1f2700
+[ 4093.900364] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000002
+[ 4093.900365] R13: 0000000000000001 R14: 00007f16ad7a0620 R15: 0000000000000001
+[ 4093.900367]
+[ 4093.900368] Allocated by task 820:
+[ 4093.900371] kasan_kmalloc+0xa6/0xd0
+[ 4093.900373] __kmalloc+0xfb/0x200
+[ 4093.900376] iavf_init_interrupt_scheme+0x63b/0x1320 [iavf]
+[ 4093.900380] iavf_watchdog_task+0x3d51/0x52c0 [iavf]
+[ 4093.900382] process_one_work+0x56a/0x11f0
+[ 4093.900383] worker_thread+0x8f/0xf40
+[ 4093.900384] kthread+0x2a0/0x390
+[ 4093.900385] ret_from_fork+0x1f/0x40
+[ 4093.900387] 0xffffffffffffffff
+[ 4093.900387]
+[ 4093.900388] Freed by task 6699:
+[ 4093.900390] __kasan_slab_free+0x137/0x190
+[ 4093.900391] kfree+0x8b/0x1b0
+[ 4093.900394] iavf_free_q_vectors+0x11d/0x1a0 [iavf]
+[ 4093.900397] iavf_remove+0x35a/0xd20 [iavf]
+[ 4093.900399] pci_device_remove+0xa8/0x1f0
+[ 4093.900400] device_release_driver_internal+0x1c6/0x460
+[ 4093.900401] pci_stop_bus_device+0x101/0x150
+[ 4093.900402] pci_stop_and_remove_bus_device+0xe/0x20
+[ 4093.900403] pci_iov_remove_virtfn+0x187/0x420
+[ 4093.900404] sriov_disable+0xed/0x3e0
+[ 4093.900409] i40e_free_vfs+0x754/0x1210 [i40e]
+[ 4093.900415] i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e]
+[ 4093.900416] sriov_numvfs_store+0x214/0x290
+[ 4093.900417] kernfs_fop_write+0x280/0x3f0
+[ 4093.900418] vfs_write+0x145/0x440
+[ 4093.900419] ksys_write+0xab/0x160
+[ 4093.900420] do_syscall_64+0xa0/0x370
+[ 4093.900421] entry_SYSCALL_64_after_hwframe+0x65/0xca
+[ 4093.900422] 0xffffffffffffffff
+[ 4093.900422]
+[ 4093.900424] The buggy address belongs to the object at ffff88b4dc144200
+ which belongs to the cache kmalloc-8k of size 8192
+[ 4093.900425] The buggy address is located 5184 bytes inside of
+ 8192-byte region [ffff88b4dc144200, ffff88b4dc146200)
+[ 4093.900425] The buggy address belongs to the page:
+[ 4093.900427] page:ffffea00d3705000 refcount:1 mapcount:0 mapping:ffff88bf04415c80 index:0x0 compound_mapcount: 0
+[ 4093.900430] flags: 0x10000000008100(slab|head)
+[ 4093.900433] raw: 0010000000008100 dead000000000100 dead000000000200 ffff88bf04415c80
+[ 4093.900434] raw: 0000000000000000 0000000000030003 00000001ffffffff 0000000000000000
+[ 4093.900434] page dumped because: kasan: bad access detected
+[ 4093.900435]
+[ 4093.900435] Memory state around the buggy address:
+[ 4093.900436] ffff88b4dc145500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 4093.900437] ffff88b4dc145580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 4093.900438] >ffff88b4dc145600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 4093.900438] ^
+[ 4093.900439] ffff88b4dc145680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 4093.900440] ffff88b4dc145700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 4093.900440] ==================================================================
+
+Although patch #2 (of 2) avoids the issue triggered by this repro.sh,
+there are still other potential risks: if num_active_queues is
+unexpectedly changed to less than the number of allocated q_vectors[],
+the mismatched netif_napi_add()/netif_napi_del() calls can also cause a
+use-after-free.
+
+Since we actually call netif_napi_add() unconditionally for all
+allocated q_vectors in iavf_alloc_q_vectors(), fix this by making
+netif_napi_del() match netif_napi_add().
+
+Fixes: 5eae00c57f5e ("i40evf: main driver core")
+Signed-off-by: Ding Hui <dinghui@sangfor.com.cn>
+Cc: Donglin Peng <pengdonglin@sangfor.com.cn>
+Cc: Huang Cun <huangcun@sangfor.com.cn>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Reviewed-by: Madhu Chittim <madhu.chittim@intel.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index 965d02d7ff80f..81676c3af4b36 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -1840,19 +1840,16 @@ static int iavf_alloc_q_vectors(struct iavf_adapter *adapter)
+ static void iavf_free_q_vectors(struct iavf_adapter *adapter)
+ {
+ int q_idx, num_q_vectors;
+- int napi_vectors;
+
+ if (!adapter->q_vectors)
+ return;
+
+ num_q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+- napi_vectors = adapter->num_active_queues;
+
+ for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
+ struct iavf_q_vector *q_vector = &adapter->q_vectors[q_idx];
+
+- if (q_idx < napi_vectors)
+- netif_napi_del(&q_vector->napi);
++ netif_napi_del(&q_vector->napi);
+ }
+ kfree(adapter->q_vectors);
+ adapter->q_vectors = NULL;
+--
+2.39.2
+
--- /dev/null
+From 97d8a9e529256a00151bc682e79efba868de17a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Jun 2023 08:54:05 -0700
+Subject: iavf: make functions static where possible
+
+From: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+
+[ Upstream commit a4aadf0f5905661cd25c366b96cc1c840f05b756 ]
+
+Make all possible functions static.
+
+Move iavf_force_wb() up to avoid forward declaration.
+
+Suggested-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Signed-off-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Stable-dep-of: c2ed2403f12c ("iavf: Wait for reset in callbacks which trigger it")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf.h | 10 -----
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 14 +++----
+ drivers/net/ethernet/intel/iavf/iavf_txrx.c | 43 ++++++++++-----------
+ drivers/net/ethernet/intel/iavf/iavf_txrx.h | 4 --
+ 4 files changed, 28 insertions(+), 43 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
+index 6625625f91e47..a716ed6bb787d 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf.h
++++ b/drivers/net/ethernet/intel/iavf/iavf.h
+@@ -523,9 +523,6 @@ void iavf_schedule_request_stats(struct iavf_adapter *adapter);
+ void iavf_reset(struct iavf_adapter *adapter);
+ void iavf_set_ethtool_ops(struct net_device *netdev);
+ void iavf_update_stats(struct iavf_adapter *adapter);
+-void iavf_reset_interrupt_capability(struct iavf_adapter *adapter);
+-int iavf_init_interrupt_scheme(struct iavf_adapter *adapter);
+-void iavf_irq_enable_queues(struct iavf_adapter *adapter);
+ void iavf_free_all_tx_resources(struct iavf_adapter *adapter);
+ void iavf_free_all_rx_resources(struct iavf_adapter *adapter);
+
+@@ -579,17 +576,10 @@ void iavf_enable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid);
+ void iavf_disable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid);
+ void iavf_enable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid);
+ void iavf_disable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid);
+-int iavf_replace_primary_mac(struct iavf_adapter *adapter,
+- const u8 *new_mac);
+-void
+-iavf_set_vlan_offload_features(struct iavf_adapter *adapter,
+- netdev_features_t prev_features,
+- netdev_features_t features);
+ void iavf_add_fdir_filter(struct iavf_adapter *adapter);
+ void iavf_del_fdir_filter(struct iavf_adapter *adapter);
+ void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter);
+ void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter);
+ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
+ const u8 *macaddr);
+-int iavf_lock_timeout(struct mutex *lock, unsigned int msecs);
+ #endif /* _IAVF_H_ */
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index 68e951fe5e210..d5b1dcfe0ccdd 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -253,7 +253,7 @@ enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw,
+ *
+ * Returns 0 on success, negative on failure
+ **/
+-int iavf_lock_timeout(struct mutex *lock, unsigned int msecs)
++static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs)
+ {
+ unsigned int wait, delay = 10;
+
+@@ -362,7 +362,7 @@ static void iavf_irq_disable(struct iavf_adapter *adapter)
+ * iavf_irq_enable_queues - Enable interrupt for all queues
+ * @adapter: board private structure
+ **/
+-void iavf_irq_enable_queues(struct iavf_adapter *adapter)
++static void iavf_irq_enable_queues(struct iavf_adapter *adapter)
+ {
+ struct iavf_hw *hw = &adapter->hw;
+ int i;
+@@ -1003,8 +1003,8 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
+ *
+ * Do not call this with mac_vlan_list_lock!
+ **/
+-int iavf_replace_primary_mac(struct iavf_adapter *adapter,
+- const u8 *new_mac)
++static int iavf_replace_primary_mac(struct iavf_adapter *adapter,
++ const u8 *new_mac)
+ {
+ struct iavf_hw *hw = &adapter->hw;
+ struct iavf_mac_filter *f;
+@@ -1860,7 +1860,7 @@ static void iavf_free_q_vectors(struct iavf_adapter *adapter)
+ * @adapter: board private structure
+ *
+ **/
+-void iavf_reset_interrupt_capability(struct iavf_adapter *adapter)
++static void iavf_reset_interrupt_capability(struct iavf_adapter *adapter)
+ {
+ if (!adapter->msix_entries)
+ return;
+@@ -1875,7 +1875,7 @@ void iavf_reset_interrupt_capability(struct iavf_adapter *adapter)
+ * @adapter: board private structure to initialize
+ *
+ **/
+-int iavf_init_interrupt_scheme(struct iavf_adapter *adapter)
++static int iavf_init_interrupt_scheme(struct iavf_adapter *adapter)
+ {
+ int err;
+
+@@ -2174,7 +2174,7 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter)
+ * the watchdog if any changes are requested to expedite the request via
+ * virtchnl.
+ **/
+-void
++static void
+ iavf_set_vlan_offload_features(struct iavf_adapter *adapter,
+ netdev_features_t prev_features,
+ netdev_features_t features)
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+index e989feda133c1..8c5f6096b0022 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+@@ -54,7 +54,7 @@ static void iavf_unmap_and_free_tx_resource(struct iavf_ring *ring,
+ * iavf_clean_tx_ring - Free any empty Tx buffers
+ * @tx_ring: ring to be cleaned
+ **/
+-void iavf_clean_tx_ring(struct iavf_ring *tx_ring)
++static void iavf_clean_tx_ring(struct iavf_ring *tx_ring)
+ {
+ unsigned long bi_size;
+ u16 i;
+@@ -110,7 +110,7 @@ void iavf_free_tx_resources(struct iavf_ring *tx_ring)
+ * Since there is no access to the ring head register
+ * in XL710, we need to use our local copies
+ **/
+-u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw)
++static u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw)
+ {
+ u32 head, tail;
+
+@@ -127,6 +127,24 @@ u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw)
+ return 0;
+ }
+
++/**
++ * iavf_force_wb - Issue SW Interrupt so HW does a wb
++ * @vsi: the VSI we care about
++ * @q_vector: the vector on which to force writeback
++ **/
++static void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector)
++{
++ u32 val = IAVF_VFINT_DYN_CTLN1_INTENA_MASK |
++ IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */
++ IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_MASK |
++ IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK
++ /* allow 00 to be written to the index */;
++
++ wr32(&vsi->back->hw,
++ IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx),
++ val);
++}
++
+ /**
+ * iavf_detect_recover_hung - Function to detect and recover hung_queues
+ * @vsi: pointer to vsi struct with tx queues
+@@ -352,25 +370,6 @@ static void iavf_enable_wb_on_itr(struct iavf_vsi *vsi,
+ q_vector->arm_wb_state = true;
+ }
+
+-/**
+- * iavf_force_wb - Issue SW Interrupt so HW does a wb
+- * @vsi: the VSI we care about
+- * @q_vector: the vector on which to force writeback
+- *
+- **/
+-void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector)
+-{
+- u32 val = IAVF_VFINT_DYN_CTLN1_INTENA_MASK |
+- IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */
+- IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_MASK |
+- IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK
+- /* allow 00 to be written to the index */;
+-
+- wr32(&vsi->back->hw,
+- IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx),
+- val);
+-}
+-
+ static inline bool iavf_container_is_rx(struct iavf_q_vector *q_vector,
+ struct iavf_ring_container *rc)
+ {
+@@ -687,7 +686,7 @@ int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring)
+ * iavf_clean_rx_ring - Free Rx buffers
+ * @rx_ring: ring to be cleaned
+ **/
+-void iavf_clean_rx_ring(struct iavf_ring *rx_ring)
++static void iavf_clean_rx_ring(struct iavf_ring *rx_ring)
+ {
+ unsigned long bi_size;
+ u16 i;
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h
+index 2624bf6d009e3..7e6ee32d19b69 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h
++++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h
+@@ -442,15 +442,11 @@ static inline unsigned int iavf_rx_pg_order(struct iavf_ring *ring)
+
+ bool iavf_alloc_rx_buffers(struct iavf_ring *rxr, u16 cleaned_count);
+ netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+-void iavf_clean_tx_ring(struct iavf_ring *tx_ring);
+-void iavf_clean_rx_ring(struct iavf_ring *rx_ring);
+ int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring);
+ int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring);
+ void iavf_free_tx_resources(struct iavf_ring *tx_ring);
+ void iavf_free_rx_resources(struct iavf_ring *rx_ring);
+ int iavf_napi_poll(struct napi_struct *napi, int budget);
+-void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector);
+-u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw);
+ void iavf_detect_recover_hung(struct iavf_vsi *vsi);
+ int __iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size);
+ bool __iavf_chk_linearize(struct sk_buff *skb);
+--
+2.39.2
+
--- /dev/null
+From 5491562d5578b2fc118790482f43fbde751e023f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Jan 2023 17:42:27 +0100
+Subject: iavf: Move netdev_update_features() into watchdog task
+
+From: Marcin Szycik <marcin.szycik@linux.intel.com>
+
+[ Upstream commit 7598f4b40bd60e4a4280de645eb2893eea80b59d ]
+
+Remove netdev_update_features() from iavf_adminq_task(), as it can cause
+deadlocks due to needing rtnl_lock. Instead use the
+IAVF_FLAG_SETUP_NETDEV_FEATURES flag to indicate that netdev features need
+to be updated in the watchdog task. iavf_set_vlan_offload_features()
+and iavf_set_queue_vlan_tag_loc() can be called directly from
+iavf_virtchnl_completion().
+
+Suggested-by: Phani Burra <phani.r.burra@intel.com>
+Signed-off-by: Marcin Szycik <marcin.szycik@linux.intel.com>
+Reviewed-by: Alexander Lobakin <alexandr.lobakin@intel.com>
+Tested-by: Marek Szlosek <marek.szlosek@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Stable-dep-of: c2ed2403f12c ("iavf: Wait for reset in callbacks which trigger it")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 27 +++++++------------
+ .../net/ethernet/intel/iavf/iavf_virtchnl.c | 8 ++++++
+ 2 files changed, 17 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index 104de9a071449..68e951fe5e210 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -2689,6 +2689,15 @@ static void iavf_watchdog_task(struct work_struct *work)
+ goto restart_watchdog;
+ }
+
++ if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) &&
++ adapter->netdev_registered &&
++ !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) &&
++ rtnl_trylock()) {
++ netdev_update_features(adapter->netdev);
++ rtnl_unlock();
++ adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES;
++ }
++
+ if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
+ iavf_change_state(adapter, __IAVF_COMM_FAILED);
+
+@@ -3228,24 +3237,6 @@ static void iavf_adminq_task(struct work_struct *work)
+ } while (pending);
+ mutex_unlock(&adapter->crit_lock);
+
+- if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES)) {
+- if (adapter->netdev_registered ||
+- !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) {
+- struct net_device *netdev = adapter->netdev;
+-
+- rtnl_lock();
+- netdev_update_features(netdev);
+- rtnl_unlock();
+- /* Request VLAN offload settings */
+- if (VLAN_V2_ALLOWED(adapter))
+- iavf_set_vlan_offload_features
+- (adapter, 0, netdev->features);
+-
+- iavf_set_queue_vlan_tag_loc(adapter);
+- }
+-
+- adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES;
+- }
+ if ((adapter->flags &
+ (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) ||
+ adapter->state == __IAVF_RESETTING)
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+index 00dccdd290dce..07d37402a0df5 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+@@ -2237,6 +2237,14 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+
+ iavf_process_config(adapter);
+ adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES;
++
++ /* Request VLAN offload settings */
++ if (VLAN_V2_ALLOWED(adapter))
++ iavf_set_vlan_offload_features(adapter, 0,
++ netdev->features);
++
++ iavf_set_queue_vlan_tag_loc(adapter);
++
+ was_mac_changed = !ether_addr_equal(netdev->dev_addr,
+ adapter->hw.mac.addr);
+
+--
+2.39.2
+
--- /dev/null
+From c45878593282d7f12a92cae3b219aeb3889e32f7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Apr 2023 12:09:39 -0600
+Subject: iavf: send VLAN offloading caps once after VFR
+
+From: Ahmed Zaki <ahmed.zaki@intel.com>
+
+[ Upstream commit 7dcbdf29282fbcdb646dc785e8a57ed2c2fec8ba ]
+
+When the user disables rxvlan offloading and then changes the number of
+channels, all VLAN ports are unable to receive traffic.
+
+Changing the number of channels triggers a VFR reset. During re-init, when
+VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS is received, we do:
+1 - set the IAVF_FLAG_SETUP_NETDEV_FEATURES flag
+2 - call
+ iavf_set_vlan_offload_features(adapter, 0, netdev->features);
+
+The second step sends to the PF the __default__ features, in this case
+aq_required |= IAVF_FLAG_AQ_ENABLE_CTAG_VLAN_STRIPPING
+
+While the first step forces the watchdog task to call
+netdev_update_features() -> iavf_set_features() ->
+iavf_set_vlan_offload_features(adapter, netdev->features, features).
+Since the user disabled the "rxvlan", this sets:
+aq_required |= IAVF_FLAG_AQ_DISABLE_CTAG_VLAN_STRIPPING
+
+When we start processing the AQ commands, both flags are enabled. Since we
+process DISABLE_XTAG first then ENABLE_XTAG, this results in the PF
+enabling the rxvlan offload. This breaks all communications on the VLAN
+net devices.
+
+Fix by removing the call to iavf_set_vlan_offload_features() (second
+step). Calling netdev_update_features() from watchdog task is enough for
+both init and reset paths.
+
+Fixes: 7598f4b40bd6 ("iavf: Move netdev_update_features() into watchdog task")
+Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Stable-dep-of: c2ed2403f12c ("iavf: Wait for reset in callbacks which trigger it")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 5 -----
+ 1 file changed, 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+index 07d37402a0df5..7b34111fd4eb1 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+@@ -2238,11 +2238,6 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+ iavf_process_config(adapter);
+ adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES;
+
+- /* Request VLAN offload settings */
+- if (VLAN_V2_ALLOWED(adapter))
+- iavf_set_vlan_offload_features(adapter, 0,
+- netdev->features);
+-
+ iavf_set_queue_vlan_tag_loc(adapter);
+
+ was_mac_changed = !ether_addr_equal(netdev->dev_addr,
+--
+2.39.2
+
--- /dev/null
+From 7af6ff049c18a0c4e3e4a80b523c331617b48a6f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 May 2023 15:46:02 -0600
+Subject: iavf: use internal state to free traffic IRQs
+
+From: Ahmed Zaki <ahmed.zaki@intel.com>
+
+[ Upstream commit a77ed5c5b768e9649be240a2d864e5cd9c6a2015 ]
+
+If the system tries to close the netdev while iavf_reset_task() is
+running, __LINK_STATE_START will be cleared and netif_running() will
+return false in iavf_reinit_interrupt_scheme(). This will result in
+iavf_free_traffic_irqs() not being called and a leak as follows:
+
+ [7632.489326] remove_proc_entry: removing non-empty directory 'irq/999', leaking at least 'iavf-enp24s0f0v0-TxRx-0'
+ [7632.490214] WARNING: CPU: 0 PID: 10 at fs/proc/generic.c:718 remove_proc_entry+0x19b/0x1b0
+
+is shown when pci_disable_msix() is later called. Fix by using the
+internal adapter state. The traffic IRQs will always exist if
+state == __IAVF_RUNNING.
+
+Fixes: 5b36e8d04b44 ("i40evf: Enable VF to request an alternate queue allocation")
+Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index 81676c3af4b36..104de9a071449 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -1941,15 +1941,16 @@ static void iavf_free_rss(struct iavf_adapter *adapter)
+ /**
+ * iavf_reinit_interrupt_scheme - Reallocate queues and vectors
+ * @adapter: board private structure
++ * @running: true if adapter->state == __IAVF_RUNNING
+ *
+ * Returns 0 on success, negative on failure
+ **/
+-static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter)
++static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter, bool running)
+ {
+ struct net_device *netdev = adapter->netdev;
+ int err;
+
+- if (netif_running(netdev))
++ if (running)
+ iavf_free_traffic_irqs(adapter);
+ iavf_free_misc_irq(adapter);
+ iavf_reset_interrupt_capability(adapter);
+@@ -3056,7 +3057,7 @@ static void iavf_reset_task(struct work_struct *work)
+
+ if ((adapter->flags & IAVF_FLAG_REINIT_MSIX_NEEDED) ||
+ (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED)) {
+- err = iavf_reinit_interrupt_scheme(adapter);
++ err = iavf_reinit_interrupt_scheme(adapter, running);
+ if (err)
+ goto reset_err;
+ }
+--
+2.39.2
+
--- /dev/null
+From 666e6a1e4dfcf28dffd3be1e4128f2dde21ee8cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 5 Jun 2023 10:52:22 -0400
+Subject: iavf: Wait for reset in callbacks which trigger it
+
+From: Marcin Szycik <marcin.szycik@linux.intel.com>
+
+[ Upstream commit c2ed2403f12c74a74a0091ed5d830e72c58406e8 ]
+
+Adding the interface to a bond failed when done right after changing
+the MTU on the interface. The failure occurred because the bonding
+driver could not open the interface while it was still in the
+__RESETTING state caused by the MTU change.
+
+Add a new reset_waitqueue to indicate that the reset has finished.
+
+Add waiting for reset to finish in callbacks which trigger hw reset:
+iavf_set_priv_flags(), iavf_change_mtu() and iavf_set_ringparam().
+We use a 5000ms timeout period because on Hyper-V based systems,
+this operation takes around 3000-4000ms. In normal circumstances,
+it doesn't take more than 500ms to complete.
+
+Add a function iavf_wait_for_reset() so the wait-for-reset code can be
+reused, and use it also in iavf_set_channels(), which already waits for
+the reset. We don't add error handling in iavf_set_channels() as this
+could leave the device in an incorrect state if the reset was scheduled
+but timed out, or if the waiting function was interrupted by a signal.
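+
+A minimal sketch of what such a wait helper could look like (the actual
+implementation lives in the iavf_main.c hunks not quoted here; the wait
+condition below is an illustrative assumption built from the existing
+reset flags and adapter state, and it assumes the reset task does
+wake_up(&adapter->reset_waitqueue) once the reset completes):
+
+	/* Sketch only: block until the scheduled reset has finished,
+	 * bounded by the 5000ms timeout described above.
+	 */
+	int iavf_wait_for_reset(struct iavf_adapter *adapter)
+	{
+		int ret = wait_event_interruptible_timeout(adapter->reset_waitqueue,
+				!(adapter->flags & (IAVF_FLAG_RESET_PENDING |
+						    IAVF_FLAG_RESET_NEEDED)) &&
+				adapter->state != __IAVF_RESETTING,
+				msecs_to_jiffies(5000));
+
+		if (ret < 0)	/* interrupted by a signal */
+			return ret;
+		if (!ret)	/* timed out */
+			return -EBUSY;
+		return 0;
+	}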
+
+Fixes: 4e5e6b5d9d13 ("iavf: Fix return of set the new channel count")
+Signed-off-by: Marcin Szycik <marcin.szycik@linux.intel.com>
+Co-developed-by: Dawid Wesierski <dawidx.wesierski@intel.com>
+Signed-off-by: Dawid Wesierski <dawidx.wesierski@intel.com>
+Signed-off-by: Sylwester Dziedziuch <sylwesterx.dziedziuch@intel.com>
+Signed-off-by: Kamil Maziarz <kamil.maziarz@intel.com>
+Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf.h | 2 +
+ .../net/ethernet/intel/iavf/iavf_ethtool.c | 31 ++++++-----
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 51 ++++++++++++++++++-
+ .../net/ethernet/intel/iavf/iavf_virtchnl.c | 1 +
+ 4 files changed, 68 insertions(+), 17 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
+index a716ed6bb787d..2fe44e865d0a2 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf.h
++++ b/drivers/net/ethernet/intel/iavf/iavf.h
+@@ -257,6 +257,7 @@ struct iavf_adapter {
+ struct work_struct adminq_task;
+ struct delayed_work client_task;
+ wait_queue_head_t down_waitqueue;
++ wait_queue_head_t reset_waitqueue;
+ wait_queue_head_t vc_waitqueue;
+ struct iavf_q_vector *q_vectors;
+ struct list_head vlan_filter_list;
+@@ -582,4 +583,5 @@ void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter);
+ void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter);
+ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
+ const u8 *macaddr);
++int iavf_wait_for_reset(struct iavf_adapter *adapter);
+ #endif /* _IAVF_H_ */
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+index 4746ee517c75a..73219c5069290 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+@@ -484,6 +484,7 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags)
+ {
+ struct iavf_adapter *adapter = netdev_priv(netdev);
+ u32 orig_flags, new_flags, changed_flags;
++ int ret = 0;
+ u32 i;
+
+ orig_flags = READ_ONCE(adapter->flags);
+@@ -533,10 +534,13 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags)
+ if (netif_running(netdev)) {
+ adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+ queue_work(adapter->wq, &adapter->reset_task);
++ ret = iavf_wait_for_reset(adapter);
++ if (ret)
++ netdev_warn(netdev, "Changing private flags timeout or interrupted waiting for reset");
+ }
+ }
+
+- return 0;
++ return ret;
+ }
+
+ /**
+@@ -627,6 +631,7 @@ static int iavf_set_ringparam(struct net_device *netdev,
+ {
+ struct iavf_adapter *adapter = netdev_priv(netdev);
+ u32 new_rx_count, new_tx_count;
++ int ret = 0;
+
+ if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+ return -EINVAL;
+@@ -673,9 +678,12 @@ static int iavf_set_ringparam(struct net_device *netdev,
+ if (netif_running(netdev)) {
+ adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+ queue_work(adapter->wq, &adapter->reset_task);
++ ret = iavf_wait_for_reset(adapter);
++ if (ret)
++ netdev_warn(netdev, "Changing ring parameters timeout or interrupted waiting for reset");
+ }
+
+- return 0;
++ return ret;
+ }
+
+ /**
+@@ -1830,7 +1838,7 @@ static int iavf_set_channels(struct net_device *netdev,
+ {
+ struct iavf_adapter *adapter = netdev_priv(netdev);
+ u32 num_req = ch->combined_count;
+- int i;
++ int ret = 0;
+
+ if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+ adapter->num_tc) {
+@@ -1854,20 +1862,11 @@ static int iavf_set_channels(struct net_device *netdev,
+ adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
+ iavf_schedule_reset(adapter);
+
+- /* wait for the reset is done */
+- for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) {
+- msleep(IAVF_RESET_WAIT_MS);
+- if (adapter->flags & IAVF_FLAG_RESET_PENDING)
+- continue;
+- break;
+- }
+- if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) {
+- adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+- adapter->num_req_queues = 0;
+- return -EOPNOTSUPP;
+- }
++ ret = iavf_wait_for_reset(adapter);
++ if (ret)
++ netdev_warn(netdev, "Changing channel count timeout or interrupted waiting for reset");
+
+- return 0;
++ return ret;
+ }
+
+ /**
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index d5b1dcfe0ccdd..c2739071149de 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -166,6 +166,45 @@ static struct iavf_adapter *iavf_pdev_to_adapter(struct pci_dev *pdev)
+ return netdev_priv(pci_get_drvdata(pdev));
+ }
+
++/**
++ * iavf_is_reset_in_progress - Check if a reset is in progress
++ * @adapter: board private structure
++ */
++static bool iavf_is_reset_in_progress(struct iavf_adapter *adapter)
++{
++ if (adapter->state == __IAVF_RESETTING ||
++ adapter->flags & (IAVF_FLAG_RESET_PENDING |
++ IAVF_FLAG_RESET_NEEDED))
++ return true;
++
++ return false;
++}
++
++/**
++ * iavf_wait_for_reset - Wait for reset to finish.
++ * @adapter: board private structure
++ *
++ * Returns 0 if reset finished successfully, negative on timeout or interrupt.
++ */
++int iavf_wait_for_reset(struct iavf_adapter *adapter)
++{
++ int ret = wait_event_interruptible_timeout(adapter->reset_waitqueue,
++ !iavf_is_reset_in_progress(adapter),
++ msecs_to_jiffies(5000));
++
++ /* If ret < 0 then it means wait was interrupted.
++ * If ret == 0 then it means we got a timeout while waiting
++ * for reset to finish.
++ * If ret > 0 it means reset has finished.
++ */
++ if (ret > 0)
++ return 0;
++ else if (ret < 0)
++ return -EINTR;
++ else
++ return -EBUSY;
++}
++
+ /**
+ * iavf_allocate_dma_mem_d - OS specific memory alloc for shared code
+ * @hw: pointer to the HW structure
+@@ -3161,6 +3200,7 @@ static void iavf_reset_task(struct work_struct *work)
+
+ adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+
++ wake_up(&adapter->reset_waitqueue);
+ mutex_unlock(&adapter->client_lock);
+ mutex_unlock(&adapter->crit_lock);
+
+@@ -4325,6 +4365,7 @@ static int iavf_close(struct net_device *netdev)
+ static int iavf_change_mtu(struct net_device *netdev, int new_mtu)
+ {
+ struct iavf_adapter *adapter = netdev_priv(netdev);
++ int ret = 0;
+
+ netdev_dbg(netdev, "changing MTU from %d to %d\n",
+ netdev->mtu, new_mtu);
+@@ -4337,9 +4378,14 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu)
+ if (netif_running(netdev)) {
+ adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+ queue_work(adapter->wq, &adapter->reset_task);
++ ret = iavf_wait_for_reset(adapter);
++ if (ret < 0)
++ netdev_warn(netdev, "MTU change interrupted waiting for reset");
++ else if (ret)
++ netdev_warn(netdev, "MTU change timed out waiting for reset");
+ }
+
+- return 0;
++ return ret;
+ }
+
+ #define NETIF_VLAN_OFFLOAD_FEATURES (NETIF_F_HW_VLAN_CTAG_RX | \
+@@ -4942,6 +4988,9 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ /* Setup the wait queue for indicating transition to down status */
+ init_waitqueue_head(&adapter->down_waitqueue);
+
++ /* Setup the wait queue for indicating transition to running state */
++ init_waitqueue_head(&adapter->reset_waitqueue);
++
+ /* Setup the wait queue for indicating virtchannel events */
+ init_waitqueue_head(&adapter->vc_waitqueue);
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+index 7b34111fd4eb1..eec7ac3b7f6ee 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+@@ -2285,6 +2285,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+ case VIRTCHNL_OP_ENABLE_QUEUES:
+ /* enable transmits */
+ iavf_irq_enable(adapter, true);
++ wake_up(&adapter->reset_waitqueue);
+ adapter->flags &= ~IAVF_FLAG_QUEUES_DISABLED;
+ break;
+ case VIRTCHNL_OP_DISABLE_QUEUES:
+--
+2.39.2
+
--- /dev/null
+From 1fce30757b3c297f96e47f71e0c036d447f63664 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Jun 2023 10:47:32 -0700
+Subject: igb: Fix igb_down hung on surprise removal
+
+From: Ying Hsu <yinghsu@chromium.org>
+
+[ Upstream commit 004d25060c78fc31f66da0fa439c544dda1ac9d5 ]
+
+In a setup where a Thunderbolt hub connects to Ethernet and a display
+through USB Type-C, users may experience a hung task timeout when they
+remove the cable between the PC and the Thunderbolt hub.
+This is because the igb_down function is called multiple times when
+the Thunderbolt hub is unplugged. For example, the igb_io_error_detected
+triggers the first call, and the igb_remove triggers the second call.
+The second call to igb_down will block at napi_synchronize.
+Here's the call trace:
+ __schedule+0x3b0/0xddb
+ ? __mod_timer+0x164/0x5d3
+ schedule+0x44/0xa8
+ schedule_timeout+0xb2/0x2a4
+ ? run_local_timers+0x4e/0x4e
+ msleep+0x31/0x38
+ igb_down+0x12c/0x22a [igb 6615058754948bfde0bf01429257eb59f13030d4]
+ __igb_close+0x6f/0x9c [igb 6615058754948bfde0bf01429257eb59f13030d4]
+ igb_close+0x23/0x2b [igb 6615058754948bfde0bf01429257eb59f13030d4]
+ __dev_close_many+0x95/0xec
+ dev_close_many+0x6e/0x103
+ unregister_netdevice_many+0x105/0x5b1
+ unregister_netdevice_queue+0xc2/0x10d
+ unregister_netdev+0x1c/0x23
+ igb_remove+0xa7/0x11c [igb 6615058754948bfde0bf01429257eb59f13030d4]
+ pci_device_remove+0x3f/0x9c
+ device_release_driver_internal+0xfe/0x1b4
+ pci_stop_bus_device+0x5b/0x7f
+ pci_stop_bus_device+0x30/0x7f
+ pci_stop_bus_device+0x30/0x7f
+ pci_stop_and_remove_bus_device+0x12/0x19
+ pciehp_unconfigure_device+0x76/0xe9
+ pciehp_disable_slot+0x6e/0x131
+ pciehp_handle_presence_or_link_change+0x7a/0x3f7
+ pciehp_ist+0xbe/0x194
+ irq_thread_fn+0x22/0x4d
+ ? irq_thread+0x1fd/0x1fd
+ irq_thread+0x17b/0x1fd
+ ? irq_forced_thread_fn+0x5f/0x5f
+ kthread+0x142/0x153
+ ? __irq_get_irqchip_state+0x46/0x46
+ ? kthread_associate_blkcg+0x71/0x71
+ ret_from_fork+0x1f/0x30
+
+In this case, igb_io_error_detected detaches the network interface
+and requests a PCIe slot reset; however, the PCIe reset callback is
+never invoked, so the Ethernet connection breaks down.
+As the PCIe error in this case is non-fatal, requesting a slot
+reset can be avoided.
+This patch fixes the hung task issue and preserves the Ethernet
+connection by ignoring non-fatal PCIe errors.
+
+Signed-off-by: Ying Hsu <yinghsu@chromium.org>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Link: https://lore.kernel.org/r/20230620174732.4145155-1-anthony.l.nguyen@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igb/igb_main.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
+index 18ffbc892f86c..3e0444354632d 100644
+--- a/drivers/net/ethernet/intel/igb/igb_main.c
++++ b/drivers/net/ethernet/intel/igb/igb_main.c
+@@ -9585,6 +9585,11 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct igb_adapter *adapter = netdev_priv(netdev);
+
++ if (state == pci_channel_io_normal) {
++ dev_warn(&pdev->dev, "Non-correctable non-fatal error reported.\n");
++ return PCI_ERS_RESULT_CAN_RECOVER;
++ }
++
+ netif_device_detach(netdev);
+
+ if (state == pci_channel_io_perm_failure)
+--
+2.39.2
+
--- /dev/null
+From c01002df2d8dadbc072d6f4a641153969ae81dc1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 12 Apr 2023 09:36:11 +0200
+Subject: igc: Avoid transmit queue timeout for XDP
+
+From: Kurt Kanzenbach <kurt@linutronix.de>
+
+[ Upstream commit 95b681485563c64585de78662ee52d06b7fa47d9 ]
+
+High XDP load triggers the netdev watchdog:
+
+|NETDEV WATCHDOG: enp3s0 (igc): transmit queue 2 timed out
+
+The reason is that the Tx queue transmission start (txq->trans_start) is not
+updated in the XDP code path. Therefore, update it in all XDP transmission
+functions.
+
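+The pattern applied to each XDP transmit path is roughly:
+
+  __netif_tx_lock(nq, cpu);
+  /* queue is shared with the slow path; refresh the timestamp so the
+   * netdev watchdog does not see a stale trans_start under pure XDP load
+   */
+  txq_trans_cond_update(nq);
+  /* ... queue descriptors ... */
+  __netif_tx_unlock(nq);
+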
+Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Stable-dep-of: 78adb4bcf99e ("igc: Prevent garbled TX queue with XDP ZEROCOPY")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_main.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index 273941f90f066..ade4bde47c65a 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -2402,6 +2402,8 @@ static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp)
+ nq = txring_txq(ring);
+
+ __netif_tx_lock(nq, cpu);
++ /* Avoid transmit queue timeout since we share it with the slow path */
++ txq_trans_cond_update(nq);
+ res = igc_xdp_init_tx_descriptor(ring, xdpf);
+ __netif_tx_unlock(nq);
+ return res;
+@@ -2804,6 +2806,9 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring)
+
+ __netif_tx_lock(nq, cpu);
+
++ /* Avoid transmit queue timeout since we share it with the slow path */
++ txq_trans_cond_update(nq);
++
+ budget = igc_desc_unused(ring);
+
+ while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
+@@ -6297,6 +6302,9 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames,
+
+ __netif_tx_lock(nq, cpu);
+
++ /* Avoid transmit queue timeout since we share it with the slow path */
++ txq_trans_cond_update(nq);
++
+ drops = 0;
+ for (i = 0; i < num_frames; i++) {
+ int err;
+--
+2.39.2
+
--- /dev/null
+From d6a3517285a333ba4076b9e7721da2053a4d7dd2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 10:54:44 -0700
+Subject: igc: Prevent garbled TX queue with XDP ZEROCOPY
+
+From: Florian Kauer <florian.kauer@linutronix.de>
+
+[ Upstream commit 78adb4bcf99effbb960c5f9091e2e062509d1030 ]
+
+In normal operation, each populated queue item has
+next_to_watch pointing to the last TX desc of the packet,
+while each cleaned item has it set to 0. In particular,
+next_to_use that points to the next (necessarily clean)
+item to use has next_to_watch set to 0.
+
+When the TX queue is used both by an application using
+AF_XDP with ZEROCOPY and by a second non-XDP application
+generating high traffic, the queue pointers can get into
+an invalid state where next_to_use points to an item
+whose next_to_watch is NOT set to 0.
+
+However, the implementation assumes in several places
+that this is never the case, so if it does occur,
+bad things happen. In particular, within the loop inside
+of igc_clean_tx_irq(), next_to_clean can overtake next_to_use.
+Finally, this prevents any further transmission via
+this queue and it never gets unblocked or signaled.
+Secondly, if the queue is in this garbled state,
+the inner loop of igc_clean_tx_ring() will never terminate,
+completely hogging a CPU core.
+
+The reason is that igc_xdp_xmit_zc() reads next_to_use
+before acquiring the lock and writes it back
+(potentially unmodified) later. If it got modified
+before locking, the outdated next_to_use is written
+pointing to an item that was already used elsewhere
+(and thus next_to_watch got written).
+
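+In other words, the problematic ordering and its fix look roughly like:
+
+  /* before (racy): next_to_use sampled outside the lock */
+  ntu = ring->next_to_use;
+  __netif_tx_lock(nq, cpu);
+  /* ... fill descriptors starting at the stale ntu ... */
+  ring->next_to_use = ntu;
+  __netif_tx_unlock(nq);
+
+  /* after: sample next_to_use only once the lock is held */
+  __netif_tx_lock(nq, cpu);
+  ntu = ring->next_to_use;
+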
+Fixes: 9acf59a752d4 ("igc: Enable TX via AF_XDP zero-copy")
+Signed-off-by: Florian Kauer <florian.kauer@linutronix.de>
+Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de>
+Tested-by: Kurt Kanzenbach <kurt@linutronix.de>
+Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Link: https://lore.kernel.org/r/20230717175444.3217831-1-anthony.l.nguyen@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_main.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index ade4bde47c65a..2e091a4a065e7 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -2797,9 +2797,8 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring)
+ struct netdev_queue *nq = txring_txq(ring);
+ union igc_adv_tx_desc *tx_desc = NULL;
+ int cpu = smp_processor_id();
+- u16 ntu = ring->next_to_use;
+ struct xdp_desc xdp_desc;
+- u16 budget;
++ u16 budget, ntu;
+
+ if (!netif_carrier_ok(ring->netdev))
+ return;
+@@ -2809,6 +2808,7 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring)
+ /* Avoid transmit queue timeout since we share it with the slow path */
+ txq_trans_cond_update(nq);
+
++ ntu = ring->next_to_use;
+ budget = igc_desc_unused(ring);
+
+ while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
+--
+2.39.2
+
--- /dev/null
+From 84ac2024e94e7308d618a49933dee91acc662e7c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Nov 2022 16:49:15 +0800
+Subject: kallsyms: Correctly sequence symbols when CONFIG_LTO_CLANG=y
+
+From: Zhen Lei <thunder.leizhen@huawei.com>
+
+[ Upstream commit 010a0aad39fccceba4a07d30d163158a39c704f3 ]
+
+LLVM appends various suffixes for local functions and variables, suffixes
+observed:
+ - foo.llvm.[0-9a-f]+
+ - foo.[0-9a-f]+
+
+Therefore, when CONFIG_LTO_CLANG=y, kallsyms_lookup_name() needs to
+truncate the suffix of the symbol name before comparing the local function
+or variable name.
+
+Old implementation code:
+- if (strcmp(namebuf, name) == 0)
+- return kallsyms_sym_address(i);
+- if (cleanup_symbol_name(namebuf) && strcmp(namebuf, name) == 0)
+- return kallsyms_sym_address(i);
+
+The preceding lookup walks the symbols by address from low to high. That
+is, for symbols with the same name after the suffix is removed, the one
+with the smallest address is returned first. Therefore, when sorting in
+the tool, if the raw names are the same, they should be sorted by address
+in ascending order.
+
+ASCII[.] = 2e
+ASCII[0-9] = 30,39
+ASCII[A-Z] = 41,5a
+ASCII[_] = 5f
+ASCII[a-z] = 61,7a
+
+Given the preceding ASCII code values, the sort strictly produces the
+following order.
+ ---------------------------------
+| main-key | sub-key |
+|---------------------------------|
+| | addr_lowest |
+| <name> | ... |
+| <name>.<suffix> | ... |
+| | addr_highest |
+|---------------------------------|
+| <name>?<others> | | //? is [_A-Za-z0-9]
+ ---------------------------------
+
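+For illustration, with --lto-clang and hypothetical addresses, symbols
+would be emitted in this order (equal names after stripping the suffix
+fall back to ascending address; different names sort purely by name):
+
+  foo           (addr 0x1000)   <- ties with foo.llvm.1, lower address first
+  foo.llvm.1    (addr 0x2000)   <- compares as "foo" after cleanup
+  foo1          (addr 0x0800)   <- "foo1" > "foo", address irrelevant
+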
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
+Stable-dep-of: 8cc32a9bbf29 ("kallsyms: strip LTO-only suffixes from promoted global functions")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ scripts/kallsyms.c | 36 ++++++++++++++++++++++++++++++++++--
+ scripts/link-vmlinux.sh | 4 ++++
+ 2 files changed, 38 insertions(+), 2 deletions(-)
+
+diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
+index dcb744a067e5e..67ef9aa14a770 100644
+--- a/scripts/kallsyms.c
++++ b/scripts/kallsyms.c
+@@ -78,6 +78,7 @@ static unsigned int table_size, table_cnt;
+ static int all_symbols;
+ static int absolute_percpu;
+ static int base_relative;
++static int lto_clang;
+
+ static int token_profit[0x10000];
+
+@@ -89,7 +90,7 @@ static unsigned char best_table_len[256];
+ static void usage(void)
+ {
+ fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] "
+- "[--base-relative] in.map > out.S\n");
++ "[--base-relative] [--lto-clang] in.map > out.S\n");
+ exit(1);
+ }
+
+@@ -411,6 +412,34 @@ static int symbol_absolute(const struct sym_entry *s)
+ return s->percpu_absolute;
+ }
+
++static char * s_name(char *buf)
++{
++ /* Skip the symbol type */
++ return buf + 1;
++}
++
++static void cleanup_symbol_name(char *s)
++{
++ char *p;
++
++ if (!lto_clang)
++ return;
++
++ /*
++ * ASCII[.] = 2e
++ * ASCII[0-9] = 30,39
++ * ASCII[A-Z] = 41,5a
++ * ASCII[_] = 5f
++ * ASCII[a-z] = 61,7a
++ *
++ * As above, replacing '.' with '\0' does not affect the main sorting,
++ * but it helps us with subsorting.
++ */
++ p = strchr(s, '.');
++ if (p)
++ *p = '\0';
++}
++
+ static int compare_names(const void *a, const void *b)
+ {
+ int ret;
+@@ -421,7 +450,9 @@ static int compare_names(const void *a, const void *b)
+
+ expand_symbol(sa->sym, sa->len, sa_namebuf);
+ expand_symbol(sb->sym, sb->len, sb_namebuf);
+- ret = strcmp(&sa_namebuf[1], &sb_namebuf[1]);
++ cleanup_symbol_name(s_name(sa_namebuf));
++ cleanup_symbol_name(s_name(sb_namebuf));
++ ret = strcmp(s_name(sa_namebuf), s_name(sb_namebuf));
+ if (!ret) {
+ if (sa->addr > sb->addr)
+ return 1;
+@@ -855,6 +886,7 @@ int main(int argc, char **argv)
+ {"all-symbols", no_argument, &all_symbols, 1},
+ {"absolute-percpu", no_argument, &absolute_percpu, 1},
+ {"base-relative", no_argument, &base_relative, 1},
++ {"lto-clang", no_argument, <o_clang, 1},
+ {},
+ };
+
+diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
+index 918470d768e9c..32e573943cf03 100755
+--- a/scripts/link-vmlinux.sh
++++ b/scripts/link-vmlinux.sh
+@@ -156,6 +156,10 @@ kallsyms()
+ kallsymopt="${kallsymopt} --base-relative"
+ fi
+
++ if is_enabled CONFIG_LTO_CLANG; then
++ kallsymopt="${kallsymopt} --lto-clang"
++ fi
++
+ info KSYMS ${2}
+ scripts/kallsyms ${kallsymopt} ${1} > ${2}
+ }
+--
+2.39.2
+
--- /dev/null
+From 0abbf42237e70e5ca1bdbcd75de6eed8c1bd4077 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Nov 2022 16:49:14 +0800
+Subject: kallsyms: Improve the performance of kallsyms_lookup_name()
+
+From: Zhen Lei <thunder.leizhen@huawei.com>
+
+[ Upstream commit 60443c88f3a89fd303a9e8c0e84895910675c316 ]
+
+Currently, to search for a symbol, we need to expand the symbols in
+'kallsyms_names' one by one, and then use the expanded string for
+comparison. It's O(n).
+
+If we sort names in ascending order like addresses, we can also use
+binary search. It's O(log(n)).
+
+In order not to change the implementation of "/proc/kallsyms", the table
+kallsyms_names[] is still stored in a one-to-one correspondence with the
+address in ascending order.
+
+Add array kallsyms_seqs_of_names[], it's indexed by the sequence number
+of the sorted names, and the corresponding content is the sequence number
+of the sorted addresses. For example:
+Assume that the index of NameX in array kallsyms_seqs_of_names[] is 'i'
+and that kallsyms_seqs_of_names[i] is 'k'; then the corresponding address
+of NameX is kallsyms_addresses[k], and its offset in kallsyms_names[] is
+get_symbol_offset(k).
+
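+A minimal sketch of the indirection on the kernel side (using helpers
+that already exist in kernel/kallsyms.c):
+
+  unsigned int k = kallsyms_seqs_of_names[i];   /* name order -> addr order */
+  unsigned long addr = kallsyms_sym_address(k); /* address of NameX */
+  unsigned int off = get_symbol_offset(k);      /* offset in kallsyms_names[] */
+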
+Note that the memory usage will increase by (4 * kallsyms_num_syms)
+bytes; the next two patches will reduce it by (1 * kallsyms_num_syms)
+bytes and properly handle the CONFIG_LTO_CLANG=y case.
+
+Performance test results: (x86)
+Before:
+min=234, max=10364402, avg=5206926
+min=267, max=11168517, avg=5207587
+After:
+min=1016, max=90894, avg=7272
+min=1014, max=93470, avg=7293
+
+The average lookup performance of kallsyms_lookup_name() improved 715x.
+
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
+Stable-dep-of: 8cc32a9bbf29 ("kallsyms: strip LTO-only suffixes from promoted global functions")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/kallsyms.c | 86 +++++++++++++++++++++++++++++++++-----
+ kernel/kallsyms_internal.h | 1 +
+ scripts/kallsyms.c | 37 ++++++++++++++++
+ 3 files changed, 113 insertions(+), 11 deletions(-)
+
+diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
+index 60c20f301a6ba..ba351dfa109b6 100644
+--- a/kernel/kallsyms.c
++++ b/kernel/kallsyms.c
+@@ -187,26 +187,90 @@ static bool cleanup_symbol_name(char *s)
+ return false;
+ }
+
++static int compare_symbol_name(const char *name, char *namebuf)
++{
++ int ret;
++
++ ret = strcmp(name, namebuf);
++ if (!ret)
++ return ret;
++
++ if (cleanup_symbol_name(namebuf) && !strcmp(name, namebuf))
++ return 0;
++
++ return ret;
++}
++
++static int kallsyms_lookup_names(const char *name,
++ unsigned int *start,
++ unsigned int *end)
++{
++ int ret;
++ int low, mid, high;
++ unsigned int seq, off;
++ char namebuf[KSYM_NAME_LEN];
++
++ low = 0;
++ high = kallsyms_num_syms - 1;
++
++ while (low <= high) {
++ mid = low + (high - low) / 2;
++ seq = kallsyms_seqs_of_names[mid];
++ off = get_symbol_offset(seq);
++ kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf));
++ ret = compare_symbol_name(name, namebuf);
++ if (ret > 0)
++ low = mid + 1;
++ else if (ret < 0)
++ high = mid - 1;
++ else
++ break;
++ }
++
++ if (low > high)
++ return -ESRCH;
++
++ low = mid;
++ while (low) {
++ seq = kallsyms_seqs_of_names[low - 1];
++ off = get_symbol_offset(seq);
++ kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf));
++ if (compare_symbol_name(name, namebuf))
++ break;
++ low--;
++ }
++ *start = low;
++
++ if (end) {
++ high = mid;
++ while (high < kallsyms_num_syms - 1) {
++ seq = kallsyms_seqs_of_names[high + 1];
++ off = get_symbol_offset(seq);
++ kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf));
++ if (compare_symbol_name(name, namebuf))
++ break;
++ high++;
++ }
++ *end = high;
++ }
++
++ return 0;
++}
++
+ /* Lookup the address for this symbol. Returns 0 if not found. */
+ unsigned long kallsyms_lookup_name(const char *name)
+ {
+- char namebuf[KSYM_NAME_LEN];
+- unsigned long i;
+- unsigned int off;
++ int ret;
++ unsigned int i;
+
+ /* Skip the search for empty string. */
+ if (!*name)
+ return 0;
+
+- for (i = 0, off = 0; i < kallsyms_num_syms; i++) {
+- off = kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf));
+-
+- if (strcmp(namebuf, name) == 0)
+- return kallsyms_sym_address(i);
++ ret = kallsyms_lookup_names(name, &i, NULL);
++ if (!ret)
++ return kallsyms_sym_address(kallsyms_seqs_of_names[i]);
+
+- if (cleanup_symbol_name(namebuf) && strcmp(namebuf, name) == 0)
+- return kallsyms_sym_address(i);
+- }
+ return module_kallsyms_lookup_name(name);
+ }
+
+diff --git a/kernel/kallsyms_internal.h b/kernel/kallsyms_internal.h
+index 2d0c6f2f0243a..a04b7a5cb1e3e 100644
+--- a/kernel/kallsyms_internal.h
++++ b/kernel/kallsyms_internal.h
+@@ -26,5 +26,6 @@ extern const char kallsyms_token_table[] __weak;
+ extern const u16 kallsyms_token_index[] __weak;
+
+ extern const unsigned int kallsyms_markers[] __weak;
++extern const unsigned int kallsyms_seqs_of_names[] __weak;
+
+ #endif // LINUX_KALLSYMS_INTERNAL_H_
+diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
+index 03fa07ad45d95..dcb744a067e5e 100644
+--- a/scripts/kallsyms.c
++++ b/scripts/kallsyms.c
+@@ -49,6 +49,7 @@ _Static_assert(
+ struct sym_entry {
+ unsigned long long addr;
+ unsigned int len;
++ unsigned int seq;
+ unsigned int start_pos;
+ unsigned int percpu_absolute;
+ unsigned char sym[];
+@@ -410,6 +411,35 @@ static int symbol_absolute(const struct sym_entry *s)
+ return s->percpu_absolute;
+ }
+
++static int compare_names(const void *a, const void *b)
++{
++ int ret;
++ char sa_namebuf[KSYM_NAME_LEN];
++ char sb_namebuf[KSYM_NAME_LEN];
++ const struct sym_entry *sa = *(const struct sym_entry **)a;
++ const struct sym_entry *sb = *(const struct sym_entry **)b;
++
++ expand_symbol(sa->sym, sa->len, sa_namebuf);
++ expand_symbol(sb->sym, sb->len, sb_namebuf);
++ ret = strcmp(&sa_namebuf[1], &sb_namebuf[1]);
++ if (!ret) {
++ if (sa->addr > sb->addr)
++ return 1;
++ else if (sa->addr < sb->addr)
++ return -1;
++
++ /* keep old order */
++ return (int)(sa->seq - sb->seq);
++ }
++
++ return ret;
++}
++
++static void sort_symbols_by_name(void)
++{
++ qsort(table, table_cnt, sizeof(table[0]), compare_names);
++}
++
+ static void write_src(void)
+ {
+ unsigned int i, k, off;
+@@ -495,6 +525,7 @@ static void write_src(void)
+ for (i = 0; i < table_cnt; i++) {
+ if ((i & 0xFF) == 0)
+ markers[i >> 8] = off;
++ table[i]->seq = i;
+
+ /* There cannot be any symbol of length zero. */
+ if (table[i]->len == 0) {
+@@ -535,6 +566,12 @@ static void write_src(void)
+
+ free(markers);
+
++ sort_symbols_by_name();
++ output_label("kallsyms_seqs_of_names");
++ for (i = 0; i < table_cnt; i++)
++ printf("\t.long\t%u\n", table[i]->seq);
++ printf("\n");
++
+ output_label("kallsyms_token_table");
+ off = 0;
+ for (i = 0; i < 256; i++) {
+--
+2.39.2
+
--- /dev/null
+From 8ed9d429c7185d4b3fe9ef6360e3f9e6f63265c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Jun 2023 11:19:26 -0700
+Subject: kallsyms: strip LTO-only suffixes from promoted global functions
+
+From: Yonghong Song <yhs@fb.com>
+
+[ Upstream commit 8cc32a9bbf2934d90762d9de0187adcb5ad46a11 ]
+
+Commit 6eb4bd92c1ce ("kallsyms: strip LTO suffixes from static functions")
+stripped all function/variable suffixes starting with '.' regardless
+of whether those suffixes are generated in LTO mode or not. In fact,
+as far as I know, in LTO mode, when a static function/variable is
+promoted to the global scope, a '.llvm.<...>' suffix is added.
+
+The existing mechanism breaks live patching for an LTO kernel even if
+no <symbol>.llvm.<...> symbols are involved. For example, for the following
+kernel symbols:
+ $ grep bpf_verifier_vlog /proc/kallsyms
+ ffffffff81549f60 t bpf_verifier_vlog
+ ffffffff8268b430 d bpf_verifier_vlog._entry
+ ffffffff8282a958 d bpf_verifier_vlog._entry_ptr
+ ffffffff82e12a1f d bpf_verifier_vlog.__already_done
+'bpf_verifier_vlog' is a static function. '_entry', '_entry_ptr' and
+'__already_done' are static variables used inside 'bpf_verifier_vlog',
+so llvm promotes them to file-level static with prefix 'bpf_verifier_vlog.'.
+Note that the func-level to file-level static function promotion also
+happens without LTO.
+
+Given the symbol name 'bpf_verifier_vlog' on an LTO kernel, the current
+mechanism will return 4 symbols to the live patching subsystem, which it
+cannot handle. On a non-LTO kernel, only one symbol is returned.
+
+In [1], we have a lengthy discussion, the suggestion is to separate two
+cases:
+ (1). new symbols with suffix which are generated regardless of whether
+ LTO is enabled or not, and
+ (2). new symbols with suffix generated only when LTO is enabled.
+
+The cleanup_symbol_name() should only remove suffixes for case (2).
+Case (1) should not be changed so it can work uniformly with or without LTO.
+
+This patch removes the LTO-only suffix '.llvm.<...>', so live patching and
+tracing should work the same way as on a non-LTO kernel.
+cleanup_symbol_name() in scripts/kallsyms.c is also changed to use the same
+filtering pattern, so both the kernel and the kallsyms tool have the same
+expectation about the order of symbols.
+
+ [1] https://lore.kernel.org/live-patching/20230615170048.2382735-1-song@kernel.org/T/#u
+
+Fixes: 6eb4bd92c1ce ("kallsyms: strip LTO suffixes from static functions")
+Reported-by: Song Liu <song@kernel.org>
+Signed-off-by: Yonghong Song <yhs@fb.com>
+Reviewed-by: Zhen Lei <thunder.leizhen@huawei.com>
+Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
+Acked-by: Song Liu <song@kernel.org>
+Link: https://lore.kernel.org/r/20230628181926.4102448-1-yhs@fb.com
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/kallsyms.c | 5 ++---
+ scripts/kallsyms.c | 6 +++---
+ 2 files changed, 5 insertions(+), 6 deletions(-)
+
+diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
+index ba351dfa109b6..676328a7c8c75 100644
+--- a/kernel/kallsyms.c
++++ b/kernel/kallsyms.c
+@@ -174,11 +174,10 @@ static bool cleanup_symbol_name(char *s)
+ * LLVM appends various suffixes for local functions and variables that
+ * must be promoted to global scope as part of LTO. This can break
+ * hooking of static functions with kprobes. '.' is not a valid
+- * character in an identifier in C. Suffixes observed:
++ * character in an identifier in C. Suffixes only in LLVM LTO observed:
+ * - foo.llvm.[0-9a-f]+
+- * - foo.[0-9a-f]+
+ */
+- res = strchr(s, '.');
++ res = strstr(s, ".llvm.");
+ if (res) {
+ *res = '\0';
+ return true;
+diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
+index 67ef9aa14a770..51edc73e2ebf8 100644
+--- a/scripts/kallsyms.c
++++ b/scripts/kallsyms.c
+@@ -432,10 +432,10 @@ static void cleanup_symbol_name(char *s)
+ * ASCII[_] = 5f
+ * ASCII[a-z] = 61,7a
+ *
+- * As above, replacing '.' with '\0' does not affect the main sorting,
+- * but it helps us with subsorting.
++ * As above, replacing the first '.' in ".llvm." with '\0' does not
++ * affect the main sorting, but it helps us with subsorting.
+ */
+- p = strchr(s, '.');
++ p = strstr(s, ".llvm.");
+ if (p)
+ *p = '\0';
+ }
+--
+2.39.2
+
--- /dev/null
+From e9fa3eef2ea63154cf4655e320d9deee9b91fb21 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Jul 2023 10:41:51 -0700
+Subject: llc: Don't drop packet from non-root netns.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 6631463b6e6673916d2481f692938f393148aa82 ]
+
+Now these upper layer protocol handlers can be called from llc_rcv()
+as sap->rcv_func(), which is registered by llc_sap_open().
+
+ * function which is passed to register_8022_client()
+ -> no in-kernel user calls register_8022_client().
+
+ * snap_rcv()
+ `- proto->rcvfunc() : registered by register_snap_client()
+ -> aarp_rcv() and atalk_rcv() drop packets from non-root netns
+
+ * stp_pdu_rcv()
+ `- garp_protos[]->rcv() : registered by stp_proto_register()
+ -> garp_pdu_rcv() and br_stp_rcv() are netns-aware
+
+So, we can safely remove the netns restriction in llc_rcv().
+
+Fixes: e730c15519d0 ("[NET]: Make packet reception network namespace safe")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/llc/llc_input.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
+index c309b72a58779..7cac441862e21 100644
+--- a/net/llc/llc_input.c
++++ b/net/llc/llc_input.c
+@@ -163,9 +163,6 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
+ void (*sta_handler)(struct sk_buff *skb);
+ void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb);
+
+- if (!net_eq(dev_net(dev), &init_net))
+- goto drop;
+-
+ /*
+ * When the interface is in promisc. mode, drop all the crap that it
+ * receives, do not try to analyse it.
+--
+2.39.2
+
--- /dev/null
+From ef01382e1c734299b56bde7f6a5678e14939f8a4 Mon Sep 17 00:00:00 2001
+From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
+Date: Thu, 22 Jun 2023 17:43:57 -0600
+Subject: [PATCH AUTOSEL 4.19 09/11] MIPS: dec: prom: Address -Warray-bounds
+ warning
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit 7b191b9b55df2a844bd32d1d380f47a7df1c2896 ]
+
+Zero-length arrays are deprecated, and we are replacing them with flexible
+array members instead. So, replace zero-length array with flexible-array
+member in struct memmap.
+
+Address the following warning found after building (with GCC-13) mips64
+with decstation_64_defconfig:
+In function 'rex_setup_memory_region',
+ inlined from 'prom_meminit' at arch/mips/dec/prom/memory.c:91:3:
+arch/mips/dec/prom/memory.c:72:31: error: array subscript i is outside array bounds of 'unsigned char[0]' [-Werror=array-bounds=]
+ 72 | if (bm->bitmap[i] == 0xff)
+ | ~~~~~~~~~~^~~
+In file included from arch/mips/dec/prom/memory.c:16:
+./arch/mips/include/asm/dec/prom.h: In function 'prom_meminit':
+./arch/mips/include/asm/dec/prom.h:73:23: note: while referencing 'bitmap'
+ 73 | unsigned char bitmap[0];
+
+This helps with the ongoing efforts to globally enable -Warray-bounds.
+
+This results in no differences in binary output.
+
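+For reference, the flexible-array form of the structure is simply:
+
+  typedef struct {
+          int pagesize;
+          unsigned char bitmap[];  /* flexible array member, no fixed bound */
+  } memmap;
+
+sizeof(memmap) is unchanged (the trailing array contributes no storage
+either way), but the compiler no longer treats bm->bitmap[i] as an
+out-of-bounds access into a zero-length array.
+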
+Link: https://github.com/KSPP/linux/issues/79
+Link: https://github.com/KSPP/linux/issues/323
+Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/mips/include/asm/dec/prom.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/mips/include/asm/dec/prom.h b/arch/mips/include/asm/dec/prom.h
+index 09538ff5e9245..6f0405ba27d6d 100644
+--- a/arch/mips/include/asm/dec/prom.h
++++ b/arch/mips/include/asm/dec/prom.h
+@@ -74,7 +74,7 @@ static inline bool prom_is_rex(u32 magic)
+ */
+ typedef struct {
+ int pagesize;
+- unsigned char bitmap[0];
++ unsigned char bitmap[];
+ } memmap;
+
+
+--
+2.39.2
+
--- /dev/null
+From 2ad98a4006851a288ac932c2345ea6a91933390c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 17:46:22 -0700
+Subject: net: dsa: microchip: correct KSZ8795 static MAC table access
+
+From: Tristram Ha <Tristram.Ha@microchip.com>
+
+[ Upstream commit 4bdf79d686b49ac49373b36466acfb93972c7d7c ]
+
+The KSZ8795 driver code was modified for use on KSZ8863/73, which have
+different register definitions. Some of the new KSZ8795 register
+information is wrong compared to the previous code.
+
+KSZ8795 also behaves differently in that the STATIC_MAC_TABLE_USE_FID
+and STATIC_MAC_TABLE_FID bits are off by 1 when reading the MAC table
+compared to writing it. To compensate, special code was added to shift
+the register value by 1 before applying those bits. This is wrong when
+the code is running on KSZ8863, so this special code is only executed
+when KSZ8795 is detected.
+
+Fixes: 4b20a07e103f ("net: dsa: microchip: ksz8795: add support for ksz88xx chips")
+Signed-off-by: Tristram Ha <Tristram.Ha@microchip.com>
+Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/microchip/ksz8795.c | 8 +++++++-
+ drivers/net/dsa/microchip/ksz_common.c | 8 ++++----
+ drivers/net/dsa/microchip/ksz_common.h | 7 +++++++
+ 3 files changed, 18 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
+index 6639fae56da7f..c63e082dc57dc 100644
+--- a/drivers/net/dsa/microchip/ksz8795.c
++++ b/drivers/net/dsa/microchip/ksz8795.c
+@@ -437,7 +437,13 @@ static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
+ (data_hi & masks[STATIC_MAC_TABLE_FWD_PORTS]) >>
+ shifts[STATIC_MAC_FWD_PORTS];
+ alu->is_override = (data_hi & masks[STATIC_MAC_TABLE_OVERRIDE]) ? 1 : 0;
+- data_hi >>= 1;
++
++ /* KSZ8795 family switches have STATIC_MAC_TABLE_USE_FID and
++ * STATIC_MAC_TABLE_FID definitions off by 1 when doing read on the
++ * static MAC table compared to doing write.
++ */
++ if (ksz_is_ksz87xx(dev))
++ data_hi >>= 1;
+ alu->is_static = true;
+ alu->is_use_fid = (data_hi & masks[STATIC_MAC_TABLE_USE_FID]) ? 1 : 0;
+ alu->fid = (data_hi & masks[STATIC_MAC_TABLE_FID]) >>
+diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
+index 3d59298eaa5cf..8c492d56d2c36 100644
+--- a/drivers/net/dsa/microchip/ksz_common.c
++++ b/drivers/net/dsa/microchip/ksz_common.c
+@@ -286,13 +286,13 @@ static const u32 ksz8795_masks[] = {
+ [STATIC_MAC_TABLE_VALID] = BIT(21),
+ [STATIC_MAC_TABLE_USE_FID] = BIT(23),
+ [STATIC_MAC_TABLE_FID] = GENMASK(30, 24),
+- [STATIC_MAC_TABLE_OVERRIDE] = BIT(26),
+- [STATIC_MAC_TABLE_FWD_PORTS] = GENMASK(24, 20),
++ [STATIC_MAC_TABLE_OVERRIDE] = BIT(22),
++ [STATIC_MAC_TABLE_FWD_PORTS] = GENMASK(20, 16),
+ [DYNAMIC_MAC_TABLE_ENTRIES_H] = GENMASK(6, 0),
+- [DYNAMIC_MAC_TABLE_MAC_EMPTY] = BIT(8),
++ [DYNAMIC_MAC_TABLE_MAC_EMPTY] = BIT(7),
+ [DYNAMIC_MAC_TABLE_NOT_READY] = BIT(7),
+ [DYNAMIC_MAC_TABLE_ENTRIES] = GENMASK(31, 29),
+- [DYNAMIC_MAC_TABLE_FID] = GENMASK(26, 20),
++ [DYNAMIC_MAC_TABLE_FID] = GENMASK(22, 16),
+ [DYNAMIC_MAC_TABLE_SRC_PORT] = GENMASK(26, 24),
+ [DYNAMIC_MAC_TABLE_TIMESTAMP] = GENMASK(28, 27),
+ [P_MII_TX_FLOW_CTRL] = BIT(5),
+diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
+index 9cfa179575ce8..d1b2db8e65331 100644
+--- a/drivers/net/dsa/microchip/ksz_common.h
++++ b/drivers/net/dsa/microchip/ksz_common.h
+@@ -512,6 +512,13 @@ static inline void ksz_regmap_unlock(void *__mtx)
+ mutex_unlock(mtx);
+ }
+
++static inline bool ksz_is_ksz87xx(struct ksz_device *dev)
++{
++ return dev->chip_id == KSZ8795_CHIP_ID ||
++ dev->chip_id == KSZ8794_CHIP_ID ||
++ dev->chip_id == KSZ8765_CHIP_ID;
++}
++
+ static inline bool ksz_is_ksz88x3(struct ksz_device *dev)
+ {
+ return dev->chip_id == KSZ8830_CHIP_ID;
+--
+2.39.2
+
--- /dev/null
+From 25ba53cf4a6b0cb809c74f265b2e1cd0d00ea850 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Apr 2023 12:18:38 +0200
+Subject: net: dsa: microchip: ksz8: Make ksz8_r_sta_mac_table() static
+
+From: Oleksij Rempel <o.rempel@pengutronix.de>
+
+[ Upstream commit b5751cdd7dbe618a03951bdd4c982a71ba448b1b ]
+
+As ksz8_r_sta_mac_table() is only used within ksz8795.c, there is no need
+to export it. Make the function static for better encapsulation.
+
+Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
+Acked-by: Arun Ramadoss <arun.ramadoss@microchip.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: 4bdf79d686b4 ("net: dsa: microchip: correct KSZ8795 static MAC table access")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/microchip/ksz8.h | 2 --
+ drivers/net/dsa/microchip/ksz8795.c | 4 ++--
+ 2 files changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/dsa/microchip/ksz8.h b/drivers/net/dsa/microchip/ksz8.h
+index 8582b4b67d989..28137c4bf2928 100644
+--- a/drivers/net/dsa/microchip/ksz8.h
++++ b/drivers/net/dsa/microchip/ksz8.h
+@@ -21,8 +21,6 @@ int ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val);
+ int ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val);
+ int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr, u8 *mac_addr,
+ u8 *fid, u8 *src_port, u8 *timestamp, u16 *entries);
+-int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
+- struct alu_struct *alu);
+ void ksz8_w_sta_mac_table(struct ksz_device *dev, u16 addr,
+ struct alu_struct *alu);
+ void ksz8_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, u64 *cnt);
+diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
+index 38fd9b8e0287a..a2f67be66b97d 100644
+--- a/drivers/net/dsa/microchip/ksz8795.c
++++ b/drivers/net/dsa/microchip/ksz8795.c
+@@ -406,8 +406,8 @@ int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr, u8 *mac_addr,
+ return rc;
+ }
+
+-int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
+- struct alu_struct *alu)
++static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
++ struct alu_struct *alu)
+ {
+ u32 data_hi, data_lo;
+ const u8 *shifts;
+--
+2.39.2
+
--- /dev/null
+From 07866a478229526bd65ea5676f89ffc143c3e040 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Apr 2023 12:18:36 +0200
+Subject: net: dsa: microchip: ksz8: Separate static MAC table operations for
+ code reuse
+
+From: Oleksij Rempel <o.rempel@pengutronix.de>
+
+[ Upstream commit f6636ff69ec4f2c94a5ee1d032b21cfe1e0a5678 ]
+
+Move static MAC table operations to separate functions in order to reuse
+the code for add/del_fdb. This is needed to address kernel warnings
+caused by the lack of fdb add function support in the current driver.
+
+Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: 4bdf79d686b4 ("net: dsa: microchip: correct KSZ8795 static MAC table access")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/microchip/ksz8795.c | 34 +++++++++++++++++++----------
+ 1 file changed, 23 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
+index 22250ae222b5b..38fd9b8e0287a 100644
+--- a/drivers/net/dsa/microchip/ksz8795.c
++++ b/drivers/net/dsa/microchip/ksz8795.c
+@@ -926,8 +926,8 @@ int ksz8_fdb_dump(struct ksz_device *dev, int port,
+ return ret;
+ }
+
+-int ksz8_mdb_add(struct ksz_device *dev, int port,
+- const struct switchdev_obj_port_mdb *mdb, struct dsa_db db)
++static int ksz8_add_sta_mac(struct ksz_device *dev, int port,
++ const unsigned char *addr, u16 vid)
+ {
+ struct alu_struct alu;
+ int index;
+@@ -937,8 +937,8 @@ int ksz8_mdb_add(struct ksz_device *dev, int port,
+ for (index = 0; index < dev->info->num_statics; index++) {
+ if (!ksz8_r_sta_mac_table(dev, index, &alu)) {
+ /* Found one already in static MAC table. */
+- if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) &&
+- alu.fid == mdb->vid)
++ if (!memcmp(alu.mac, addr, ETH_ALEN) &&
++ alu.fid == vid)
+ break;
+ /* Remember the first empty entry. */
+ } else if (!empty) {
+@@ -954,23 +954,23 @@ int ksz8_mdb_add(struct ksz_device *dev, int port,
+ if (index == dev->info->num_statics) {
+ index = empty - 1;
+ memset(&alu, 0, sizeof(alu));
+- memcpy(alu.mac, mdb->addr, ETH_ALEN);
++ memcpy(alu.mac, addr, ETH_ALEN);
+ alu.is_static = true;
+ }
+ alu.port_forward |= BIT(port);
+- if (mdb->vid) {
++ if (vid) {
+ alu.is_use_fid = true;
+
+ /* Need a way to map VID to FID. */
+- alu.fid = mdb->vid;
++ alu.fid = vid;
+ }
+ ksz8_w_sta_mac_table(dev, index, &alu);
+
+ return 0;
+ }
+
+-int ksz8_mdb_del(struct ksz_device *dev, int port,
+- const struct switchdev_obj_port_mdb *mdb, struct dsa_db db)
++static int ksz8_del_sta_mac(struct ksz_device *dev, int port,
++ const unsigned char *addr, u16 vid)
+ {
+ struct alu_struct alu;
+ int index;
+@@ -978,8 +978,8 @@ int ksz8_mdb_del(struct ksz_device *dev, int port,
+ for (index = 0; index < dev->info->num_statics; index++) {
+ if (!ksz8_r_sta_mac_table(dev, index, &alu)) {
+ /* Found one already in static MAC table. */
+- if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) &&
+- alu.fid == mdb->vid)
++ if (!memcmp(alu.mac, addr, ETH_ALEN) &&
++ alu.fid == vid)
+ break;
+ }
+ }
+@@ -998,6 +998,18 @@ int ksz8_mdb_del(struct ksz_device *dev, int port,
+ return 0;
+ }
+
++int ksz8_mdb_add(struct ksz_device *dev, int port,
++ const struct switchdev_obj_port_mdb *mdb, struct dsa_db db)
++{
++ return ksz8_add_sta_mac(dev, port, mdb->addr, mdb->vid);
++}
++
++int ksz8_mdb_del(struct ksz_device *dev, int port,
++ const struct switchdev_obj_port_mdb *mdb, struct dsa_db db)
++{
++ return ksz8_del_sta_mac(dev, port, mdb->addr, mdb->vid);
++}
++
+ int ksz8_port_vlan_filtering(struct ksz_device *dev, int port, bool flag,
+ struct netlink_ext_ack *extack)
+ {
+--
+2.39.2
+
--- /dev/null
+From fe300e7a9fd658eb7004931d40d174aea1c803a0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Apr 2023 12:18:39 +0200
+Subject: net: dsa: microchip: ksz8_r_sta_mac_table(): Avoid using error code
+ for empty entries
+
+From: Oleksij Rempel <o.rempel@pengutronix.de>
+
+[ Upstream commit 559901b46810e82ba5321a5e789f994b65d3bc3d ]
+
+Prepare for the next patch by ensuring that ksz8_r_sta_mac_table() does
+not use error codes for empty entries. This change will enable better
+handling of read/write errors in the upcoming patch.
+
+Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: 4bdf79d686b4 ("net: dsa: microchip: correct KSZ8795 static MAC table access")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/microchip/ksz8795.c | 87 +++++++++++++++++------------
+ 1 file changed, 50 insertions(+), 37 deletions(-)
+
+diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
+index a2f67be66b97d..6639fae56da7f 100644
+--- a/drivers/net/dsa/microchip/ksz8795.c
++++ b/drivers/net/dsa/microchip/ksz8795.c
+@@ -407,7 +407,7 @@ int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr, u8 *mac_addr,
+ }
+
+ static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
+- struct alu_struct *alu)
++ struct alu_struct *alu, bool *valid)
+ {
+ u32 data_hi, data_lo;
+ const u8 *shifts;
+@@ -420,28 +420,32 @@ static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
+ ksz8_r_table(dev, TABLE_STATIC_MAC, addr, &data);
+ data_hi = data >> 32;
+ data_lo = (u32)data;
+- if (data_hi & (masks[STATIC_MAC_TABLE_VALID] |
+- masks[STATIC_MAC_TABLE_OVERRIDE])) {
+- alu->mac[5] = (u8)data_lo;
+- alu->mac[4] = (u8)(data_lo >> 8);
+- alu->mac[3] = (u8)(data_lo >> 16);
+- alu->mac[2] = (u8)(data_lo >> 24);
+- alu->mac[1] = (u8)data_hi;
+- alu->mac[0] = (u8)(data_hi >> 8);
+- alu->port_forward =
+- (data_hi & masks[STATIC_MAC_TABLE_FWD_PORTS]) >>
+- shifts[STATIC_MAC_FWD_PORTS];
+- alu->is_override =
+- (data_hi & masks[STATIC_MAC_TABLE_OVERRIDE]) ? 1 : 0;
+- data_hi >>= 1;
+- alu->is_static = true;
+- alu->is_use_fid =
+- (data_hi & masks[STATIC_MAC_TABLE_USE_FID]) ? 1 : 0;
+- alu->fid = (data_hi & masks[STATIC_MAC_TABLE_FID]) >>
+- shifts[STATIC_MAC_FID];
++
++ if (!(data_hi & (masks[STATIC_MAC_TABLE_VALID] |
++ masks[STATIC_MAC_TABLE_OVERRIDE]))) {
++ *valid = false;
+ return 0;
+ }
+- return -ENXIO;
++
++ alu->mac[5] = (u8)data_lo;
++ alu->mac[4] = (u8)(data_lo >> 8);
++ alu->mac[3] = (u8)(data_lo >> 16);
++ alu->mac[2] = (u8)(data_lo >> 24);
++ alu->mac[1] = (u8)data_hi;
++ alu->mac[0] = (u8)(data_hi >> 8);
++ alu->port_forward =
++ (data_hi & masks[STATIC_MAC_TABLE_FWD_PORTS]) >>
++ shifts[STATIC_MAC_FWD_PORTS];
++ alu->is_override = (data_hi & masks[STATIC_MAC_TABLE_OVERRIDE]) ? 1 : 0;
++ data_hi >>= 1;
++ alu->is_static = true;
++ alu->is_use_fid = (data_hi & masks[STATIC_MAC_TABLE_USE_FID]) ? 1 : 0;
++ alu->fid = (data_hi & masks[STATIC_MAC_TABLE_FID]) >>
++ shifts[STATIC_MAC_FID];
++
++ *valid = true;
++
++ return 0;
+ }
+
+ void ksz8_w_sta_mac_table(struct ksz_device *dev, u16 addr,
+@@ -930,20 +934,25 @@ static int ksz8_add_sta_mac(struct ksz_device *dev, int port,
+ const unsigned char *addr, u16 vid)
+ {
+ struct alu_struct alu;
+- int index;
++ int index, ret;
+ int empty = 0;
+
+ alu.port_forward = 0;
+ for (index = 0; index < dev->info->num_statics; index++) {
+- if (!ksz8_r_sta_mac_table(dev, index, &alu)) {
+- /* Found one already in static MAC table. */
+- if (!memcmp(alu.mac, addr, ETH_ALEN) &&
+- alu.fid == vid)
+- break;
+- /* Remember the first empty entry. */
+- } else if (!empty) {
+- empty = index + 1;
++ bool valid;
++
++ ret = ksz8_r_sta_mac_table(dev, index, &alu, &valid);
++ if (ret)
++ return ret;
++ if (!valid) {
++ /* Remember the first empty entry. */
++ if (!empty)
++ empty = index + 1;
++ continue;
+ }
++
++ if (!memcmp(alu.mac, addr, ETH_ALEN) && alu.fid == vid)
++ break;
+ }
+
+ /* no available entry */
+@@ -973,15 +982,19 @@ static int ksz8_del_sta_mac(struct ksz_device *dev, int port,
+ const unsigned char *addr, u16 vid)
+ {
+ struct alu_struct alu;
+- int index;
++ int index, ret;
+
+ for (index = 0; index < dev->info->num_statics; index++) {
+- if (!ksz8_r_sta_mac_table(dev, index, &alu)) {
+- /* Found one already in static MAC table. */
+- if (!memcmp(alu.mac, addr, ETH_ALEN) &&
+- alu.fid == vid)
+- break;
+- }
++ bool valid;
++
++ ret = ksz8_r_sta_mac_table(dev, index, &alu, &valid);
++ if (ret)
++ return ret;
++ if (!valid)
++ continue;
++
++ if (!memcmp(alu.mac, addr, ETH_ALEN) && alu.fid == vid)
++ break;
+ }
+
+ /* no available entry */
+--
+2.39.2
+
--- /dev/null
+From d4038c95e83f7d2c42f76634c0bd1e407d38b652 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Jun 2023 00:20:35 +0800
+Subject: net: ethernet: litex: add support for 64 bit stats
+
+From: Jisheng Zhang <jszhang@kernel.org>
+
+[ Upstream commit 18da174d865a87d47d2f33f5b0a322efcf067728 ]
+
+Implement 64 bit per cpu stats to fix the overflow of netdev->stats
+on 32 bit platforms. To simplify the code, we use net core
+pcpu_sw_netstats infrastructure. One small drawback is some memory
+overhead because litex uses just one queue, but we allocate the
+counters per cpu.
+
+Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Acked-by: Gabriel Somlo <gsomlo@gmail.com>
+Link: https://lore.kernel.org/r/20230614162035.300-1-jszhang@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/litex/litex_liteeth.c | 19 +++++++++++++++----
+ 1 file changed, 15 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/litex/litex_liteeth.c b/drivers/net/ethernet/litex/litex_liteeth.c
+index 35f24e0f09349..ffa96059079c6 100644
+--- a/drivers/net/ethernet/litex/litex_liteeth.c
++++ b/drivers/net/ethernet/litex/litex_liteeth.c
+@@ -78,8 +78,7 @@ static int liteeth_rx(struct net_device *netdev)
+ memcpy_fromio(data, priv->rx_base + rx_slot * priv->slot_size, len);
+ skb->protocol = eth_type_trans(skb, netdev);
+
+- netdev->stats.rx_packets++;
+- netdev->stats.rx_bytes += len;
++ dev_sw_netstats_rx_add(netdev, len);
+
+ return netif_rx(skb);
+
+@@ -185,8 +184,7 @@ static netdev_tx_t liteeth_start_xmit(struct sk_buff *skb,
+ litex_write16(priv->base + LITEETH_READER_LENGTH, skb->len);
+ litex_write8(priv->base + LITEETH_READER_START, 1);
+
+- netdev->stats.tx_bytes += skb->len;
+- netdev->stats.tx_packets++;
++ dev_sw_netstats_tx_add(netdev, 1, skb->len);
+
+ priv->tx_slot = (priv->tx_slot + 1) % priv->num_tx_slots;
+ dev_kfree_skb_any(skb);
+@@ -194,9 +192,17 @@ static netdev_tx_t liteeth_start_xmit(struct sk_buff *skb,
+ return NETDEV_TX_OK;
+ }
+
++static void
++liteeth_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
++{
++ netdev_stats_to_stats64(stats, &netdev->stats);
++ dev_fetch_sw_netstats(stats, netdev->tstats);
++}
++
+ static const struct net_device_ops liteeth_netdev_ops = {
+ .ndo_open = liteeth_open,
+ .ndo_stop = liteeth_stop,
++ .ndo_get_stats64 = liteeth_get_stats64,
+ .ndo_start_xmit = liteeth_start_xmit,
+ };
+
+@@ -242,6 +248,11 @@ static int liteeth_probe(struct platform_device *pdev)
+ priv->netdev = netdev;
+ priv->dev = &pdev->dev;
+
++ netdev->tstats = devm_netdev_alloc_pcpu_stats(&pdev->dev,
++ struct pcpu_sw_netstats);
++ if (!netdev->tstats)
++ return -ENOMEM;
++
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+--
+2.39.2
+
--- /dev/null
+From c3465911da1e9d1a7b64a1ed1f446f1ef9666ff2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 03:42:29 +0100
+Subject: net: ethernet: mtk_eth_soc: handle probe deferral
+
+From: Daniel Golle <daniel@makrotopia.org>
+
+[ Upstream commit 1d6d537dc55d1f42d16290f00157ac387985b95b ]
+
+Move the call to of_get_ethdev_address to mtk_add_mac which is part of
+the probe function and can hence itself return -EPROBE_DEFER should
+of_get_ethdev_address return -EPROBE_DEFER. This allows us to entirely
+get rid of the mtk_init function.
+
+The problem of of_get_ethdev_address returning -EPROBE_DEFER surfaced
+in situations in which the NVMEM provider holding the MAC address has
+not yet been loaded at the time mtk_eth_soc is initially probed. In
+this case probing of mtk_eth_soc should be deferred instead of falling
+back to a random MAC address, so that probing can be repeated once the
+NVMEM provider becomes available.
+
+Fixes: 656e705243fd ("net-next: mediatek: add support for MT7623 ethernet")
+Signed-off-by: Daniel Golle <daniel@makrotopia.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mediatek/mtk_eth_soc.c | 29 ++++++++-------------
+ 1 file changed, 11 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+index 49975924e2426..7e318133423a9 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -3425,23 +3425,6 @@ static int mtk_hw_deinit(struct mtk_eth *eth)
+ return 0;
+ }
+
+-static int __init mtk_init(struct net_device *dev)
+-{
+- struct mtk_mac *mac = netdev_priv(dev);
+- struct mtk_eth *eth = mac->hw;
+- int ret;
+-
+- ret = of_get_ethdev_address(mac->of_node, dev);
+- if (ret) {
+- /* If the mac address is invalid, use random mac address */
+- eth_hw_addr_random(dev);
+- dev_err(eth->dev, "generated random MAC address %pM\n",
+- dev->dev_addr);
+- }
+-
+- return 0;
+-}
+-
+ static void mtk_uninit(struct net_device *dev)
+ {
+ struct mtk_mac *mac = netdev_priv(dev);
+@@ -3789,7 +3772,6 @@ static const struct ethtool_ops mtk_ethtool_ops = {
+ };
+
+ static const struct net_device_ops mtk_netdev_ops = {
+- .ndo_init = mtk_init,
+ .ndo_uninit = mtk_uninit,
+ .ndo_open = mtk_open,
+ .ndo_stop = mtk_stop,
+@@ -3845,6 +3827,17 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
+ mac->hw = eth;
+ mac->of_node = np;
+
++ err = of_get_ethdev_address(mac->of_node, eth->netdev[id]);
++ if (err == -EPROBE_DEFER)
++ return err;
++
++ if (err) {
++ /* If the mac address is invalid, use random mac address */
++ eth_hw_addr_random(eth->netdev[id]);
++ dev_err(eth->dev, "generated random MAC address %pM\n",
++ eth->netdev[id]->dev_addr);
++ }
++
+ memset(mac->hwlro_ip, 0, sizeof(mac->hwlro_ip));
+ mac->hwlro_ip_cnt = 0;
+
+--
+2.39.2
+
--- /dev/null
+From c809a11a4b6d3cfd988c7fb48576f8544d3b1d7e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 12 Jul 2023 16:36:57 +0530
+Subject: net: ethernet: ti: cpsw_ale: Fix
+ cpsw_ale_get_field()/cpsw_ale_set_field()
+
+From: Tanmay Patil <t-patil@ti.com>
+
+[ Upstream commit b685f1a58956fa36cc01123f253351b25bfacfda ]
+
+CPSW ALE has 75 bit ALE entries which are stored within three 32 bit words.
+The cpsw_ale_get_field() and cpsw_ale_set_field() functions assume that the
+field will be strictly contained within one word. However, this is not
+guaranteed to be the case and it is possible for ALE field entries to span
+across at most two words.
+
+Fix the methods to handle getting/setting fields spanning up to two words.
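+
+As an illustration only, a simplified userspace sketch of the same extraction
+(same word ordering as the ALE entries, but not the driver code itself; fields
+are assumed to be narrower than 32 bits):
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    #define BITMASK(bits)  ((1u << (bits)) - 1)
+
+    /* entry[2] holds bits 0..31, entry[1] bits 32..63, entry[0] bits 64..74 */
+    static uint32_t get_field(const uint32_t *entry, uint32_t start, uint32_t bits)
+    {
+        uint32_t idx  = start / 32;
+        uint32_t idx2 = (start + bits - 1) / 32;
+        uint32_t hi = 0;
+
+        if (idx != idx2)      /* the field straddles a 32-bit word boundary */
+            hi = entry[2 - idx2] << (idx2 * 32 - start);
+
+        return (hi | (entry[2 - idx] >> (start - idx * 32))) & BITMASK(bits);
+    }
+
+    int main(void)
+    {
+        /* Bits 30..33 hold 0b1011: bits 30-31 sit in entry[2], 32-33 in entry[1]. */
+        uint32_t entry[3] = { 0, 0x2, 0xc0000000 };
+
+        printf("0x%x\n", get_field(entry, 30, 4));  /* prints 0xb */
+        return 0;
+    }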
+
+Fixes: db82173f23c5 ("netdev: driver: ethernet: add cpsw address lookup engine support")
+Signed-off-by: Tanmay Patil <t-patil@ti.com>
+[s-vadapalli@ti.com: rephrased commit message and added Fixes tag]
+Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/ti/cpsw_ale.c | 24 +++++++++++++++++++-----
+ 1 file changed, 19 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
+index 231370e9a8017..2647c18d40d95 100644
+--- a/drivers/net/ethernet/ti/cpsw_ale.c
++++ b/drivers/net/ethernet/ti/cpsw_ale.c
+@@ -106,23 +106,37 @@ struct cpsw_ale_dev_id {
+
+ static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits)
+ {
+- int idx;
++ int idx, idx2;
++ u32 hi_val = 0;
+
+ idx = start / 32;
++ idx2 = (start + bits - 1) / 32;
++ /* Check if bits to be fetched exceed a word */
++ if (idx != idx2) {
++ idx2 = 2 - idx2; /* flip */
++ hi_val = ale_entry[idx2] << ((idx2 * 32) - start);
++ }
+ start -= idx * 32;
+ idx = 2 - idx; /* flip */
+- return (ale_entry[idx] >> start) & BITMASK(bits);
++ return (hi_val + (ale_entry[idx] >> start)) & BITMASK(bits);
+ }
+
+ static inline void cpsw_ale_set_field(u32 *ale_entry, u32 start, u32 bits,
+ u32 value)
+ {
+- int idx;
++ int idx, idx2;
+
+ value &= BITMASK(bits);
+- idx = start / 32;
++ idx = start / 32;
++ idx2 = (start + bits - 1) / 32;
++ /* Check if bits to be set exceed a word */
++ if (idx != idx2) {
++ idx2 = 2 - idx2; /* flip */
++ ale_entry[idx2] &= ~(BITMASK(bits + start - (idx2 * 32)));
++ ale_entry[idx2] |= (value >> ((idx2 * 32) - start));
++ }
+ start -= idx * 32;
+- idx = 2 - idx; /* flip */
++ idx = 2 - idx; /* flip */
+ ale_entry[idx] &= ~(BITMASK(bits) << start);
+ ale_entry[idx] |= (value << start);
+ }
+--
+2.39.2
+
--- /dev/null
+From c7bac058c0b91ef65d58a3020117d8bad2853616 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Jun 2023 20:33:08 +0800
+Subject: net: hns3: fix strncpy() not using dest-buf length as length issue
+
+From: Hao Chen <chenhao418@huawei.com>
+
+[ Upstream commit 1cf3d5567f273a8746d1bade00633a93204f80f0 ]
+
+Now, strncpy() in hns3_dbg_fill_content() uses the source length as the
+copy length, which may result in a destination buffer overflow.
+
+This patch also fixes the compile warning reported by the Intel kernel
+test robot for the csky-linux-gcc (GCC) 12.1.0 compiler.
+
+The warning reports as below:
+
+hclge_debugfs.c:92:25: warning: 'strncpy' specified bound depends on
+the length of the source argument [-Wstringop-truncation]
+
+strncpy(pos, items[i].name, strlen(items[i].name));
+
+hclge_debugfs.c:90:25: warning: 'strncpy' output truncated before
+terminating nul copying as many bytes from a string as its length
+[-Wstringop-truncation]
+
+strncpy(pos, result[i], strlen(result[i]));
+
+So, this patch adds some length checks to avoid overflowing the destination
+buffer.
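+
+As a simplified userspace sketch of the bounded-fill idea (hypothetical
+helper, not the hns3 code itself), the remaining buffer length is tracked and
+the loop stops before any item could overflow it:
+
+    #include <stdio.h>
+    #include <string.h>
+
+    /* Append items to a fixed-size line buffer, separated by a space, and
+     * stop as soon as the next item would no longer fit.  Two bytes stay
+     * reserved for the trailing "\n\0". */
+    static void fill_line(char *buf, size_t len,
+                          const char *const *items, size_t n)
+    {
+        size_t i, item_len, used = 0;
+
+        if (len < 2)
+            return;
+        len -= 2;
+
+        for (i = 0; i < n; i++) {
+            item_len = strlen(items[i]) + 1;
+            if (len - used < item_len)
+                break;
+            memcpy(buf + used, items[i], item_len - 1);
+            buf[used + item_len - 1] = ' ';
+            used += item_len;
+        }
+        buf[used] = '\n';
+        buf[used + 1] = '\0';
+    }
+
+    int main(void)
+    {
+        static const char *const items[] = { "queue", "head", "tail" };
+        char line[16];
+
+        fill_line(line, sizeof(line), items, 3);
+        printf("%s", line);   /* "queue head \n": "tail" no longer fits */
+        return 0;
+    }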
+
+Signed-off-by: Hao Chen <chenhao418@huawei.com>
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/lkml/202207170606.7WtHs9yS-lkp@intel.com/T/
+Signed-off-by: Hao Lan <lanhao@huawei.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/hisilicon/hns3/hns3_debugfs.c | 31 ++++++++++++++-----
+ .../hisilicon/hns3/hns3pf/hclge_debugfs.c | 29 ++++++++++++++---
+ 2 files changed, 48 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+index bcccd82a2620f..f6ededec5a4fa 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+@@ -435,19 +435,36 @@ static void hns3_dbg_fill_content(char *content, u16 len,
+ const struct hns3_dbg_item *items,
+ const char **result, u16 size)
+ {
++#define HNS3_DBG_LINE_END_LEN 2
+ char *pos = content;
++ u16 item_len;
+ u16 i;
+
++ if (!len) {
++ return;
++ } else if (len <= HNS3_DBG_LINE_END_LEN) {
++ *pos++ = '\0';
++ return;
++ }
++
+ memset(content, ' ', len);
+- for (i = 0; i < size; i++) {
+- if (result)
+- strncpy(pos, result[i], strlen(result[i]));
+- else
+- strncpy(pos, items[i].name, strlen(items[i].name));
++ len -= HNS3_DBG_LINE_END_LEN;
+
+- pos += strlen(items[i].name) + items[i].interval;
++ for (i = 0; i < size; i++) {
++ item_len = strlen(items[i].name) + items[i].interval;
++ if (len < item_len)
++ break;
++
++ if (result) {
++ if (item_len < strlen(result[i]))
++ break;
++ strscpy(pos, result[i], strlen(result[i]));
++ } else {
++ strscpy(pos, items[i].name, strlen(items[i].name));
++ }
++ pos += item_len;
++ len -= item_len;
+ }
+-
+ *pos++ = '\n';
+ *pos++ = '\0';
+ }
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+index 142415c84c6b2..0ebc21401b7c2 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+@@ -87,16 +87,35 @@ static void hclge_dbg_fill_content(char *content, u16 len,
+ const struct hclge_dbg_item *items,
+ const char **result, u16 size)
+ {
++#define HCLGE_DBG_LINE_END_LEN 2
+ char *pos = content;
++ u16 item_len;
+ u16 i;
+
++ if (!len) {
++ return;
++ } else if (len <= HCLGE_DBG_LINE_END_LEN) {
++ *pos++ = '\0';
++ return;
++ }
++
+ memset(content, ' ', len);
++ len -= HCLGE_DBG_LINE_END_LEN;
++
+ for (i = 0; i < size; i++) {
+- if (result)
+- strncpy(pos, result[i], strlen(result[i]));
+- else
+- strncpy(pos, items[i].name, strlen(items[i].name));
+- pos += strlen(items[i].name) + items[i].interval;
++ item_len = strlen(items[i].name) + items[i].interval;
++ if (len < item_len)
++ break;
++
++ if (result) {
++ if (item_len < strlen(result[i]))
++ break;
++ strscpy(pos, result[i], strlen(result[i]));
++ } else {
++ strscpy(pos, items[i].name, strlen(items[i].name));
++ }
++ pos += item_len;
++ len -= item_len;
+ }
+ *pos++ = '\n';
+ *pos++ = '\0';
+--
+2.39.2
+
--- /dev/null
+From d2d9a97443c3d363ac55a22c42cc9e677b12faa3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 May 2023 18:14:52 +0200
+Subject: net: ipv4: use consistent txhash in TIME_WAIT and SYN_RECV
+
+From: Antoine Tenart <atenart@kernel.org>
+
+[ Upstream commit c0a8966e2bc7d31f77a7246947ebc09c1ff06066 ]
+
+When using IPv4/TCP, skb->hash comes from sk->sk_txhash except in
+TIME_WAIT and SYN_RECV where it's not set in the reply skb from
+ip_send_unicast_reply. Those packets will have a mismatched hash with
+others from the same flow as their hashes will be 0. IPv6 does not have
+the same issue as the hash is set from the socket txhash in those cases.
+
+This commit sets the hash in the reply skb from ip_send_unicast_reply,
+which makes the IPv4 code behave like IPv6.
+
+Signed-off-by: Antoine Tenart <atenart@kernel.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: 5e5265522a9a ("tcp: annotate data-races around tcp_rsk(req)->txhash")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/ip.h | 2 +-
+ net/ipv4/ip_output.c | 4 +++-
+ net/ipv4/tcp_ipv4.c | 14 +++++++++-----
+ 3 files changed, 13 insertions(+), 7 deletions(-)
+
+diff --git a/include/net/ip.h b/include/net/ip.h
+index acec504c469a0..83a1a9bc3ceb1 100644
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -282,7 +282,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
+ const struct ip_options *sopt,
+ __be32 daddr, __be32 saddr,
+ const struct ip_reply_arg *arg,
+- unsigned int len, u64 transmit_time);
++ unsigned int len, u64 transmit_time, u32 txhash);
+
+ #define IP_INC_STATS(net, field) SNMP_INC_STATS64((net)->mib.ip_statistics, field)
+ #define __IP_INC_STATS(net, field) __SNMP_INC_STATS64((net)->mib.ip_statistics, field)
+diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
+index 2a07588265c70..7b4ab545c06e0 100644
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -1691,7 +1691,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
+ const struct ip_options *sopt,
+ __be32 daddr, __be32 saddr,
+ const struct ip_reply_arg *arg,
+- unsigned int len, u64 transmit_time)
++ unsigned int len, u64 transmit_time, u32 txhash)
+ {
+ struct ip_options_data replyopts;
+ struct ipcm_cookie ipc;
+@@ -1754,6 +1754,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
+ arg->csum));
+ nskb->ip_summed = CHECKSUM_NONE;
+ nskb->mono_delivery_time = !!transmit_time;
++ if (txhash)
++ skb_set_hash(nskb, txhash, PKT_HASH_TYPE_L4);
+ ip_push_pending_frames(sk, &fl4);
+ }
+ out:
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index a7de5ba74e7f7..ef740983a1222 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -692,6 +692,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
+ u64 transmit_time = 0;
+ struct sock *ctl_sk;
+ struct net *net;
++ u32 txhash = 0;
+
+ /* Never send a reset in response to a reset. */
+ if (th->rst)
+@@ -829,6 +830,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
+ inet_twsk(sk)->tw_priority : sk->sk_priority;
+ transmit_time = tcp_transmit_time(sk);
+ xfrm_sk_clone_policy(ctl_sk, sk);
++ txhash = (sk->sk_state == TCP_TIME_WAIT) ?
++ inet_twsk(sk)->tw_txhash : sk->sk_txhash;
+ } else {
+ ctl_sk->sk_mark = 0;
+ ctl_sk->sk_priority = 0;
+@@ -837,7 +840,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
+ skb, &TCP_SKB_CB(skb)->header.h4.opt,
+ ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
+ &arg, arg.iov[0].iov_len,
+- transmit_time);
++ transmit_time, txhash);
+
+ xfrm_sk_free_policy(ctl_sk);
+ sock_net_set(ctl_sk, &init_net);
+@@ -859,7 +862,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
+ struct sk_buff *skb, u32 seq, u32 ack,
+ u32 win, u32 tsval, u32 tsecr, int oif,
+ struct tcp_md5sig_key *key,
+- int reply_flags, u8 tos)
++ int reply_flags, u8 tos, u32 txhash)
+ {
+ const struct tcphdr *th = tcp_hdr(skb);
+ struct {
+@@ -935,7 +938,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
+ skb, &TCP_SKB_CB(skb)->header.h4.opt,
+ ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
+ &arg, arg.iov[0].iov_len,
+- transmit_time);
++ transmit_time, txhash);
+
+ sock_net_set(ctl_sk, &init_net);
+ __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
+@@ -955,7 +958,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
+ tw->tw_bound_dev_if,
+ tcp_twsk_md5_key(tcptw),
+ tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
+- tw->tw_tos
++ tw->tw_tos,
++ tw->tw_txhash
+ );
+
+ inet_twsk_put(tw);
+@@ -988,7 +992,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+ 0,
+ tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
+ inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
+- ip_hdr(skb)->tos);
++ ip_hdr(skb)->tos, tcp_rsk(req)->txhash);
+ }
+
+ /*
+--
+2.39.2
+
--- /dev/null
+From 9ba17b30e66744d6805871a41ff330f6594f1806 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 17:59:19 +0800
+Subject: net: ipv4: Use kfree_sensitive instead of kfree
+
+From: Wang Ming <machel@vivo.com>
+
+[ Upstream commit daa751444fd9d4184270b1479d8af49aaf1a1ee6 ]
+
+The key buffer might contain the private part of the key, so it is better
+to use kfree_sensitive() to free it.
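+
+For illustration, a userspace analogue of the same idea (a hypothetical
+helper; the real kfree_sensitive() additionally keeps the compiler from
+optimising the clear away):
+
+    #include <stdlib.h>
+    #include <string.h>
+
+    /* Wipe the buffer before freeing so key material does not linger in
+     * freed memory. */
+    static void free_sensitive(void *ptr, size_t len)
+    {
+        if (!ptr)
+            return;
+        memset(ptr, 0, len);
+        free(ptr);
+    }
+
+    int main(void)
+    {
+        char *key = strdup("super-secret-key-material");
+
+        if (key)
+            free_sensitive(key, strlen(key) + 1);
+        return 0;
+    }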
+
+Fixes: 38320c70d282 ("[IPSEC]: Use crypto_aead and authenc in ESP")
+Signed-off-by: Wang Ming <machel@vivo.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/esp4.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
+index 52c8047efedbb..2d094d417ecae 100644
+--- a/net/ipv4/esp4.c
++++ b/net/ipv4/esp4.c
+@@ -1132,7 +1132,7 @@ static int esp_init_authenc(struct xfrm_state *x,
+ err = crypto_aead_setkey(aead, key, keylen);
+
+ free_key:
+- kfree(key);
++ kfree_sensitive(key);
+
+ error:
+ return err;
+--
+2.39.2
+
--- /dev/null
+From d40157f8faa30cf97d32dde6d80704d5d0898f75 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 22:45:19 +0800
+Subject: net:ipv6: check return value of pskb_trim()
+
+From: Yuanjun Gong <ruc_gongyuanjun@163.com>
+
+[ Upstream commit 4258faa130be4ea43e5e2d839467da421b8ff274 ]
+
+Goto tx_err if an unexpected result is returned by pskb_trim()
+in ip6erspan_tunnel_xmit().
+
+Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
+Signed-off-by: Yuanjun Gong <ruc_gongyuanjun@163.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/ip6_gre.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
+index 216b40ccadae0..d3fba7d8dec4e 100644
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -977,7 +977,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
+ goto tx_err;
+
+ if (skb->len > dev->mtu + dev->hard_header_len) {
+- pskb_trim(skb, dev->mtu + dev->hard_header_len);
++ if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
++ goto tx_err;
+ truncate = true;
+ }
+
+--
+2.39.2
+
--- /dev/null
+From 5cd4f073ef92600361ab34604f85b132f284a528 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jul 2023 03:02:31 +0300
+Subject: net: phy: prevent stale pointer dereference in phy_init()
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit 1c613beaf877c0c0d755853dc62687e2013e55c4 ]
+
+mdio_bus_init() and phy_driver_register() both have error paths, and if
+those are ever hit, ethtool will have a stale pointer to the
+phy_ethtool_phy_ops stub structure, which references memory from a
+module that failed to load (phylib).
+
+It is probably hard to force an error in this code path even manually,
+but the error teardown path of phy_init() should be the same as
+phy_exit(), which is now simply not the case.
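+
+A minimal userspace sketch of the unwinding pattern (placeholder step/undo
+helpers, not the phylib code): each error label undoes exactly the steps
+that succeeded before it, in reverse order, mirroring the exit path:
+
+    #include <stdio.h>
+
+    /* Placeholder setup steps; step_b() fails to show the unwinding. */
+    static void set_ops(void)   { puts("set_ops"); }
+    static void clear_ops(void) { puts("clear_ops"); }
+    static int  step_a(void)    { puts("step_a"); return 0; }
+    static void undo_a(void)    { puts("undo_a"); }
+    static int  step_b(void)    { return -1; }
+
+    static int example_init(void)
+    {
+        int rc;
+
+        set_ops();            /* done first, so undone last */
+
+        rc = step_a();
+        if (rc)
+            goto err_ops;
+
+        rc = step_b();
+        if (rc)
+            goto err_a;
+
+        return 0;
+
+    err_a:
+        undo_a();             /* unwind in reverse order of setup */
+    err_ops:
+        clear_ops();          /* mirrors what the exit path does */
+        return rc;
+    }
+
+    int main(void)
+    {
+        printf("init = %d\n", example_init());
+        return 0;
+    }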
+
+Fixes: 55d8f053ce1b ("net: phy: Register ethtool PHY operations")
+Link: https://lore.kernel.org/netdev/ZLaiJ4G6TaJYGJyU@shell.armlinux.org.uk/
+Suggested-by: Russell King (Oracle) <linux@armlinux.org.uk>
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Link: https://lore.kernel.org/r/20230720000231.1939689-1-vladimir.oltean@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/phy_device.c | 21 ++++++++++++++-------
+ 1 file changed, 14 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
+index 7fbb0904b3c0f..82f74f96eba29 100644
+--- a/drivers/net/phy/phy_device.c
++++ b/drivers/net/phy/phy_device.c
+@@ -3252,23 +3252,30 @@ static int __init phy_init(void)
+ {
+ int rc;
+
++ ethtool_set_ethtool_phy_ops(&phy_ethtool_phy_ops);
++
+ rc = mdio_bus_init();
+ if (rc)
+- return rc;
++ goto err_ethtool_phy_ops;
+
+- ethtool_set_ethtool_phy_ops(&phy_ethtool_phy_ops);
+ features_init();
+
+ rc = phy_driver_register(&genphy_c45_driver, THIS_MODULE);
+ if (rc)
+- goto err_c45;
++ goto err_mdio_bus;
+
+ rc = phy_driver_register(&genphy_driver, THIS_MODULE);
+- if (rc) {
+- phy_driver_unregister(&genphy_c45_driver);
++ if (rc)
++ goto err_c45;
++
++ return 0;
++
+ err_c45:
+- mdio_bus_exit();
+- }
++ phy_driver_unregister(&genphy_c45_driver);
++err_mdio_bus:
++ mdio_bus_exit();
++err_ethtool_phy_ops:
++ ethtool_set_ethtool_phy_ops(NULL);
+
+ return rc;
+ }
+--
+2.39.2
+
--- /dev/null
+From 80ba7d3f04c1dd00e5a8cdab662fc9acf1a3b2b6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 15:05:13 -0300
+Subject: net: sched: cls_bpf: Undo tcf_bind_filter in case of an error
+
+From: Victor Nogueira <victor@mojatatu.com>
+
+[ Upstream commit 26a22194927e8521e304ed75c2f38d8068d55fc7 ]
+
+If cls_bpf_offload errors out, we must also undo tcf_bind_filter that
+was done before the error.
+
+Fix that by calling tcf_unbind_filter in errout_parms.
+
+Fixes: eadb41489fd2 ("net: cls_bpf: add support for marking filters as hardware-only")
+Signed-off-by: Victor Nogueira <victor@mojatatu.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Reviewed-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_bpf.c | 99 +++++++++++++++++++++------------------------
+ 1 file changed, 47 insertions(+), 52 deletions(-)
+
+diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
+index bc317b3eac124..0320e11eb248b 100644
+--- a/net/sched/cls_bpf.c
++++ b/net/sched/cls_bpf.c
+@@ -404,56 +404,6 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
+ return 0;
+ }
+
+-static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
+- struct cls_bpf_prog *prog, unsigned long base,
+- struct nlattr **tb, struct nlattr *est, u32 flags,
+- struct netlink_ext_ack *extack)
+-{
+- bool is_bpf, is_ebpf, have_exts = false;
+- u32 gen_flags = 0;
+- int ret;
+-
+- is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
+- is_ebpf = tb[TCA_BPF_FD];
+- if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
+- return -EINVAL;
+-
+- ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, flags,
+- extack);
+- if (ret < 0)
+- return ret;
+-
+- if (tb[TCA_BPF_FLAGS]) {
+- u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
+-
+- if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT)
+- return -EINVAL;
+-
+- have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
+- }
+- if (tb[TCA_BPF_FLAGS_GEN]) {
+- gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
+- if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
+- !tc_flags_valid(gen_flags))
+- return -EINVAL;
+- }
+-
+- prog->exts_integrated = have_exts;
+- prog->gen_flags = gen_flags;
+-
+- ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
+- cls_bpf_prog_from_efd(tb, prog, gen_flags, tp);
+- if (ret < 0)
+- return ret;
+-
+- if (tb[TCA_BPF_CLASSID]) {
+- prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+- tcf_bind_filter(tp, &prog->res, base);
+- }
+-
+- return 0;
+-}
+-
+ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
+ struct tcf_proto *tp, unsigned long base,
+ u32 handle, struct nlattr **tca,
+@@ -461,9 +411,12 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
+ struct netlink_ext_ack *extack)
+ {
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
++ bool is_bpf, is_ebpf, have_exts = false;
+ struct cls_bpf_prog *oldprog = *arg;
+ struct nlattr *tb[TCA_BPF_MAX + 1];
++ bool bound_to_filter = false;
+ struct cls_bpf_prog *prog;
++ u32 gen_flags = 0;
+ int ret;
+
+ if (tca[TCA_OPTIONS] == NULL)
+@@ -502,11 +455,51 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
+ goto errout;
+ prog->handle = handle;
+
+- ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], flags,
+- extack);
++ is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
++ is_ebpf = tb[TCA_BPF_FD];
++ if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) {
++ ret = -EINVAL;
++ goto errout_idr;
++ }
++
++ ret = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &prog->exts,
++ flags, extack);
++ if (ret < 0)
++ goto errout_idr;
++
++ if (tb[TCA_BPF_FLAGS]) {
++ u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
++
++ if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
++ ret = -EINVAL;
++ goto errout_idr;
++ }
++
++ have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
++ }
++ if (tb[TCA_BPF_FLAGS_GEN]) {
++ gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
++ if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
++ !tc_flags_valid(gen_flags)) {
++ ret = -EINVAL;
++ goto errout_idr;
++ }
++ }
++
++ prog->exts_integrated = have_exts;
++ prog->gen_flags = gen_flags;
++
++ ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
++ cls_bpf_prog_from_efd(tb, prog, gen_flags, tp);
+ if (ret < 0)
+ goto errout_idr;
+
++ if (tb[TCA_BPF_CLASSID]) {
++ prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
++ tcf_bind_filter(tp, &prog->res, base);
++ bound_to_filter = true;
++ }
++
+ ret = cls_bpf_offload(tp, prog, oldprog, extack);
+ if (ret)
+ goto errout_parms;
+@@ -528,6 +521,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
+ return 0;
+
+ errout_parms:
++ if (bound_to_filter)
++ tcf_unbind_filter(tp, &prog->res);
+ cls_bpf_free_parms(prog);
+ errout_idr:
+ if (!oldprog)
+--
+2.39.2
+
--- /dev/null
+From df17b2737c98c54588b1108cd709109a4a053d7e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 15:05:10 -0300
+Subject: net: sched: cls_matchall: Undo tcf_bind_filter in case of failure
+ after mall_set_parms
+
+From: Victor Nogueira <victor@mojatatu.com>
+
+[ Upstream commit b3d0e0489430735e2e7626aa37e6462cdd136e9d ]
+
+If an error occurs after mall_set_parms has executed successfully, we
+must undo the tcf_bind_filter call it issued.
+
+Fix that by calling tcf_unbind_filter in err_replace_hw_filter label.
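+
+A tiny generic sketch of the pattern (placeholder helpers, not the classifier
+code): remember whether the binding side effect happened, so the error path
+undoes only what was actually done:
+
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    static void bind_filter(void)       { puts("bind"); }
+    static void unbind_filter(void)     { puts("unbind"); }
+    static int  replace_hw_filter(void) { return -1; }  /* force a failure */
+
+    static int change(bool have_classid)
+    {
+        bool bound = false;
+        int err;
+
+        if (have_classid) {
+            bind_filter();
+            bound = true;    /* remember the side effect for the error path */
+        }
+
+        err = replace_hw_filter();
+        if (err)
+            goto err_replace;
+
+        return 0;
+
+    err_replace:
+        if (bound)
+            unbind_filter(); /* undo only what actually happened */
+        return err;
+    }
+
+    int main(void)
+    {
+        printf("change = %d\n", change(true));
+        return 0;
+    }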
+
+Fixes: ec2507d2a306 ("net/sched: cls_matchall: Fix error path")
+Signed-off-by: Victor Nogueira <victor@mojatatu.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Reviewed-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_matchall.c | 35 ++++++++++++-----------------------
+ 1 file changed, 12 insertions(+), 23 deletions(-)
+
+diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
+index 39a5d9c170def..43f8df5847414 100644
+--- a/net/sched/cls_matchall.c
++++ b/net/sched/cls_matchall.c
+@@ -157,26 +157,6 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
+ [TCA_MATCHALL_FLAGS] = { .type = NLA_U32 },
+ };
+
+-static int mall_set_parms(struct net *net, struct tcf_proto *tp,
+- struct cls_mall_head *head,
+- unsigned long base, struct nlattr **tb,
+- struct nlattr *est, u32 flags, u32 fl_flags,
+- struct netlink_ext_ack *extack)
+-{
+- int err;
+-
+- err = tcf_exts_validate_ex(net, tp, tb, est, &head->exts, flags,
+- fl_flags, extack);
+- if (err < 0)
+- return err;
+-
+- if (tb[TCA_MATCHALL_CLASSID]) {
+- head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
+- tcf_bind_filter(tp, &head->res, base);
+- }
+- return 0;
+-}
+-
+ static int mall_change(struct net *net, struct sk_buff *in_skb,
+ struct tcf_proto *tp, unsigned long base,
+ u32 handle, struct nlattr **tca,
+@@ -185,6 +165,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
+ {
+ struct cls_mall_head *head = rtnl_dereference(tp->root);
+ struct nlattr *tb[TCA_MATCHALL_MAX + 1];
++ bool bound_to_filter = false;
+ struct cls_mall_head *new;
+ u32 userflags = 0;
+ int err;
+@@ -224,11 +205,17 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
+ goto err_alloc_percpu;
+ }
+
+- err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE],
+- flags, new->flags, extack);
+- if (err)
++ err = tcf_exts_validate_ex(net, tp, tb, tca[TCA_RATE],
++ &new->exts, flags, new->flags, extack);
++ if (err < 0)
+ goto err_set_parms;
+
++ if (tb[TCA_MATCHALL_CLASSID]) {
++ new->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
++ tcf_bind_filter(tp, &new->res, base);
++ bound_to_filter = true;
++ }
++
+ if (!tc_skip_hw(new->flags)) {
+ err = mall_replace_hw_filter(tp, new, (unsigned long)new,
+ extack);
+@@ -244,6 +231,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
+ return 0;
+
+ err_replace_hw_filter:
++ if (bound_to_filter)
++ tcf_unbind_filter(tp, &new->res);
+ err_set_parms:
+ free_percpu(new->pf);
+ err_alloc_percpu:
+--
+2.39.2
+
--- /dev/null
+From 2565a1a811821f66ba1cd9a3bb9496fbecdc80e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 15:05:12 -0300
+Subject: net: sched: cls_u32: Undo refcount decrement in case update failed
+
+From: Victor Nogueira <victor@mojatatu.com>
+
+[ Upstream commit e8d3d78c19be0264a5692bed477c303523aead31 ]
+
+In the case of an update, when TCA_U32_LINK is set, u32_set_parms will
+decrement the refcount of the ht_down (struct tc_u_hnode) pointer
+present in the older u32 filter which we are replacing. However, if
+u32_replace_hw_knode errors out, the update command fails and the
+ht_down refcount remains decremented. To fix that, when
+u32_replace_hw_knode fails, check if ht_down's refcount was decremented
+and undo the decrement.
+
+Fixes: d34e3e181395 ("net: cls_u32: Add support for skip-sw flag to tc u32 classifier.")
+Signed-off-by: Victor Nogueira <victor@mojatatu.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Reviewed-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_u32.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
+index 7cfbcd5180841..1280736a7b92e 100644
+--- a/net/sched/cls_u32.c
++++ b/net/sched/cls_u32.c
+@@ -926,6 +926,13 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
+ if (err) {
+ u32_unbind_filter(tp, new, tb);
+
++ if (tb[TCA_U32_LINK]) {
++ struct tc_u_hnode *ht_old;
++
++ ht_old = rtnl_dereference(n->ht_down);
++ if (ht_old)
++ ht_old->refcnt++;
++ }
+ __u32_destroy_key(new);
+ return err;
+ }
+--
+2.39.2
+
--- /dev/null
+From 66d4c485e832ee7c6d50709763bfdf4c14e821d0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 15:05:11 -0300
+Subject: net: sched: cls_u32: Undo tcf_bind_filter if u32_replace_hw_knode
+
+From: Victor Nogueira <victor@mojatatu.com>
+
+[ Upstream commit 9cb36faedeafb9720ac236aeae2ea57091d90a09 ]
+
+When u32_replace_hw_knode fails, we need to undo the tcf_bind_filter
+operation done at u32_set_parms.
+
+Fixes: d34e3e181395 ("net: cls_u32: Add support for skip-sw flag to tc u32 classifier.")
+Signed-off-by: Victor Nogueira <victor@mojatatu.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Reviewed-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_u32.c | 41 ++++++++++++++++++++++++++++++-----------
+ 1 file changed, 30 insertions(+), 11 deletions(-)
+
+diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
+index a3477537c102b..7cfbcd5180841 100644
+--- a/net/sched/cls_u32.c
++++ b/net/sched/cls_u32.c
+@@ -710,8 +710,23 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
+ [TCA_U32_FLAGS] = { .type = NLA_U32 },
+ };
+
++static void u32_unbind_filter(struct tcf_proto *tp, struct tc_u_knode *n,
++ struct nlattr **tb)
++{
++ if (tb[TCA_U32_CLASSID])
++ tcf_unbind_filter(tp, &n->res);
++}
++
++static void u32_bind_filter(struct tcf_proto *tp, struct tc_u_knode *n,
++ unsigned long base, struct nlattr **tb)
++{
++ if (tb[TCA_U32_CLASSID]) {
++ n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
++ tcf_bind_filter(tp, &n->res, base);
++ }
++}
++
+ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
+- unsigned long base,
+ struct tc_u_knode *n, struct nlattr **tb,
+ struct nlattr *est, u32 flags, u32 fl_flags,
+ struct netlink_ext_ack *extack)
+@@ -758,10 +773,6 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
+ if (ht_old)
+ ht_old->refcnt--;
+ }
+- if (tb[TCA_U32_CLASSID]) {
+- n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
+- tcf_bind_filter(tp, &n->res, base);
+- }
+
+ if (ifindex >= 0)
+ n->ifindex = ifindex;
+@@ -901,17 +912,20 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
+ if (!new)
+ return -ENOMEM;
+
+- err = u32_set_parms(net, tp, base, new, tb,
+- tca[TCA_RATE], flags, new->flags,
+- extack);
++ err = u32_set_parms(net, tp, new, tb, tca[TCA_RATE],
++ flags, new->flags, extack);
+
+ if (err) {
+ __u32_destroy_key(new);
+ return err;
+ }
+
++ u32_bind_filter(tp, new, base, tb);
++
+ err = u32_replace_hw_knode(tp, new, flags, extack);
+ if (err) {
++ u32_unbind_filter(tp, new, tb);
++
+ __u32_destroy_key(new);
+ return err;
+ }
+@@ -1072,15 +1086,18 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
+ }
+ #endif
+
+- err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE],
++ err = u32_set_parms(net, tp, n, tb, tca[TCA_RATE],
+ flags, n->flags, extack);
++
++ u32_bind_filter(tp, n, base, tb);
++
+ if (err == 0) {
+ struct tc_u_knode __rcu **ins;
+ struct tc_u_knode *pins;
+
+ err = u32_replace_hw_knode(tp, n, flags, extack);
+ if (err)
+- goto errhw;
++ goto errunbind;
+
+ if (!tc_in_hw(n->flags))
+ n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+@@ -1098,7 +1115,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
+ return 0;
+ }
+
+-errhw:
++errunbind:
++ u32_unbind_filter(tp, n, tb);
++
+ #ifdef CONFIG_CLS_U32_MARK
+ free_percpu(n->pcpu_success);
+ #endif
+--
+2.39.2
+
--- /dev/null
+From 93023625146793635d96beb87c81594cb326e47c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Jul 2023 01:30:33 +0200
+Subject: netfilter: nf_tables: can't schedule in nft_chain_validate
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 314c82841602a111c04a7210c21dc77e0d560242 ]
+
+Can be called via nft set element list iteration, which may acquire
+rcu and/or bh read lock (depends on set type).
+
+BUG: sleeping function called from invalid context at net/netfilter/nf_tables_api.c:3353
+in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 1232, name: nft
+preempt_count: 0, expected: 0
+RCU nest depth: 1, expected: 0
+2 locks held by nft/1232:
+ #0: ffff8881180e3ea8 (&nft_net->commit_mutex){+.+.}-{3:3}, at: nf_tables_valid_genid
+ #1: ffffffff83f5f540 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire
+Call Trace:
+ nft_chain_validate
+ nft_lookup_validate_setelem
+ nft_pipapo_walk
+ nft_lookup_validate
+ nft_chain_validate
+ nft_immediate_validate
+ nft_chain_validate
+ nf_tables_validate
+ nf_tables_abort
+
+No choice but to move it to nf_tables_validate().
+
+Fixes: 81ea01066741 ("netfilter: nf_tables: add rescheduling points during loop detection walks")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 58f14e4ef63d4..0bb1cc7ed5e99 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -3500,8 +3500,6 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
+ if (err < 0)
+ return err;
+ }
+-
+- cond_resched();
+ }
+
+ return 0;
+@@ -3525,6 +3523,8 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
+ err = nft_chain_validate(&ctx, chain);
+ if (err < 0)
+ return err;
++
++ cond_resched();
+ }
+
+ return 0;
+--
+2.39.2
+
--- /dev/null
+From 447b7e2bbc060e4f8293f9e084a379b95e8bf78b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jul 2023 00:29:58 +0200
+Subject: netfilter: nf_tables: fix spurious set element insertion failure
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit ddbd8be68941985f166f5107109a90ce13147c44 ]
+
+On some platforms there is a padding hole in the nft_verdict
+structure, between the verdict code and the chain pointer.
+
+On element insertion, if the new element clashes with an existing one and
+NLM_F_EXCL flag isn't set, we want to ignore the -EEXIST error as long as
+the data associated with duplicated element is the same as the existing
+one. The data equality check uses memcmp.
+
+For normal data (NFT_DATA_VALUE) this works fine, but for NFT_DATA_VERDICT
+padding area leads to spurious failure even if the verdict data is the
+same.
+
+This then makes the insertion fail with 'already exists' error, even
+though the new "key : data" matches an existing entry and userspace
+told the kernel that it doesn't want to receive an error indication.
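+
+As a small userspace illustration of the underlying pitfall (a made-up
+struct, not the nftables one): unless the whole object is zeroed first,
+memcmp() may compare indeterminate padding bytes and report a spurious
+mismatch for logically equal values:
+
+    #include <stdint.h>
+    #include <stdio.h>
+    #include <string.h>
+
+    struct verdict_like {
+        uint32_t code;    /* padding commonly follows here on 64-bit ABIs */
+        void    *chain;
+    };
+
+    int main(void)
+    {
+        struct verdict_like a, b;
+
+        /* Zero the whole objects so the padding bytes are deterministic;
+         * without these two memset() calls the comparison below could
+         * fail even though code and chain match. */
+        memset(&a, 0, sizeof(a));
+        memset(&b, 0, sizeof(b));
+
+        a.code = b.code = 1;
+        a.chain = b.chain = NULL;
+
+        printf("equal: %s\n", memcmp(&a, &b, sizeof(a)) == 0 ? "yes" : "no");
+        return 0;
+    }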
+
+Fixes: c016c7e45ddf ("netfilter: nf_tables: honor NLM_F_EXCL flag in set element insertion")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 938cfa9a3adb6..58f14e4ef63d4 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -10114,6 +10114,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
+
+ if (!tb[NFTA_VERDICT_CODE])
+ return -EINVAL;
++
++ /* zero padding hole for memcmp */
++ memset(data, 0, sizeof(*data));
+ data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
+
+ switch (data->verdict.code) {
+--
+2.39.2
+
--- /dev/null
+From 2de006dd895fa8e0d71406e0293e4e0caa40e552 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 20:19:43 +0200
+Subject: netfilter: nf_tables: skip bound chain in netns release path
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 751d460ccff3137212f47d876221534bf0490996 ]
+
+Skip bound chains in the netns release path; the rule that owns such a
+chain releases these objects.
+
+Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 0bb1cc7ed5e99..f621c5e48747b 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -10398,6 +10398,9 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
+ ctx.family = table->family;
+ ctx.table = table;
+ list_for_each_entry(chain, &table->chains, list) {
++ if (nft_chain_is_bound(chain))
++ continue;
++
+ ctx.chain = chain;
+ list_for_each_entry_safe(rule, nr, &chain->rules, list) {
+ list_del(&rule->list);
+--
+2.39.2
+
--- /dev/null
+From 00af5d0ed7436d8d334b78b70165969fd0c0dde3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jul 2023 09:17:21 +0200
+Subject: netfilter: nf_tables: skip bound chain on rule flush
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 6eaf41e87a223ae6f8e7a28d6e78384ad7e407f8 ]
+
+Skip bound chains when flushing table rules; the rule that owns such a
+chain releases these objects.
+
+Otherwise, the following warning is triggered:
+
+ WARNING: CPU: 2 PID: 1217 at net/netfilter/nf_tables_api.c:2013 nf_tables_chain_destroy+0x1f7/0x210 [nf_tables]
+ CPU: 2 PID: 1217 Comm: chain-flush Not tainted 6.1.39 #1
+ RIP: 0010:nf_tables_chain_destroy+0x1f7/0x210 [nf_tables]
+
+Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING")
+Reported-by: Kevin Rich <kevinrich1337@gmail.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index f621c5e48747b..ecde497368ec4 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -3892,6 +3892,8 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
+ list_for_each_entry(chain, &table->chains, list) {
+ if (!nft_is_active_next(net, chain))
+ continue;
++ if (nft_chain_is_bound(chain))
++ continue;
+
+ ctx.chain = chain;
+ err = nft_delrule_by_chain(&ctx);
+--
+2.39.2
+
--- /dev/null
+From 83c0d8d2e1df2dea06f0b2bf34a73af311411a76 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:08:21 +0200
+Subject: netfilter: nft_set_pipapo: fix improper element removal
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 87b5a5c209405cb6b57424cdfa226a6dbd349232 ]
+
+end key should be equal to start unless NFT_SET_EXT_KEY_END is present.
+
+It's possible to add elements that only have a start key
+("{ 1.0.0.0 . 2.0.0.0 }") without an interval end.
+
+Insertion treats this via:
+
+if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END))
+ end = (const u8 *)nft_set_ext_key_end(ext)->data;
+else
+ end = start;
+
+but the removal side always uses nft_set_ext_key_end().
+This is wrong and leads to garbage remaining in the set after removal;
+the next lookup/insert attempt will give:
+
+BUG: KASAN: slab-use-after-free in pipapo_get+0x8eb/0xb90
+Read of size 1 at addr ffff888100d50586 by task nft-pipapo_uaf_/1399
+Call Trace:
+ kasan_report+0x105/0x140
+ pipapo_get+0x8eb/0xb90
+ nft_pipapo_insert+0x1dc/0x1710
+ nf_tables_newsetelem+0x31f5/0x4e00
+ ..
+
+Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges")
+Reported-by: lonial con <kongln9170@gmail.com>
+Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_set_pipapo.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
+index 0452ee586c1cc..a81829c10feab 100644
+--- a/net/netfilter/nft_set_pipapo.c
++++ b/net/netfilter/nft_set_pipapo.c
+@@ -1930,7 +1930,11 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
+ int i, start, rules_fx;
+
+ match_start = data;
+- match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data;
++
++ if (nft_set_ext_exists(&e->ext, NFT_SET_EXT_KEY_END))
++ match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data;
++ else
++ match_end = data;
+
+ start = first_rule;
+ rules_fx = rules_f0;
+--
+2.39.2
+
--- /dev/null
+From b8bfbeb43ba95b6189f76448167e05a0545f9706 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 16 Jul 2023 15:07:41 +0530
+Subject: octeontx2-pf: Dont allocate BPIDs for LBK interfaces
+
+From: Geetha sowjanya <gakula@marvell.com>
+
+[ Upstream commit 8fcd7c7b3a38ab5e452f542fda8f7940e77e479a ]
+
+The current driver enables backpressure for LBK interfaces,
+but these interfaces do not support this feature.
+Hence, this patch fixes the issue by skipping the
+backpressure configuration for these interfaces.
+
+Fixes: 75f36270990c ("octeontx2-pf: Support to enable/disable pause frames via ethtool").
+Signed-off-by: Geetha sowjanya <gakula@marvell.com>
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Link: https://lore.kernel.org/r/20230716093741.28063-1-gakula@marvell.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+index ed911d9946277..c236dba80ff1a 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+@@ -1452,8 +1452,9 @@ static int otx2_init_hw_resources(struct otx2_nic *pf)
+ if (err)
+ goto err_free_npa_lf;
+
+- /* Enable backpressure */
+- otx2_nix_config_bp(pf, true);
++ /* Enable backpressure for CGX mapped PF/VFs */
++ if (!is_otx2_lbkvf(pf->pdev))
++ otx2_nix_config_bp(pf, true);
+
+ /* Init Auras and pools used by NIX RQ, for free buffer ptrs */
+ err = otx2_rq_aura_pool_init(pf);
+--
+2.39.2
+
--- /dev/null
+From 2c90078841a0854ee8bf4c7fa749f54fbd044f83 Mon Sep 17 00:00:00 2001
+From: Christian Brauner <brauner@kernel.org>
+Date: Tue, 13 Jun 2023 10:13:37 +0200
+Subject: [PATCH AUTOSEL 4.19 06/11] ovl: check type and offset of struct
+ vfsmount in ovl_entry
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit f723edb8a532cd26e1ff0a2b271d73762d48f762 ]
+
+While porting overlayfs to the new mount api I started experiencing random
+crashes that couldn't be explained easily. After much debugging and
+reasoning it became clear that struct ovl_entry requires the pointer to
+struct vfsmount to be the first member and of type struct vfsmount.
+
+During the port I added a new member at the beginning of struct
+ovl_entry which broke all over the place in the form of random crashes
+and cache corruptions. While there's a comment in ovl_free_fs() to the
+effect of "Hack! Reuse ofs->layers as a vfsmount array before freeing
+it" there's no such comment on struct ovl_entry which makes this easy to
+trip over.
+
+Add a comment and two static asserts for both the offset and the type of
+pointer in struct ovl_entry.
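+
+A rough userspace analogue of such a guard (using C11 static_assert and only
+the offset check; the kernel version also checks the member's type):
+
+    #include <assert.h>   /* static_assert (C11) */
+    #include <stddef.h>   /* offsetof */
+
+    struct layer {
+        const char *mnt;  /* must stay the first member */
+        int fsid;
+    };
+
+    /* The build fails immediately if someone reorders the members. */
+    static_assert(offsetof(struct layer, mnt) == 0,
+                  "mnt must remain the first member of struct layer");
+
+    int main(void) { return 0; }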
+
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/overlayfs/ovl_entry.h | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
+index 1a1adc697c553..3d34ce992f0d8 100644
+--- a/fs/overlayfs/ovl_entry.h
++++ b/fs/overlayfs/ovl_entry.h
+@@ -28,6 +28,7 @@ struct ovl_sb {
+ };
+
+ struct ovl_layer {
++ /* ovl_free_fs() relies on @mnt being the first member! */
+ struct vfsmount *mnt;
+ /* Trap in ovl inode cache */
+ struct inode *trap;
+@@ -38,6 +39,14 @@ struct ovl_layer {
+ int fsid;
+ };
+
++/*
++ * ovl_free_fs() relies on @mnt being the first member when unmounting
++ * the private mounts created for each layer. Let's check both the
++ * offset and type.
++ */
++static_assert(offsetof(struct ovl_layer, mnt) == 0);
++static_assert(__same_type(typeof_member(struct ovl_layer, mnt), struct vfsmount *));
++
+ struct ovl_path {
+ struct ovl_layer *layer;
+ struct dentry *dentry;
+--
+2.39.2
+
--- /dev/null
+From 680f36a4f5e7d831b67c91dafe4f6c7797e53475 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Jul 2023 16:45:46 +0100
+Subject: perf build: Fix library not found error when using CSLIBS
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: James Clark <james.clark@arm.com>
+
+[ Upstream commit 1feece2780ac2f8de45177fe53979726cee4b3d1 ]
+
+-L only specifies the search path for libraries directly provided in the
+link line with -l. Because -lopencsd isn't specified, it's only linked
+because it's a dependency of -lopencsd_c_api. Dependencies like this are
+resolved using the default system search paths or -rpath-link=... rather
+than -L. This means that compilation only works if OpenCSD is installed
+to the system rather than provided with the CSLIBS (-L) option.
+
+This could be fixed by adding -Wl,-rpath-link=$(CSLIBS) but that is less
+conventional than just adding -lopencsd to the link line so that it uses
+-L. -lopencsd seems to have been removed in commit ed17b1914978eddb
+("perf tools: Drop requirement for libstdc++.so for libopencsd check")
+because it was thought that there was a chance compilation would work
+even if it didn't exist, but I think that only applies to libstdc++ so
+there is no harm in adding it back. libopencsd.so and libopencsd_c_api.so
+would always exist together.
+
+Testing
+=======
+
+The following scenarios now all work:
+
+ * Cross build with OpenCSD installed
+ * Cross build using CSLIBS=...
+ * Native build with OpenCSD installed
+ * Native build using CSLIBS=...
+ * Static cross build with OpenCSD installed
+ * Static cross build with CSLIBS=...
+
+Committer testing:
+
+ ⬢[acme@toolbox perf-tools]$ alias m
+ alias m='make -k BUILD_BPF_SKEL=1 CORESIGHT=1 O=/tmp/build/perf-tools -C tools/perf install-bin && git status && perf test python ; perf record -o /dev/null sleep 0.01 ; perf stat --null sleep 0.01'
+ ⬢[acme@toolbox perf-tools]$ ldd ~/bin/perf | grep csd
+ libopencsd_c_api.so.1 => /lib64/libopencsd_c_api.so.1 (0x00007fd49c44e000)
+ libopencsd.so.1 => /lib64/libopencsd.so.1 (0x00007fd49bd56000)
+ ⬢[acme@toolbox perf-tools]$ cat /etc/redhat-release
+ Fedora release 36 (Thirty Six)
+ ⬢[acme@toolbox perf-tools]$
+
+Fixes: ed17b1914978eddb ("perf tools: Drop requirement for libstdc++.so for libopencsd check")
+Reported-by: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
+Signed-off-by: James Clark <james.clark@arm.com>
+Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Tested-by: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Uwe Kleine-König <uwe@kleine-koenig.org>
+Cc: coresight@lists.linaro.org
+Closes: https://lore.kernel.org/linux-arm-kernel/56905d7a-a91e-883a-b707-9d5f686ba5f1@arm.com/
+Link: https://lore.kernel.org/all/36cc4dc6-bf4b-1093-1c0a-876e368af183@kleine-koenig.org/
+Link: https://lore.kernel.org/r/20230707154546.456720-1-james.clark@arm.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/Makefile.config | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
+index 898226ea8cadc..fac6ba07eacdb 100644
+--- a/tools/perf/Makefile.config
++++ b/tools/perf/Makefile.config
+@@ -149,9 +149,9 @@ FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto
+ ifdef CSINCLUDES
+ LIBOPENCSD_CFLAGS := -I$(CSINCLUDES)
+ endif
+-OPENCSDLIBS := -lopencsd_c_api
++OPENCSDLIBS := -lopencsd_c_api -lopencsd
+ ifeq ($(findstring -static,${LDFLAGS}),-static)
+- OPENCSDLIBS += -lopencsd -lstdc++
++ OPENCSDLIBS += -lstdc++
+ endif
+ ifdef CSLIBS
+ LIBOPENCSD_LDFLAGS := -L$(CSLIBS)
+--
+2.39.2
+
--- /dev/null
+From 726cf612acdfe280e96ebb1977b1ec50b8c6ec28 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jul 2023 12:18:58 +0100
+Subject: pinctrl: renesas: rzg2l: Handle non-unique subnode names
+
+From: Biju Das <biju.das.jz@bp.renesas.com>
+
+[ Upstream commit bfc374a145ae133613e05b9b89be561f169cb58d ]
+
+Currently, sd1 and sd0 have unique subnode names 'sd1_mux' and 'sd0_mux'.
+If we change these to non-unique subnode names such as 'mux' this can
+lead to the below conflict as the RZ/G2L pin control driver considers
+only the names of the subnodes.
+
+ pinctrl-rzg2l 11030000.pinctrl: pin P47_0 already requested by 11c00000.mmc; cannot claim for 11c10000.mmc
+ pinctrl-rzg2l 11030000.pinctrl: pin-376 (11c10000.mmc) status -22
+ pinctrl-rzg2l 11030000.pinctrl: could not request pin 376 (P47_0) from group mux on device pinctrl-rzg2l
+ renesas_sdhi_internal_dmac 11c10000.mmc: Error applying setting, reverse things back
+
+Fix this by constructing unique names from the node names of both the
+pin control configuration node and its child node, where appropriate.
+
+Based on the work done by Geert for the RZ/V2M pinctrl driver.
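+
+As a simplified userspace sketch of the naming scheme (hypothetical helper,
+not the driver code), the unique name is derived from "<parent>.<child>"
+whenever a parent node exists:
+
+    #define _GNU_SOURCE   /* for asprintf() */
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <string.h>
+
+    /* Build "<parent>.<child>", or fall back to the child name alone. */
+    static char *group_name(const char *parent, const char *child)
+    {
+        char *name;
+
+        if (!parent)
+            return strdup(child);
+
+        if (asprintf(&name, "%s.%s", parent, child) < 0)
+            return NULL;
+
+        return name;
+    }
+
+    int main(void)
+    {
+        char *a = group_name("sdhi0", "mux");
+        char *b = group_name("sdhi1", "mux");
+
+        if (a && b)
+            printf("%s %s\n", a, b);  /* "sdhi0.mux sdhi1.mux": no collision */
+        free(a);
+        free(b);
+        return 0;
+    }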
+
+Fixes: c4c4637eb57f ("pinctrl: renesas: Add RZ/G2L pin and gpio controller driver")
+Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
+Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Link: https://lore.kernel.org/r/20230704111858.215278-1-biju.das.jz@bp.renesas.com
+Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pinctrl/renesas/pinctrl-rzg2l.c | 28 ++++++++++++++++++-------
+ 1 file changed, 20 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c
+index ca6303fc41f98..fd11d28e5a1e4 100644
+--- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c
++++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c
+@@ -246,6 +246,7 @@ static int rzg2l_map_add_config(struct pinctrl_map *map,
+
+ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+ struct device_node *np,
++ struct device_node *parent,
+ struct pinctrl_map **map,
+ unsigned int *num_maps,
+ unsigned int *index)
+@@ -263,6 +264,7 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+ struct property *prop;
+ int ret, gsel, fsel;
+ const char **pin_fn;
++ const char *name;
+ const char *pin;
+
+ pinmux = of_find_property(np, "pinmux", NULL);
+@@ -346,8 +348,19 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+ psel_val[i] = MUX_FUNC(value);
+ }
+
++ if (parent) {
++ name = devm_kasprintf(pctrl->dev, GFP_KERNEL, "%pOFn.%pOFn",
++ parent, np);
++ if (!name) {
++ ret = -ENOMEM;
++ goto done;
++ }
++ } else {
++ name = np->name;
++ }
++
+ /* Register a single pin group listing all the pins we read from DT */
+- gsel = pinctrl_generic_add_group(pctldev, np->name, pins, num_pinmux, NULL);
++ gsel = pinctrl_generic_add_group(pctldev, name, pins, num_pinmux, NULL);
+ if (gsel < 0) {
+ ret = gsel;
+ goto done;
+@@ -357,17 +370,16 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+ * Register a single group function where the 'data' is an array PSEL
+ * register values read from DT.
+ */
+- pin_fn[0] = np->name;
+- fsel = pinmux_generic_add_function(pctldev, np->name, pin_fn, 1,
+- psel_val);
++ pin_fn[0] = name;
++ fsel = pinmux_generic_add_function(pctldev, name, pin_fn, 1, psel_val);
+ if (fsel < 0) {
+ ret = fsel;
+ goto remove_group;
+ }
+
+ maps[idx].type = PIN_MAP_TYPE_MUX_GROUP;
+- maps[idx].data.mux.group = np->name;
+- maps[idx].data.mux.function = np->name;
++ maps[idx].data.mux.group = name;
++ maps[idx].data.mux.function = name;
+ idx++;
+
+ dev_dbg(pctrl->dev, "Parsed %pOF with %d pins\n", np, num_pinmux);
+@@ -414,7 +426,7 @@ static int rzg2l_dt_node_to_map(struct pinctrl_dev *pctldev,
+ index = 0;
+
+ for_each_child_of_node(np, child) {
+- ret = rzg2l_dt_subnode_to_map(pctldev, child, map,
++ ret = rzg2l_dt_subnode_to_map(pctldev, child, np, map,
+ num_maps, &index);
+ if (ret < 0) {
+ of_node_put(child);
+@@ -423,7 +435,7 @@ static int rzg2l_dt_node_to_map(struct pinctrl_dev *pctldev,
+ }
+
+ if (*num_maps == 0) {
+- ret = rzg2l_dt_subnode_to_map(pctldev, np, map,
++ ret = rzg2l_dt_subnode_to_map(pctldev, np, NULL, map,
+ num_maps, &index);
+ if (ret < 0)
+ goto done;
+--
+2.39.2
+
--- /dev/null
+From 825d0cfe089333f10e47c7657c16035ce33865d3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Jul 2023 17:07:06 +0200
+Subject: pinctrl: renesas: rzv2m: Handle non-unique subnode names
+
+From: Geert Uytterhoeven <geert+renesas@glider.be>
+
+[ Upstream commit f46a0b47cc0829acd050213194c5a77351e619b2 ]
+
+The eMMC and SDHI pin control configuration nodes in DT have subnodes
+with the same names ("data" and "ctrl"). As the RZ/V2M pin control
+driver considers only the names of the subnodes, this leads to
+conflicts:
+
+ pinctrl-rzv2m b6250000.pinctrl: pin P8_2 already requested by 85000000.mmc; cannot claim for 85020000.mmc
+ pinctrl-rzv2m b6250000.pinctrl: pin-130 (85020000.mmc) status -22
+ renesas_sdhi_internal_dmac 85020000.mmc: Error applying setting, reverse things back
+
+Fix this by constructing unique names from the node names of both the
+pin control configuration node and its child node, where appropriate.
+
+Reported-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
+
+Fixes: 92a9b825257614af ("pinctrl: renesas: Add RZ/V2M pin and gpio controller driver")
+Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Tested-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
+Link: https://lore.kernel.org/r/607bd6ab4905b0b1b119a06ef953fa1184505777.1688396717.git.geert+renesas@glider.be
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pinctrl/renesas/pinctrl-rzv2m.c | 28 ++++++++++++++++++-------
+ 1 file changed, 20 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/pinctrl/renesas/pinctrl-rzv2m.c b/drivers/pinctrl/renesas/pinctrl-rzv2m.c
+index e8c18198bebd2..35f382b055e83 100644
+--- a/drivers/pinctrl/renesas/pinctrl-rzv2m.c
++++ b/drivers/pinctrl/renesas/pinctrl-rzv2m.c
+@@ -207,6 +207,7 @@ static int rzv2m_map_add_config(struct pinctrl_map *map,
+
+ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+ struct device_node *np,
++ struct device_node *parent,
+ struct pinctrl_map **map,
+ unsigned int *num_maps,
+ unsigned int *index)
+@@ -224,6 +225,7 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+ struct property *prop;
+ int ret, gsel, fsel;
+ const char **pin_fn;
++ const char *name;
+ const char *pin;
+
+ pinmux = of_find_property(np, "pinmux", NULL);
+@@ -307,8 +309,19 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+ psel_val[i] = MUX_FUNC(value);
+ }
+
++ if (parent) {
++ name = devm_kasprintf(pctrl->dev, GFP_KERNEL, "%pOFn.%pOFn",
++ parent, np);
++ if (!name) {
++ ret = -ENOMEM;
++ goto done;
++ }
++ } else {
++ name = np->name;
++ }
++
+ /* Register a single pin group listing all the pins we read from DT */
+- gsel = pinctrl_generic_add_group(pctldev, np->name, pins, num_pinmux, NULL);
++ gsel = pinctrl_generic_add_group(pctldev, name, pins, num_pinmux, NULL);
+ if (gsel < 0) {
+ ret = gsel;
+ goto done;
+@@ -318,17 +331,16 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+ * Register a single group function where the 'data' is an array PSEL
+ * register values read from DT.
+ */
+- pin_fn[0] = np->name;
+- fsel = pinmux_generic_add_function(pctldev, np->name, pin_fn, 1,
+- psel_val);
++ pin_fn[0] = name;
++ fsel = pinmux_generic_add_function(pctldev, name, pin_fn, 1, psel_val);
+ if (fsel < 0) {
+ ret = fsel;
+ goto remove_group;
+ }
+
+ maps[idx].type = PIN_MAP_TYPE_MUX_GROUP;
+- maps[idx].data.mux.group = np->name;
+- maps[idx].data.mux.function = np->name;
++ maps[idx].data.mux.group = name;
++ maps[idx].data.mux.function = name;
+ idx++;
+
+ dev_dbg(pctrl->dev, "Parsed %pOF with %d pins\n", np, num_pinmux);
+@@ -375,7 +387,7 @@ static int rzv2m_dt_node_to_map(struct pinctrl_dev *pctldev,
+ index = 0;
+
+ for_each_child_of_node(np, child) {
+- ret = rzv2m_dt_subnode_to_map(pctldev, child, map,
++ ret = rzv2m_dt_subnode_to_map(pctldev, child, np, map,
+ num_maps, &index);
+ if (ret < 0) {
+ of_node_put(child);
+@@ -384,7 +396,7 @@ static int rzv2m_dt_node_to_map(struct pinctrl_dev *pctldev,
+ }
+
+ if (*num_maps == 0) {
+- ret = rzv2m_dt_subnode_to_map(pctldev, np, map,
++ ret = rzv2m_dt_subnode_to_map(pctldev, np, NULL, map,
+ num_maps, &index);
+ if (ret < 0)
+ goto done;
+--
+2.39.2
+
--- /dev/null
+From 1da38321c1da0aea4122e574000e2a97ee3d2378 Mon Sep 17 00:00:00 2001
+From: Ye Bin <yebin10@huawei.com>
+Date: Mon, 5 Jun 2023 22:07:31 +0800
+Subject: [PATCH AUTOSEL 4.19 04/11] quota: fix warning in dqgrab()
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit d6a95db3c7ad160bc16b89e36449705309b52bcb ]
+
+There's an issue as follows when doing fault injection:
+WARNING: CPU: 1 PID: 14870 at include/linux/quotaops.h:51 dquot_disable+0x13b7/0x18c0
+Modules linked in:
+CPU: 1 PID: 14870 Comm: fsconfig Not tainted 6.3.0-next-20230505-00006-g5107a9c821af-dirty #541
+RIP: 0010:dquot_disable+0x13b7/0x18c0
+RSP: 0018:ffffc9000acc79e0 EFLAGS: 00010246
+RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff88825e41b980
+RDX: 0000000000000000 RSI: ffff88825e41b980 RDI: 0000000000000002
+RBP: ffff888179f68000 R08: ffffffff82087ca7 R09: 0000000000000000
+R10: 0000000000000001 R11: ffffed102f3ed026 R12: ffff888179f68130
+R13: ffff888179f68110 R14: dffffc0000000000 R15: ffff888179f68118
+FS: 00007f450a073740(0000) GS:ffff88882fc00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007ffe96f2efd8 CR3: 000000025c8ad000 CR4: 00000000000006e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <TASK>
+ dquot_load_quota_sb+0xd53/0x1060
+ dquot_resume+0x172/0x230
+ ext4_reconfigure+0x1dc6/0x27b0
+ reconfigure_super+0x515/0xa90
+ __x64_sys_fsconfig+0xb19/0xd20
+ do_syscall_64+0x39/0xb0
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+The above issue may happen as follows:
+ProcessA ProcessB ProcessC
+sys_fsconfig
+ vfs_fsconfig_locked
+ reconfigure_super
+ ext4_remount
+ dquot_suspend -> suspend all type quota
+
+ sys_fsconfig
+ vfs_fsconfig_locked
+ reconfigure_super
+ ext4_remount
+ dquot_resume
+ ret = dquot_load_quota_sb
+ add_dquot_ref
+ do_open -> open file O_RDWR
+ vfs_open
+ do_dentry_open
+ get_write_access
+ atomic_inc_unless_negative(&inode->i_writecount)
+ ext4_file_open
+ dquot_file_open
+ dquot_initialize
+ __dquot_initialize
+ dqget
+ atomic_inc(&dquot->dq_count);
+
+ __dquot_initialize
+ __dquot_initialize
+ dqget
+ if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
+ ext4_acquire_dquot
+ -> Return error DQ_ACTIVE_B flag isn't set
+ dquot_disable
+ invalidate_dquots
+ if (atomic_read(&dquot->dq_count))
+ dqgrab
+ WARN_ON_ONCE(!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
+ -> Trigger warning
+
+In the above scenario it is normal for 'dquot->dq_flags' to have no
+DQ_ACTIVE_B set at the time dqgrab() is called.
+To solve the issue, replace the dqgrab() call in invalidate_dquots() with a
+plain atomic_inc(&dquot->dq_count).
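+
+For reference, dqgrab() is a tiny inline helper whose body is roughly the
+following (paraphrased from include/linux/quotaops.h; minor details may vary
+between kernel versions), which is why calling it here trips the warning
+while a bare atomic_inc() takes the reference without the assertion:
+
+        static inline void dqgrab(struct dquot *dquot)
+        {
+                /* Assumes the dquot is still active -- exactly what is
+                 * not guaranteed in the scenario above. */
+                WARN_ON_ONCE(!test_bit(DQ_ACTIVE_B, &dquot->dq_flags));
+                atomic_inc(&dquot->dq_count);
+        }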
+
+Signed-off-by: Ye Bin <yebin10@huawei.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Message-Id: <20230605140731.2427629-3-yebin10@huawei.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/quota/dquot.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
+index 0d3ffc727bb00..303987d29b9c9 100644
+--- a/fs/quota/dquot.c
++++ b/fs/quota/dquot.c
+@@ -540,7 +540,7 @@ static void invalidate_dquots(struct super_block *sb, int type)
+ continue;
+ /* Wait for dquot users */
+ if (atomic_read(&dquot->dq_count)) {
+- dqgrab(dquot);
++ atomic_inc(&dquot->dq_count);
+ spin_unlock(&dq_list_lock);
+ /*
+ * Once dqput() wakes us up, we know it's time to free
+--
+2.39.2
+
--- /dev/null
+From 3e9e30aa708b3b8cb0485725964206a7b72d1f9b Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Mon, 5 Jun 2023 22:07:30 +0800
+Subject: [PATCH AUTOSEL 4.19 03/11] quota: Properly disable quotas when
+ add_dquot_ref() fails
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit 6a4e3363792e30177cc3965697e34ddcea8b900b ]
+
+When add_dquot_ref() fails (usually due to an IO error or ENOMEM), we want
+to disable the quotas we are trying to enable. However, the dquot_disable()
+call was passed just the flags we are enabling, so in the case of flags ==
+DQUOT_USAGE_ENABLED the dquot_disable() call will just fail with EINVAL
+instead of properly disabling quotas. Fix the problem by always passing
+DQUOT_LIMITS_ENABLED | DQUOT_USAGE_ENABLED to dquot_disable() in this
+case.
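+
+The failure mode follows from the sanity check near the top of
+dquot_disable(), which (paraphrased sketch, not the exact source) rejects a
+request to turn off usage accounting without also turning off limits:
+
+        /* dquot_disable(), simplified sketch */
+        if ((flags & DQUOT_USAGE_ENABLED) && !(flags & DQUOT_LIMITS_ENABLED))
+                return -EINVAL; /* what the error path used to hit */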
+
+Reported-and-tested-by: Ye Bin <yebin10@huawei.com>
+Reported-by: syzbot+e633c79ceaecbf479854@syzkaller.appspotmail.com
+Signed-off-by: Jan Kara <jack@suse.cz>
+Message-Id: <20230605140731.2427629-2-yebin10@huawei.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/quota/dquot.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
+index 770a2b1434856..0d3ffc727bb00 100644
+--- a/fs/quota/dquot.c
++++ b/fs/quota/dquot.c
+@@ -2407,7 +2407,8 @@ int dquot_load_quota_sb(struct super_block *sb, int type, int format_id,
+
+ error = add_dquot_ref(sb, type);
+ if (error)
+- dquot_disable(sb, type, flags);
++ dquot_disable(sb, type,
++ DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+
+ return error;
+ out_fmt:
+--
+2.39.2
+
--- /dev/null
+From 4d3360fe4eb403c4add5725291d2c102bad4db73 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Apr 2023 16:05:38 -0700
+Subject: rcu: Mark additional concurrent load from ->cpu_no_qs.b.exp
+
+From: Paul E. McKenney <paulmck@kernel.org>
+
+[ Upstream commit 9146eb25495ea8bfb5010192e61e3ed5805ce9ef ]
+
+The per-CPU rcu_data structure's ->cpu_no_qs.b.exp field is updated
+only on the instance corresponding to the current CPU, but can be read
+more widely. Unmarked accesses are OK from the corresponding CPU, but
+only if interrupts are disabled, given that interrupt handlers can and
+do modify this field.
+
+Unfortunately, although the load from rcu_preempt_deferred_qs() is always
+carried out from the corresponding CPU, interrupts are not necessarily
+disabled. This commit therefore upgrades this load to READ_ONCE.
+
+Similarly, the diagnostic access from synchronize_rcu_expedited_wait()
+might run with interrupts disabled and from some other CPU. This commit
+therefore marks this load with data_race().
+
+Finally, the C-language access in rcu_preempt_ctxt_queue() is OK as
+is because interrupts are disabled and this load is always from the
+corresponding CPU. This commit adds a comment giving the rationale for
+this access being safe.
+
+This data race was reported by KCSAN. Not appropriate for backporting
+due to failure being unlikely.
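+
+A minimal sketch of the three access styles discussed above (illustrative
+only, condensed from the hunks below rather than complete functions):
+
+        /* Corresponding CPU with interrupts disabled: plain access is fine. */
+        if (blkd_state & RCU_EXP_BLKD && rdp->cpu_no_qs.b.exp)
+                rcu_report_exp_rdp(rdp);
+
+        /* Corresponding CPU, but interrupts may be enabled: mark the load. */
+        if (READ_ONCE(rdp->cpu_no_qs.b.exp))
+                rcu_report_exp_rdp(rdp);
+
+        /* Diagnostic read, possibly from another CPU: annotate the race. */
+        pr_cont(" %c", "D."[!!data_race(rdp->cpu_no_qs.b.exp)]);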
+
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/rcu/tree_exp.h | 2 +-
+ kernel/rcu/tree_plugin.h | 4 +++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
+index e25321dbb068e..aa3ec3c3b9f75 100644
+--- a/kernel/rcu/tree_exp.h
++++ b/kernel/rcu/tree_exp.h
+@@ -641,7 +641,7 @@ static void synchronize_rcu_expedited_wait(void)
+ "O."[!!cpu_online(cpu)],
+ "o."[!!(rdp->grpmask & rnp->expmaskinit)],
+ "N."[!!(rdp->grpmask & rnp->expmaskinitnext)],
+- "D."[!!(rdp->cpu_no_qs.b.exp)]);
++ "D."[!!data_race(rdp->cpu_no_qs.b.exp)]);
+ }
+ }
+ pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
+diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
+index e3142ee35fc6a..044026abfdd7f 100644
+--- a/kernel/rcu/tree_plugin.h
++++ b/kernel/rcu/tree_plugin.h
+@@ -257,6 +257,8 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
+ * GP should not be able to end until we report, so there should be
+ * no need to check for a subsequent expedited GP. (Though we are
+ * still in a quiescent state in any case.)
++ *
++ * Interrupts are disabled, so ->cpu_no_qs.b.exp cannot change.
+ */
+ if (blkd_state & RCU_EXP_BLKD && rdp->cpu_no_qs.b.exp)
+ rcu_report_exp_rdp(rdp);
+@@ -941,7 +943,7 @@ notrace void rcu_preempt_deferred_qs(struct task_struct *t)
+ {
+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+
+- if (rdp->cpu_no_qs.b.exp)
++ if (READ_ONCE(rdp->cpu_no_qs.b.exp))
+ rcu_report_exp_rdp(rdp);
+ }
+
+--
+2.39.2
+
--- /dev/null
+From aef95e1bb3b2e697dd8a92a4b03466862cd224fd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Aug 2022 01:22:05 +0900
+Subject: rcu-tasks: Avoid pr_info() with spin lock in cblist_init_generic()
+
+From: Shigeru Yoshida <syoshida@redhat.com>
+
+[ Upstream commit 5fc8cbe4cf0fd34ded8045c385790c3bf04f6785 ]
+
+pr_info() is called with the rtp->cbs_gbl_lock spin lock held. Because
+pr_info() calls printk(), which might sleep, this results in a BUG
+like the one below:
+
+[ 0.206455] cblist_init_generic: Setting adjustable number of callback queues.
+[ 0.206463]
+[ 0.206464] =============================
+[ 0.206464] [ BUG: Invalid wait context ]
+[ 0.206465] 5.19.0-00428-g9de1f9c8ca51 #5 Not tainted
+[ 0.206466] -----------------------------
+[ 0.206466] swapper/0/1 is trying to lock:
+[ 0.206467] ffffffffa0167a58 (&port_lock_key){....}-{3:3}, at: serial8250_console_write+0x327/0x4a0
+[ 0.206473] other info that might help us debug this:
+[ 0.206473] context-{5:5}
+[ 0.206474] 3 locks held by swapper/0/1:
+[ 0.206474] #0: ffffffff9eb597e0 (rcu_tasks.cbs_gbl_lock){....}-{2:2}, at: cblist_init_generic.constprop.0+0x14/0x1f0
+[ 0.206478] #1: ffffffff9eb579c0 (console_lock){+.+.}-{0:0}, at: _printk+0x63/0x7e
+[ 0.206482] #2: ffffffff9ea77780 (console_owner){....}-{0:0}, at: console_emit_next_record.constprop.0+0x111/0x330
+[ 0.206485] stack backtrace:
+[ 0.206486] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.19.0-00428-g9de1f9c8ca51 #5
+[ 0.206488] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-1.fc36 04/01/2014
+[ 0.206489] Call Trace:
+[ 0.206490] <TASK>
+[ 0.206491] dump_stack_lvl+0x6a/0x9f
+[ 0.206493] __lock_acquire.cold+0x2d7/0x2fe
+[ 0.206496] ? stack_trace_save+0x46/0x70
+[ 0.206497] lock_acquire+0xd1/0x2f0
+[ 0.206499] ? serial8250_console_write+0x327/0x4a0
+[ 0.206500] ? __lock_acquire+0x5c7/0x2720
+[ 0.206502] _raw_spin_lock_irqsave+0x3d/0x90
+[ 0.206504] ? serial8250_console_write+0x327/0x4a0
+[ 0.206506] serial8250_console_write+0x327/0x4a0
+[ 0.206508] console_emit_next_record.constprop.0+0x180/0x330
+[ 0.206511] console_unlock+0xf7/0x1f0
+[ 0.206512] vprintk_emit+0xf7/0x330
+[ 0.206514] _printk+0x63/0x7e
+[ 0.206516] cblist_init_generic.constprop.0.cold+0x24/0x32
+[ 0.206518] rcu_init_tasks_generic+0x5/0xd9
+[ 0.206522] kernel_init_freeable+0x15b/0x2a2
+[ 0.206523] ? rest_init+0x160/0x160
+[ 0.206526] kernel_init+0x11/0x120
+[ 0.206527] ret_from_fork+0x1f/0x30
+[ 0.206530] </TASK>
+[ 0.207018] cblist_init_generic: Setting shift to 1 and lim to 1.
+
+This patch moves pr_info() so that it is called without
+rtp->cbs_gbl_lock locked.
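+
+In sketch form, the resulting shape of cblist_init_generic() is (simplified;
+the real function computes more state under the lock):
+
+        raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
+        /* ... decide rcu_task_cb_adjust, enqueue shift and limit ... */
+        raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
+
+        /* Only print once the raw spinlock has been dropped. */
+        if (rcu_task_cb_adjust)
+                pr_info("%s: Setting adjustable number of callback queues.\n",
+                        __func__);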
+
+Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
+Tested-by: "Zhang, Qiang1" <qiang1.zhang@intel.com>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/rcu/tasks.h | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
+index df968321feada..c1f18c63b9b14 100644
+--- a/kernel/rcu/tasks.h
++++ b/kernel/rcu/tasks.h
+@@ -233,7 +233,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
+ if (rcu_task_enqueue_lim < 0) {
+ rcu_task_enqueue_lim = 1;
+ rcu_task_cb_adjust = true;
+- pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
+ } else if (rcu_task_enqueue_lim == 0) {
+ rcu_task_enqueue_lim = 1;
+ }
+@@ -264,6 +263,10 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
+ raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
+ }
+ raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
++
++ if (rcu_task_cb_adjust)
++ pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
++
+ pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim));
+ }
+
+--
+2.39.2
+
--- /dev/null
+From 242c82c4047048b1d67da8284935b57fc6abaa12 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 14:59:18 -0700
+Subject: Revert "tcp: avoid the lookup process failing to get sk in ehash
+ table"
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 81b3ade5d2b98ad6e0a473b0e1e420a801275592 ]
+
+This reverts commit 3f4ca5fafc08881d7a57daa20449d171f2887043.
+
+Commit 3f4ca5fafc08 ("tcp: avoid the lookup process failing to get sk in
+ehash table") reversed the order in how a socket is inserted into ehash
+to fix an issue that ehash-lookup could fail when reqsk/full sk/twsk are
+swapped. However, it introduced another lookup failure.
+
+The full socket in ehash is allocated from a slab with SLAB_TYPESAFE_BY_RCU
+and does not have SOCK_RCU_FREE, so the socket could be reused even while
+it is being referenced on another CPU doing RCU lookup.
+
+Let's say a socket is reused and inserted into the same hash bucket during
+lookup. After the blamed commit, a new socket is inserted at the end of
+the list. If that happens, we will skip sockets placed after the previous
+position of the reused socket, resulting in ehash lookup failure.
+
+As described in Documentation/RCU/rculist_nulls.rst, we should insert a
+new socket at the head of the list to avoid such an issue.
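+
+A sketch of the reader side that makes head insertion necessary (modeled on
+the pattern in Documentation/RCU/rculist_nulls.rst, not the exact ehash
+lookup code; found_match() stands in for the real key comparison):
+
+begin:
+        rcu_read_lock();
+        sk_nulls_for_each_rcu(sk, node, &head->chain) {
+                if (found_match(sk))
+                        goto found;
+        }
+        /*
+         * Reached a nulls marker without a match.  If it belongs to a
+         * different bucket, the socket we were traversing was freed and
+         * reused elsewhere: restart.  If a reused socket is re-inserted
+         * at the *head* of the same bucket, we simply re-walk the whole
+         * chain; had it been re-inserted at the *tail*, everything between
+         * its old position and the tail would be silently skipped and this
+         * check could not catch it -- the lookup failure the revert avoids.
+         */
+        if (get_nulls_value(node) != slot)
+                goto begin;
+found:
+        rcu_read_unlock();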
+
+This issue, the swap-lookup-failure, and another variant reported in [0]
+can all be handled properly by adding a locked ehash lookup suggested by
+Eric Dumazet [1].
+
+However, this issue could occur for every packet and is thus more likely
+than the other two races, so let's revert the change for now.
+
+Link: https://lore.kernel.org/netdev/20230606064306.9192-1-duanmuquan@baidu.com/ [0]
+Link: https://lore.kernel.org/netdev/CANn89iK8snOz8TYOhhwfimC7ykYA78GA3Nyv8x06SZYa1nKdyA@mail.gmail.com/ [1]
+Fixes: 3f4ca5fafc08 ("tcp: avoid the lookup process failing to get sk in ehash table")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230717215918.15723-1-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/inet_hashtables.c | 17 ++---------------
+ net/ipv4/inet_timewait_sock.c | 8 ++++----
+ 2 files changed, 6 insertions(+), 19 deletions(-)
+
+diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
+index e8734ffca85a8..c19b462662ad0 100644
+--- a/net/ipv4/inet_hashtables.c
++++ b/net/ipv4/inet_hashtables.c
+@@ -650,20 +650,8 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
+ spin_lock(lock);
+ if (osk) {
+ WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
+- ret = sk_hashed(osk);
+- if (ret) {
+- /* Before deleting the node, we insert a new one to make
+- * sure that the look-up-sk process would not miss either
+- * of them and that at least one node would exist in ehash
+- * table all the time. Otherwise there's a tiny chance
+- * that lookup process could find nothing in ehash table.
+- */
+- __sk_nulls_add_node_tail_rcu(sk, list);
+- sk_nulls_del_node_init_rcu(osk);
+- }
+- goto unlock;
+- }
+- if (found_dup_sk) {
++ ret = sk_nulls_del_node_init_rcu(osk);
++ } else if (found_dup_sk) {
+ *found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
+ if (*found_dup_sk)
+ ret = false;
+@@ -672,7 +660,6 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
+ if (ret)
+ __sk_nulls_add_node_rcu(sk, list);
+
+-unlock:
+ spin_unlock(lock);
+
+ return ret;
+diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
+index beed32fff4841..1d77d992e6e77 100644
+--- a/net/ipv4/inet_timewait_sock.c
++++ b/net/ipv4/inet_timewait_sock.c
+@@ -91,10 +91,10 @@ void inet_twsk_put(struct inet_timewait_sock *tw)
+ }
+ EXPORT_SYMBOL_GPL(inet_twsk_put);
+
+-static void inet_twsk_add_node_tail_rcu(struct inet_timewait_sock *tw,
+- struct hlist_nulls_head *list)
++static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
++ struct hlist_nulls_head *list)
+ {
+- hlist_nulls_add_tail_rcu(&tw->tw_node, list);
++ hlist_nulls_add_head_rcu(&tw->tw_node, list);
+ }
+
+ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
+@@ -147,7 +147,7 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
+
+ spin_lock(lock);
+
+- inet_twsk_add_node_tail_rcu(tw, &ehead->chain);
++ inet_twsk_add_node_rcu(tw, &ehead->chain);
+
+ /* Step 3: Remove SK from hash chain */
+ if (__sk_nulls_del_node_init_rcu(sk))
+--
+2.39.2
+
--- /dev/null
+From 8455627afba0715ac09ca4e31fd0ca55986494f2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 30 May 2023 16:25:07 +0800
+Subject: sched/fair: Don't balance task to its current running CPU
+
+From: Yicong Yang <yangyicong@hisilicon.com>
+
+[ Upstream commit 0dd37d6dd33a9c23351e6115ae8cdac7863bc7de ]
+
+We've run into a case where the balancer tries to balance a migration
+disabled task and triggers the warning in set_task_cpu() shown below:
+
+ ------------[ cut here ]------------
+ WARNING: CPU: 7 PID: 0 at kernel/sched/core.c:3115 set_task_cpu+0x188/0x240
+ Modules linked in: hclgevf xt_CHECKSUM ipt_REJECT nf_reject_ipv4 <...snip>
+ CPU: 7 PID: 0 Comm: swapper/7 Kdump: loaded Tainted: G O 6.1.0-rc4+ #1
+ Hardware name: Huawei TaiShan 2280 V2/BC82AMDC, BIOS 2280-V2 CS V5.B221.01 12/09/2021
+ pstate: 604000c9 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+ pc : set_task_cpu+0x188/0x240
+ lr : load_balance+0x5d0/0xc60
+ sp : ffff80000803bc70
+ x29: ffff80000803bc70 x28: ffff004089e190e8 x27: ffff004089e19040
+ x26: ffff007effcabc38 x25: 0000000000000000 x24: 0000000000000001
+ x23: ffff80000803be84 x22: 000000000000000c x21: ffffb093e79e2a78
+ x20: 000000000000000c x19: ffff004089e19040 x18: 0000000000000000
+ x17: 0000000000001fad x16: 0000000000000030 x15: 0000000000000000
+ x14: 0000000000000003 x13: 0000000000000000 x12: 0000000000000000
+ x11: 0000000000000001 x10: 0000000000000400 x9 : ffffb093e4cee530
+ x8 : 00000000fffffffe x7 : 0000000000ce168a x6 : 000000000000013e
+ x5 : 00000000ffffffe1 x4 : 0000000000000001 x3 : 0000000000000b2a
+ x2 : 0000000000000b2a x1 : ffffb093e6d6c510 x0 : 0000000000000001
+ Call trace:
+ set_task_cpu+0x188/0x240
+ load_balance+0x5d0/0xc60
+ rebalance_domains+0x26c/0x380
+ _nohz_idle_balance.isra.0+0x1e0/0x370
+ run_rebalance_domains+0x6c/0x80
+ __do_softirq+0x128/0x3d8
+ ____do_softirq+0x18/0x24
+ call_on_irq_stack+0x2c/0x38
+ do_softirq_own_stack+0x24/0x3c
+ __irq_exit_rcu+0xcc/0xf4
+ irq_exit_rcu+0x18/0x24
+ el1_interrupt+0x4c/0xe4
+ el1h_64_irq_handler+0x18/0x2c
+ el1h_64_irq+0x74/0x78
+ arch_cpu_idle+0x18/0x4c
+ default_idle_call+0x58/0x194
+ do_idle+0x244/0x2b0
+ cpu_startup_entry+0x30/0x3c
+ secondary_start_kernel+0x14c/0x190
+ __secondary_switched+0xb0/0xb4
+ ---[ end trace 0000000000000000 ]---
+
+Further investigation shows that the warning is superfluous: the migration
+disabled task is just going to be migrated to its current running CPU.
+This is because, on load balance, if the dst_cpu is not allowed by the
+task, we re-select a new_dst_cpu as a candidate. If no task can be
+balanced to dst_cpu, we try to balance the task to the new_dst_cpu
+instead. In this case, when the migration disabled task is not on a CPU,
+it is only allowed to run on its current CPU; load balance then selects
+that CPU as new_dst_cpu and later triggers the warning above.
+
+The new_dst_cpu is chosen from env->dst_grpmask. Currently it contains the
+CPUs in sched_group_span(), and with overlapped groups it is possible to
+run into this case. This patch sets env->dst_grpmask to
+group_balance_mask(), which excludes any CPUs belonging to the busiest
+group and thus solves the issue. For balancing in a domain with no
+overlapped groups the behaviour remains the same as before.
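+
+For context, the candidate selection referred to above lives in
+can_migrate_task() and looks roughly like this (condensed sketch; exact
+flags and surrounding checks differ by kernel version):
+
+        if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) {
+                int cpu;
+
+                env->flags |= LBF_SOME_PINNED;
+
+                /*
+                 * Remember a fallback destination for a later retry.  With
+                 * dst_grpmask == sched_group_span() and overlapping groups,
+                 * this can end up being the pinned task's own CPU.
+                 */
+                for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
+                        if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
+                                env->flags |= LBF_DST_PINNED;
+                                env->new_dst_cpu = cpu;
+                                break;
+                        }
+                }
+                return 0;
+        }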
+
+Suggested-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
+Link: https://lore.kernel.org/r/20230530082507.10444-1-yangyicong@huawei.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index fa33c441ae867..57d39de0962d7 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -10556,7 +10556,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
+ .sd = sd,
+ .dst_cpu = this_cpu,
+ .dst_rq = this_rq,
+- .dst_grpmask = sched_group_span(sd->groups),
++ .dst_grpmask = group_balance_mask(sd->groups),
+ .idle = idle,
+ .loop_break = SCHED_NR_MIGRATE_BREAK,
+ .cpus = cpus,
+--
+2.39.2
+
--- /dev/null
+From 87c0b2894b5bff97a3b231e21a5467e96e6ba324 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Jun 2023 16:07:47 +0800
+Subject: sched/fair: Use recent_used_cpu to test p->cpus_ptr
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+[ Upstream commit ae2ad293d6be143ad223f5f947cca07bcbe42595 ]
+
+When checking whether a recently used CPU can be a potential idle
+candidate, recent_used_cpu should be used to test p->cpus_ptr, since
+p->recent_used_cpu is no longer equal to recent_used_cpu at that point
+and the candidate decision here is made based on recent_used_cpu.
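+
+The relevant slice of select_idle_sibling(), condensed from the hunk below
+(note that the snapshot happens before p->recent_used_cpu is overwritten,
+so the two are no longer interchangeable at the test):
+
+        /* Check a recently used CPU as a potential idle candidate: */
+        recent_used_cpu = p->recent_used_cpu;
+        p->recent_used_cpu = prev;
+        if (recent_used_cpu != prev &&
+            recent_used_cpu != target &&
+            cpus_share_cache(recent_used_cpu, target) &&
+            cpumask_test_cpu(recent_used_cpu, p->cpus_ptr))    /* the fix */
+                return recent_used_cpu;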
+
+Fixes: 89aafd67f28c ("sched/fair: Use prev instead of new target as recent_used_cpu")
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Phil Auld <pauld@redhat.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Link: https://lore.kernel.org/r/20230620080747.359122-1-linmiaohe@huawei.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 57d39de0962d7..5e5aea2360a87 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -6935,7 +6935,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
+ recent_used_cpu != target &&
+ cpus_share_cache(recent_used_cpu, target) &&
+ (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
+- cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) &&
++ cpumask_test_cpu(recent_used_cpu, p->cpus_ptr) &&
+ asym_fits_cpu(task_util, util_min, util_max, recent_used_cpu)) {
+ return recent_used_cpu;
+ }
+--
+2.39.2
+
--- /dev/null
+From 24ad138c2ace2a7a5bc0ceccb0055be994ccc3ad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Mar 2023 12:54:18 +0200
+Subject: sched/psi: Allow unprivileged polling of N*2s period
+
+From: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
+
+[ Upstream commit d82caa273565b45fcf103148950549af76c314b0 ]
+
+PSI offers 2 mechanisms to get information about a specific resource
+pressure. One is reading from /proc/pressure/<resource>, which gives
+average pressures aggregated every 2s. The other is creating a pollable
+fd for a specific resource and cgroup.
+
+The trigger creation requires CAP_SYS_RESOURCE, and gives the
+possibility to pick a specific time window and threshold, spawning an RT
+thread to aggregate the data.
+
+Systemd would like to provide containers with the option to monitor pressure
+on their own cgroup and sub-cgroups. For example, if systemd launches a
+container that itself then launches services, the container should have
+the ability to poll() for pressure in individual services. But neither
+the container nor the services are privileged.
+
+This patch implements a mechanism to allow unprivileged users to create
+pressure triggers. The difference from privileged trigger creation is
+that unprivileged triggers must have a time window that is a multiple of
+2s. This avoids unrestricted spawning of RT threads and instead reuses
+the aggregation mechanism already used for the averages, which runs
+independently of any triggers.
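+
+A userspace sketch of what this enables, following the trigger format in
+Documentation/accounting/psi.rst (the 4000000us window is just 2 * 2s, so
+an unprivileged process is allowed to use it):
+
+        #include <fcntl.h>
+        #include <poll.h>
+        #include <string.h>
+        #include <unistd.h>
+
+        /* Hypothetical unprivileged monitor: 150ms of "some" memory
+         * pressure per 4s window (a multiple of 2s, as now required). */
+        int main(void)
+        {
+                const char trig[] = "some 150000 4000000";
+                struct pollfd pfd;
+
+                pfd.fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);
+                pfd.events = POLLPRI;
+                if (pfd.fd < 0 || write(pfd.fd, trig, strlen(trig) + 1) < 0)
+                        return 1;
+                poll(&pfd, 1, -1);      /* wakes when the threshold is breached */
+                return 0;
+        }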
+
+Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Link: https://lore.kernel.org/r/20230330105418.77061-5-cerasuolodomenico@gmail.com
+Stable-dep-of: aff037078eca ("sched/psi: use kernfs polling functions for PSI trigger polling")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/accounting/psi.rst | 4 +
+ include/linux/psi.h | 2 +-
+ include/linux/psi_types.h | 7 ++
+ kernel/cgroup/cgroup.c | 2 +-
+ kernel/sched/psi.c | 175 +++++++++++++++++++------------
+ 5 files changed, 121 insertions(+), 69 deletions(-)
+
+diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst
+index 5e40b3f437f90..df6062eb3abbc 100644
+--- a/Documentation/accounting/psi.rst
++++ b/Documentation/accounting/psi.rst
+@@ -105,6 +105,10 @@ prevent overly frequent polling. Max limit is chosen as a high enough number
+ after which monitors are most likely not needed and psi averages can be used
+ instead.
+
++Unprivileged users can also create monitors, with the only limitation that the
++window size must be a multiple of 2s, in order to prevent excessive resource
++usage.
++
+ When activated, psi monitor stays active for at least the duration of one
+ tracking window to avoid repeated activations/deactivations when system is
+ bouncing in and out of the stall state.
+diff --git a/include/linux/psi.h b/include/linux/psi.h
+index b029a847def1e..ab26200c28033 100644
+--- a/include/linux/psi.h
++++ b/include/linux/psi.h
+@@ -24,7 +24,7 @@ void psi_memstall_leave(unsigned long *flags);
+
+ int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
+ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+- char *buf, enum psi_res res);
++ char *buf, enum psi_res res, struct file *file);
+ void psi_trigger_destroy(struct psi_trigger *t);
+
+ __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
+diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
+index 1819afa8b1987..040c089581c6c 100644
+--- a/include/linux/psi_types.h
++++ b/include/linux/psi_types.h
+@@ -151,6 +151,9 @@ struct psi_trigger {
+
+ /* Deferred event(s) from previous ratelimit window */
+ bool pending_event;
++
++ /* Trigger type - PSI_AVGS for unprivileged, PSI_POLL for RT */
++ enum psi_aggregators aggregator;
+ };
+
+ struct psi_group {
+@@ -171,6 +174,10 @@ struct psi_group {
+ /* Aggregator work control */
+ struct delayed_work avgs_work;
+
++ /* Unprivileged triggers against N*PSI_FREQ windows */
++ struct list_head avg_triggers;
++ u32 avg_nr_triggers[NR_PSI_STATES - 1];
++
+ /* Total stall times and sampled pressure averages */
+ u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
+ unsigned long avg[NR_PSI_STATES - 1][3];
+diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
+index 2380c4daef33d..c35efae566a4b 100644
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -3771,7 +3771,7 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
+ }
+
+ psi = cgroup_psi(cgrp);
+- new = psi_trigger_create(psi, buf, res);
++ new = psi_trigger_create(psi, buf, res, of->file);
+ if (IS_ERR(new)) {
+ cgroup_put(cgrp);
+ return PTR_ERR(new);
+diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
+index f3df6a8ff493c..e072f6b31bf30 100644
+--- a/kernel/sched/psi.c
++++ b/kernel/sched/psi.c
+@@ -186,9 +186,14 @@ static void group_init(struct psi_group *group)
+ seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq);
+ group->avg_last_update = sched_clock();
+ group->avg_next_update = group->avg_last_update + psi_period;
+- INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work);
+ mutex_init(&group->avgs_lock);
+- /* Init trigger-related members */
++
++ /* Init avg trigger-related members */
++ INIT_LIST_HEAD(&group->avg_triggers);
++ memset(group->avg_nr_triggers, 0, sizeof(group->avg_nr_triggers));
++ INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work);
++
++ /* Init rtpoll trigger-related members */
+ atomic_set(&group->rtpoll_scheduled, 0);
+ mutex_init(&group->rtpoll_trigger_lock);
+ INIT_LIST_HEAD(&group->rtpoll_triggers);
+@@ -430,21 +435,32 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value)
+ return growth;
+ }
+
+-static u64 update_triggers(struct psi_group *group, u64 now, bool *update_total)
++static u64 update_triggers(struct psi_group *group, u64 now, bool *update_total,
++ enum psi_aggregators aggregator)
+ {
+ struct psi_trigger *t;
+- u64 *total = group->total[PSI_POLL];
++ u64 *total = group->total[aggregator];
++ struct list_head *triggers;
++ u64 *aggregator_total;
+ *update_total = false;
+
++ if (aggregator == PSI_AVGS) {
++ triggers = &group->avg_triggers;
++ aggregator_total = group->avg_total;
++ } else {
++ triggers = &group->rtpoll_triggers;
++ aggregator_total = group->rtpoll_total;
++ }
++
+ /*
+ * On subsequent updates, calculate growth deltas and let
+ * watchers know when their specified thresholds are exceeded.
+ */
+- list_for_each_entry(t, &group->rtpoll_triggers, node) {
++ list_for_each_entry(t, triggers, node) {
+ u64 growth;
+ bool new_stall;
+
+- new_stall = group->rtpoll_total[t->state] != total[t->state];
++ new_stall = aggregator_total[t->state] != total[t->state];
+
+ /* Check for stall activity or a previous threshold breach */
+ if (!new_stall && !t->pending_event)
+@@ -546,6 +562,7 @@ static void psi_avgs_work(struct work_struct *work)
+ struct delayed_work *dwork;
+ struct psi_group *group;
+ u32 changed_states;
++ bool update_total;
+ u64 now;
+
+ dwork = to_delayed_work(work);
+@@ -563,8 +580,10 @@ static void psi_avgs_work(struct work_struct *work)
+ * Once restarted, we'll catch up the running averages in one
+ * go - see calc_avgs() and missed_periods.
+ */
+- if (now >= group->avg_next_update)
++ if (now >= group->avg_next_update) {
++ update_triggers(group, now, &update_total, PSI_AVGS);
+ group->avg_next_update = update_averages(group, now);
++ }
+
+ if (changed_states & PSI_STATE_RESCHEDULE) {
+ schedule_delayed_work(dwork, nsecs_to_jiffies(
+@@ -574,7 +593,7 @@ static void psi_avgs_work(struct work_struct *work)
+ mutex_unlock(&group->avgs_lock);
+ }
+
+-static void init_triggers(struct psi_group *group, u64 now)
++static void init_rtpoll_triggers(struct psi_group *group, u64 now)
+ {
+ struct psi_trigger *t;
+
+@@ -667,7 +686,7 @@ static void psi_rtpoll_work(struct psi_group *group)
+ if (changed_states & group->rtpoll_states) {
+ /* Initialize trigger windows when entering polling mode */
+ if (now > group->rtpoll_until)
+- init_triggers(group, now);
++ init_rtpoll_triggers(group, now);
+
+ /*
+ * Keep the monitor active for at least the duration of the
+@@ -684,7 +703,7 @@ static void psi_rtpoll_work(struct psi_group *group)
+ }
+
+ if (now >= group->rtpoll_next_update) {
+- group->rtpoll_next_update = update_triggers(group, now, &update_total);
++ group->rtpoll_next_update = update_triggers(group, now, &update_total, PSI_POLL);
+ if (update_total)
+ memcpy(group->rtpoll_total, group->total[PSI_POLL],
+ sizeof(group->rtpoll_total));
+@@ -1254,16 +1273,23 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
+ }
+
+ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+- char *buf, enum psi_res res)
++ char *buf, enum psi_res res, struct file *file)
+ {
+ struct psi_trigger *t;
+ enum psi_states state;
+ u32 threshold_us;
++ bool privileged;
+ u32 window_us;
+
+ if (static_branch_likely(&psi_disabled))
+ return ERR_PTR(-EOPNOTSUPP);
+
++ /*
++ * Checking the privilege here on file->f_cred implies that a privileged user
++ * could open the file and delegate the write to an unprivileged one.
++ */
++ privileged = cap_raised(file->f_cred->cap_effective, CAP_SYS_RESOURCE);
++
+ if (sscanf(buf, "some %u %u", &threshold_us, &window_us) == 2)
+ state = PSI_IO_SOME + res * 2;
+ else if (sscanf(buf, "full %u %u", &threshold_us, &window_us) == 2)
+@@ -1283,6 +1309,13 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+ window_us > WINDOW_MAX_US)
+ return ERR_PTR(-EINVAL);
+
++ /*
++ * Unprivileged users can only use 2s windows so that averages aggregation
++ * work is used, and no RT threads need to be spawned.
++ */
++ if (!privileged && window_us % 2000000)
++ return ERR_PTR(-EINVAL);
++
+ /* Check threshold */
+ if (threshold_us == 0 || threshold_us > window_us)
+ return ERR_PTR(-EINVAL);
+@@ -1302,31 +1335,40 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+ t->last_event_time = 0;
+ init_waitqueue_head(&t->event_wait);
+ t->pending_event = false;
++ t->aggregator = privileged ? PSI_POLL : PSI_AVGS;
+
+- mutex_lock(&group->rtpoll_trigger_lock);
++ if (privileged) {
++ mutex_lock(&group->rtpoll_trigger_lock);
+
+- if (!rcu_access_pointer(group->rtpoll_task)) {
+- struct task_struct *task;
++ if (!rcu_access_pointer(group->rtpoll_task)) {
++ struct task_struct *task;
+
+- task = kthread_create(psi_rtpoll_worker, group, "psimon");
+- if (IS_ERR(task)) {
+- kfree(t);
+- mutex_unlock(&group->rtpoll_trigger_lock);
+- return ERR_CAST(task);
++ task = kthread_create(psi_rtpoll_worker, group, "psimon");
++ if (IS_ERR(task)) {
++ kfree(t);
++ mutex_unlock(&group->rtpoll_trigger_lock);
++ return ERR_CAST(task);
++ }
++ atomic_set(&group->rtpoll_wakeup, 0);
++ wake_up_process(task);
++ rcu_assign_pointer(group->rtpoll_task, task);
+ }
+- atomic_set(&group->rtpoll_wakeup, 0);
+- wake_up_process(task);
+- rcu_assign_pointer(group->rtpoll_task, task);
+- }
+
+- list_add(&t->node, &group->rtpoll_triggers);
+- group->rtpoll_min_period = min(group->rtpoll_min_period,
+- div_u64(t->win.size, UPDATES_PER_WINDOW));
+- group->rtpoll_nr_triggers[t->state]++;
+- group->rtpoll_states |= (1 << t->state);
++ list_add(&t->node, &group->rtpoll_triggers);
++ group->rtpoll_min_period = min(group->rtpoll_min_period,
++ div_u64(t->win.size, UPDATES_PER_WINDOW));
++ group->rtpoll_nr_triggers[t->state]++;
++ group->rtpoll_states |= (1 << t->state);
+
+- mutex_unlock(&group->rtpoll_trigger_lock);
++ mutex_unlock(&group->rtpoll_trigger_lock);
++ } else {
++ mutex_lock(&group->avgs_lock);
++
++ list_add(&t->node, &group->avg_triggers);
++ group->avg_nr_triggers[t->state]++;
+
++ mutex_unlock(&group->avgs_lock);
++ }
+ return t;
+ }
+
+@@ -1350,34 +1392,41 @@ void psi_trigger_destroy(struct psi_trigger *t)
+ */
+ wake_up_pollfree(&t->event_wait);
+
+- mutex_lock(&group->rtpoll_trigger_lock);
+-
+- if (!list_empty(&t->node)) {
+- struct psi_trigger *tmp;
+- u64 period = ULLONG_MAX;
+-
+- list_del(&t->node);
+- group->rtpoll_nr_triggers[t->state]--;
+- if (!group->rtpoll_nr_triggers[t->state])
+- group->rtpoll_states &= ~(1 << t->state);
+- /* reset min update period for the remaining triggers */
+- list_for_each_entry(tmp, &group->rtpoll_triggers, node)
+- period = min(period, div_u64(tmp->win.size,
+- UPDATES_PER_WINDOW));
+- group->rtpoll_min_period = period;
+- /* Destroy rtpoll_task when the last trigger is destroyed */
+- if (group->rtpoll_states == 0) {
+- group->rtpoll_until = 0;
+- task_to_destroy = rcu_dereference_protected(
+- group->rtpoll_task,
+- lockdep_is_held(&group->rtpoll_trigger_lock));
+- rcu_assign_pointer(group->rtpoll_task, NULL);
+- del_timer(&group->rtpoll_timer);
++ if (t->aggregator == PSI_AVGS) {
++ mutex_lock(&group->avgs_lock);
++ if (!list_empty(&t->node)) {
++ list_del(&t->node);
++ group->avg_nr_triggers[t->state]--;
+ }
++ mutex_unlock(&group->avgs_lock);
++ } else {
++ mutex_lock(&group->rtpoll_trigger_lock);
++ if (!list_empty(&t->node)) {
++ struct psi_trigger *tmp;
++ u64 period = ULLONG_MAX;
++
++ list_del(&t->node);
++ group->rtpoll_nr_triggers[t->state]--;
++ if (!group->rtpoll_nr_triggers[t->state])
++ group->rtpoll_states &= ~(1 << t->state);
++ /* reset min update period for the remaining triggers */
++ list_for_each_entry(tmp, &group->rtpoll_triggers, node)
++ period = min(period, div_u64(tmp->win.size,
++ UPDATES_PER_WINDOW));
++ group->rtpoll_min_period = period;
++ /* Destroy rtpoll_task when the last trigger is destroyed */
++ if (group->rtpoll_states == 0) {
++ group->rtpoll_until = 0;
++ task_to_destroy = rcu_dereference_protected(
++ group->rtpoll_task,
++ lockdep_is_held(&group->rtpoll_trigger_lock));
++ rcu_assign_pointer(group->rtpoll_task, NULL);
++ del_timer(&group->rtpoll_timer);
++ }
++ }
++ mutex_unlock(&group->rtpoll_trigger_lock);
+ }
+
+- mutex_unlock(&group->rtpoll_trigger_lock);
+-
+ /*
+ * Wait for psi_schedule_rtpoll_work RCU to complete its read-side
+ * critical section before destroying the trigger and optionally the
+@@ -1437,27 +1486,19 @@ static int psi_cpu_show(struct seq_file *m, void *v)
+ return psi_show(m, &psi_system, PSI_CPU);
+ }
+
+-static int psi_open(struct file *file, int (*psi_show)(struct seq_file *, void *))
+-{
+- if (file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE))
+- return -EPERM;
+-
+- return single_open(file, psi_show, NULL);
+-}
+-
+ static int psi_io_open(struct inode *inode, struct file *file)
+ {
+- return psi_open(file, psi_io_show);
++ return single_open(file, psi_io_show, NULL);
+ }
+
+ static int psi_memory_open(struct inode *inode, struct file *file)
+ {
+- return psi_open(file, psi_memory_show);
++ return single_open(file, psi_memory_show, NULL);
+ }
+
+ static int psi_cpu_open(struct inode *inode, struct file *file)
+ {
+- return psi_open(file, psi_cpu_show);
++ return single_open(file, psi_cpu_show, NULL);
+ }
+
+ static ssize_t psi_write(struct file *file, const char __user *user_buf,
+@@ -1491,7 +1532,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
+ return -EBUSY;
+ }
+
+- new = psi_trigger_create(&psi_system, buf, res);
++ new = psi_trigger_create(&psi_system, buf, res, file);
+ if (IS_ERR(new)) {
+ mutex_unlock(&seq->lock);
+ return PTR_ERR(new);
+@@ -1571,7 +1612,7 @@ static int psi_irq_show(struct seq_file *m, void *v)
+
+ static int psi_irq_open(struct inode *inode, struct file *file)
+ {
+- return psi_open(file, psi_irq_show);
++ return single_open(file, psi_irq_show, NULL);
+ }
+
+ static ssize_t psi_irq_write(struct file *file, const char __user *user_buf,
+--
+2.39.2
+
--- /dev/null
+From 3d78ff2fdc7f963507676dadc4a58e7433f61819 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Mar 2023 12:54:17 +0200
+Subject: sched/psi: Extract update_triggers side effect
+
+From: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
+
+[ Upstream commit 4468fcae49f08e88fbbffe05b29496192df89991 ]
+
+This change moves the update_total flag out of the update_triggers
+function, which is currently called only in psi_poll_work.
+In the next patch, update_triggers will also be called in psi_avgs_work,
+but the total update information is specific to psi_poll_work.
+Returning the update_total value to the caller lets us avoid
+differentiating the implementation of update_triggers for different
+aggregators.
+
+Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Link: https://lore.kernel.org/r/20230330105418.77061-4-cerasuolodomenico@gmail.com
+Stable-dep-of: aff037078eca ("sched/psi: use kernfs polling functions for PSI trigger polling")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/psi.c | 19 ++++++++++---------
+ 1 file changed, 10 insertions(+), 9 deletions(-)
+
+diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
+index a3d0b5cf797ab..f3df6a8ff493c 100644
+--- a/kernel/sched/psi.c
++++ b/kernel/sched/psi.c
+@@ -430,11 +430,11 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value)
+ return growth;
+ }
+
+-static u64 update_triggers(struct psi_group *group, u64 now)
++static u64 update_triggers(struct psi_group *group, u64 now, bool *update_total)
+ {
+ struct psi_trigger *t;
+- bool update_total = false;
+ u64 *total = group->total[PSI_POLL];
++ *update_total = false;
+
+ /*
+ * On subsequent updates, calculate growth deltas and let
+@@ -462,7 +462,7 @@ static u64 update_triggers(struct psi_group *group, u64 now)
+ * been through all of them. Also remember to extend the
+ * polling time if we see new stall activity.
+ */
+- update_total = true;
++ *update_total = true;
+
+ /* Calculate growth since last update */
+ growth = window_update(&t->win, now, total[t->state]);
+@@ -485,10 +485,6 @@ static u64 update_triggers(struct psi_group *group, u64 now)
+ t->pending_event = false;
+ }
+
+- if (update_total)
+- memcpy(group->rtpoll_total, total,
+- sizeof(group->rtpoll_total));
+-
+ return now + group->rtpoll_min_period;
+ }
+
+@@ -622,6 +618,7 @@ static void psi_rtpoll_work(struct psi_group *group)
+ {
+ bool force_reschedule = false;
+ u32 changed_states;
++ bool update_total;
+ u64 now;
+
+ mutex_lock(&group->rtpoll_trigger_lock);
+@@ -686,8 +683,12 @@ static void psi_rtpoll_work(struct psi_group *group)
+ goto out;
+ }
+
+- if (now >= group->rtpoll_next_update)
+- group->rtpoll_next_update = update_triggers(group, now);
++ if (now >= group->rtpoll_next_update) {
++ group->rtpoll_next_update = update_triggers(group, now, &update_total);
++ if (update_total)
++ memcpy(group->rtpoll_total, group->total[PSI_POLL],
++ sizeof(group->rtpoll_total));
++ }
+
+ psi_schedule_rtpoll_work(group,
+ nsecs_to_jiffies(group->rtpoll_next_update - now) + 1,
+--
+2.39.2
+
--- /dev/null
+From cd6a5ae395de7987446d45c2944bc8de4a8917f7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Oct 2022 19:05:51 +0800
+Subject: sched/psi: Fix avgs_work re-arm in psi_avgs_work()
+
+From: Chengming Zhou <zhouchengming@bytedance.com>
+
+[ Upstream commit 2fcd7bbae90a6d844da8660a9d27079281dfbba2 ]
+
+Pavan reported a problem that PSI avgs_work idle shutoff is not
+working at all, because the PSI_NONIDLE condition would be observed in
+psi_avgs_work()->collect_percpu_times()->get_recent_times() even if
+the kworker running avgs_work is the only task on the CPU.
+
+Although commit 1b69ac6b40eb ("psi: fix aggregation idle shut-off")
+avoided the ping-pong wake problem when the worker sleeps, psi_avgs_work()
+will still always re-arm the avgs_work, so shutoff is not working.
+
+This patch changes the code to use PSI_STATE_RESCHEDULE to flag whether to
+re-arm avgs_work in get_recent_times(). For the current CPU, we re-arm
+avgs_work only when (NR_RUNNING > 1 || NR_IOWAIT > 0 || NR_MEMSTALL > 0);
+for other CPUs we can just check the PSI_NONIDLE delta. The new flag
+is only used in psi_avgs_work(), so we check in get_recent_times()
+that current_work() is avgs_work.
+
+One potential problem is that the brief period of non-idle time
+incurred between the aggregation run and the kworker's dequeue will
+be stranded in the per-cpu buckets until avgs_work runs next time.
+The buckets can hold 4s worth of time, and future activity will wake
+the avgs_work with a 2s delay, giving us 2s worth of data we can leave
+behind when shutting off the avgs_work. If the kworker runs other work
+after avgs_work is shut off and there is no scheduler activity for 2s,
+this may be a problem.
+
+Reported-by: Pavan Kondeti <quic_pkondeti@quicinc.com>
+Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Suren Baghdasaryan <surenb@google.com>
+Tested-by: Chengming Zhou <zhouchengming@bytedance.com>
+Link: https://lore.kernel.org/r/20221014110551.22695-1-zhouchengming@bytedance.com
+Stable-dep-of: aff037078eca ("sched/psi: use kernfs polling functions for PSI trigger polling")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/psi_types.h | 3 +++
+ kernel/sched/psi.c | 30 +++++++++++++++++++++++++++---
+ 2 files changed, 30 insertions(+), 3 deletions(-)
+
+diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
+index 14a1ebb74e11f..1e0a0d7ace3af 100644
+--- a/include/linux/psi_types.h
++++ b/include/linux/psi_types.h
+@@ -72,6 +72,9 @@ enum psi_states {
+ /* Use one bit in the state mask to track TSK_ONCPU */
+ #define PSI_ONCPU (1 << NR_PSI_STATES)
+
++/* Flag whether to re-arm avgs_work, see details in get_recent_times() */
++#define PSI_STATE_RESCHEDULE (1 << (NR_PSI_STATES + 1))
++
+ enum psi_aggregators {
+ PSI_AVGS = 0,
+ PSI_POLL,
+diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
+index e83c321461cf4..02e011cabe917 100644
+--- a/kernel/sched/psi.c
++++ b/kernel/sched/psi.c
+@@ -243,6 +243,8 @@ static void get_recent_times(struct psi_group *group, int cpu,
+ u32 *pchanged_states)
+ {
+ struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu);
++ int current_cpu = raw_smp_processor_id();
++ unsigned int tasks[NR_PSI_TASK_COUNTS];
+ u64 now, state_start;
+ enum psi_states s;
+ unsigned int seq;
+@@ -257,6 +259,8 @@ static void get_recent_times(struct psi_group *group, int cpu,
+ memcpy(times, groupc->times, sizeof(groupc->times));
+ state_mask = groupc->state_mask;
+ state_start = groupc->state_start;
++ if (cpu == current_cpu)
++ memcpy(tasks, groupc->tasks, sizeof(groupc->tasks));
+ } while (read_seqcount_retry(&groupc->seq, seq));
+
+ /* Calculate state time deltas against the previous snapshot */
+@@ -281,6 +285,28 @@ static void get_recent_times(struct psi_group *group, int cpu,
+ if (delta)
+ *pchanged_states |= (1 << s);
+ }
++
++ /*
++ * When collect_percpu_times() from the avgs_work, we don't want to
++ * re-arm avgs_work when all CPUs are IDLE. But the current CPU running
++ * this avgs_work is never IDLE, cause avgs_work can't be shut off.
++ * So for the current CPU, we need to re-arm avgs_work only when
++ * (NR_RUNNING > 1 || NR_IOWAIT > 0 || NR_MEMSTALL > 0), for other CPUs
++ * we can just check PSI_NONIDLE delta.
++ */
++ if (current_work() == &group->avgs_work.work) {
++ bool reschedule;
++
++ if (cpu == current_cpu)
++ reschedule = tasks[NR_RUNNING] +
++ tasks[NR_IOWAIT] +
++ tasks[NR_MEMSTALL] > 1;
++ else
++ reschedule = *pchanged_states & (1 << PSI_NONIDLE);
++
++ if (reschedule)
++ *pchanged_states |= PSI_STATE_RESCHEDULE;
++ }
+ }
+
+ static void calc_avgs(unsigned long avg[3], int missed_periods,
+@@ -416,7 +442,6 @@ static void psi_avgs_work(struct work_struct *work)
+ struct delayed_work *dwork;
+ struct psi_group *group;
+ u32 changed_states;
+- bool nonidle;
+ u64 now;
+
+ dwork = to_delayed_work(work);
+@@ -427,7 +452,6 @@ static void psi_avgs_work(struct work_struct *work)
+ now = sched_clock();
+
+ collect_percpu_times(group, PSI_AVGS, &changed_states);
+- nonidle = changed_states & (1 << PSI_NONIDLE);
+ /*
+ * If there is task activity, periodically fold the per-cpu
+ * times and feed samples into the running averages. If things
+@@ -438,7 +462,7 @@ static void psi_avgs_work(struct work_struct *work)
+ if (now >= group->avg_next_update)
+ group->avg_next_update = update_averages(group, now);
+
+- if (nonidle) {
++ if (changed_states & PSI_STATE_RESCHEDULE) {
+ schedule_delayed_work(dwork, nsecs_to_jiffies(
+ group->avg_next_update - now) + 1);
+ }
+--
+2.39.2
+
--- /dev/null
+From c64ea43f91987426ad1c79576bec5a3f7421d28d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Mar 2023 12:54:15 +0200
+Subject: sched/psi: Rearrange polling code in preparation
+
+From: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
+
+[ Upstream commit 7fab21fa0d000a0ea32d73ce8eec68557c6c268b ]
+
+Move a few functions up in the file to avoid the forward declarations that
+would otherwise be needed by the patch implementing unprivileged PSI triggers.
+
+Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Link: https://lore.kernel.org/r/20230330105418.77061-2-cerasuolodomenico@gmail.com
+Stable-dep-of: aff037078eca ("sched/psi: use kernfs polling functions for PSI trigger polling")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/psi.c | 196 ++++++++++++++++++++++-----------------------
+ 1 file changed, 98 insertions(+), 98 deletions(-)
+
+diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
+index 02e011cabe917..fe9269f1d2a46 100644
+--- a/kernel/sched/psi.c
++++ b/kernel/sched/psi.c
+@@ -384,92 +384,6 @@ static void collect_percpu_times(struct psi_group *group,
+ *pchanged_states = changed_states;
+ }
+
+-static u64 update_averages(struct psi_group *group, u64 now)
+-{
+- unsigned long missed_periods = 0;
+- u64 expires, period;
+- u64 avg_next_update;
+- int s;
+-
+- /* avgX= */
+- expires = group->avg_next_update;
+- if (now - expires >= psi_period)
+- missed_periods = div_u64(now - expires, psi_period);
+-
+- /*
+- * The periodic clock tick can get delayed for various
+- * reasons, especially on loaded systems. To avoid clock
+- * drift, we schedule the clock in fixed psi_period intervals.
+- * But the deltas we sample out of the per-cpu buckets above
+- * are based on the actual time elapsing between clock ticks.
+- */
+- avg_next_update = expires + ((1 + missed_periods) * psi_period);
+- period = now - (group->avg_last_update + (missed_periods * psi_period));
+- group->avg_last_update = now;
+-
+- for (s = 0; s < NR_PSI_STATES - 1; s++) {
+- u32 sample;
+-
+- sample = group->total[PSI_AVGS][s] - group->avg_total[s];
+- /*
+- * Due to the lockless sampling of the time buckets,
+- * recorded time deltas can slip into the next period,
+- * which under full pressure can result in samples in
+- * excess of the period length.
+- *
+- * We don't want to report non-sensical pressures in
+- * excess of 100%, nor do we want to drop such events
+- * on the floor. Instead we punt any overage into the
+- * future until pressure subsides. By doing this we
+- * don't underreport the occurring pressure curve, we
+- * just report it delayed by one period length.
+- *
+- * The error isn't cumulative. As soon as another
+- * delta slips from a period P to P+1, by definition
+- * it frees up its time T in P.
+- */
+- if (sample > period)
+- sample = period;
+- group->avg_total[s] += sample;
+- calc_avgs(group->avg[s], missed_periods, sample, period);
+- }
+-
+- return avg_next_update;
+-}
+-
+-static void psi_avgs_work(struct work_struct *work)
+-{
+- struct delayed_work *dwork;
+- struct psi_group *group;
+- u32 changed_states;
+- u64 now;
+-
+- dwork = to_delayed_work(work);
+- group = container_of(dwork, struct psi_group, avgs_work);
+-
+- mutex_lock(&group->avgs_lock);
+-
+- now = sched_clock();
+-
+- collect_percpu_times(group, PSI_AVGS, &changed_states);
+- /*
+- * If there is task activity, periodically fold the per-cpu
+- * times and feed samples into the running averages. If things
+- * are idle and there is no data to process, stop the clock.
+- * Once restarted, we'll catch up the running averages in one
+- * go - see calc_avgs() and missed_periods.
+- */
+- if (now >= group->avg_next_update)
+- group->avg_next_update = update_averages(group, now);
+-
+- if (changed_states & PSI_STATE_RESCHEDULE) {
+- schedule_delayed_work(dwork, nsecs_to_jiffies(
+- group->avg_next_update - now) + 1);
+- }
+-
+- mutex_unlock(&group->avgs_lock);
+-}
+-
+ /* Trigger tracking window manipulations */
+ static void window_reset(struct psi_window *win, u64 now, u64 value,
+ u64 prev_growth)
+@@ -516,18 +430,6 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value)
+ return growth;
+ }
+
+-static void init_triggers(struct psi_group *group, u64 now)
+-{
+- struct psi_trigger *t;
+-
+- list_for_each_entry(t, &group->triggers, node)
+- window_reset(&t->win, now,
+- group->total[PSI_POLL][t->state], 0);
+- memcpy(group->polling_total, group->total[PSI_POLL],
+- sizeof(group->polling_total));
+- group->polling_next_update = now + group->poll_min_period;
+-}
+-
+ static u64 update_triggers(struct psi_group *group, u64 now)
+ {
+ struct psi_trigger *t;
+@@ -590,6 +492,104 @@ static u64 update_triggers(struct psi_group *group, u64 now)
+ return now + group->poll_min_period;
+ }
+
++static u64 update_averages(struct psi_group *group, u64 now)
++{
++ unsigned long missed_periods = 0;
++ u64 expires, period;
++ u64 avg_next_update;
++ int s;
++
++ /* avgX= */
++ expires = group->avg_next_update;
++ if (now - expires >= psi_period)
++ missed_periods = div_u64(now - expires, psi_period);
++
++ /*
++ * The periodic clock tick can get delayed for various
++ * reasons, especially on loaded systems. To avoid clock
++ * drift, we schedule the clock in fixed psi_period intervals.
++ * But the deltas we sample out of the per-cpu buckets above
++ * are based on the actual time elapsing between clock ticks.
++ */
++ avg_next_update = expires + ((1 + missed_periods) * psi_period);
++ period = now - (group->avg_last_update + (missed_periods * psi_period));
++ group->avg_last_update = now;
++
++ for (s = 0; s < NR_PSI_STATES - 1; s++) {
++ u32 sample;
++
++ sample = group->total[PSI_AVGS][s] - group->avg_total[s];
++ /*
++ * Due to the lockless sampling of the time buckets,
++ * recorded time deltas can slip into the next period,
++ * which under full pressure can result in samples in
++ * excess of the period length.
++ *
++ * We don't want to report non-sensical pressures in
++ * excess of 100%, nor do we want to drop such events
++ * on the floor. Instead we punt any overage into the
++ * future until pressure subsides. By doing this we
++ * don't underreport the occurring pressure curve, we
++ * just report it delayed by one period length.
++ *
++ * The error isn't cumulative. As soon as another
++ * delta slips from a period P to P+1, by definition
++ * it frees up its time T in P.
++ */
++ if (sample > period)
++ sample = period;
++ group->avg_total[s] += sample;
++ calc_avgs(group->avg[s], missed_periods, sample, period);
++ }
++
++ return avg_next_update;
++}
++
++static void psi_avgs_work(struct work_struct *work)
++{
++ struct delayed_work *dwork;
++ struct psi_group *group;
++ u32 changed_states;
++ u64 now;
++
++ dwork = to_delayed_work(work);
++ group = container_of(dwork, struct psi_group, avgs_work);
++
++ mutex_lock(&group->avgs_lock);
++
++ now = sched_clock();
++
++ collect_percpu_times(group, PSI_AVGS, &changed_states);
++ /*
++ * If there is task activity, periodically fold the per-cpu
++ * times and feed samples into the running averages. If things
++ * are idle and there is no data to process, stop the clock.
++ * Once restarted, we'll catch up the running averages in one
++ * go - see calc_avgs() and missed_periods.
++ */
++ if (now >= group->avg_next_update)
++ group->avg_next_update = update_averages(group, now);
++
++ if (changed_states & PSI_STATE_RESCHEDULE) {
++ schedule_delayed_work(dwork, nsecs_to_jiffies(
++ group->avg_next_update - now) + 1);
++ }
++
++ mutex_unlock(&group->avgs_lock);
++}
++
++static void init_triggers(struct psi_group *group, u64 now)
++{
++ struct psi_trigger *t;
++
++ list_for_each_entry(t, &group->triggers, node)
++ window_reset(&t->win, now,
++ group->total[PSI_POLL][t->state], 0);
++ memcpy(group->polling_total, group->total[PSI_POLL],
++ sizeof(group->polling_total));
++ group->polling_next_update = now + group->poll_min_period;
++}
++
+ /* Schedule polling if it's not already scheduled or forced. */
+ static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay,
+ bool force)
+--
+2.39.2
+
--- /dev/null
+From 0970d615d9b33fac51e3ce6bebe313abcf75dfe9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Mar 2023 12:54:16 +0200
+Subject: sched/psi: Rename existing poll members in preparation
+
+From: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
+
+[ Upstream commit 65457b74aa9437418e552e8d52d7112d4f9901a6 ]
+
+Rename members in the PSI implementation to draw a clear distinction
+between the privileged and the unprivileged trigger code that will be
+implemented in the next patch.
+
+Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Link: https://lore.kernel.org/r/20230330105418.77061-3-cerasuolodomenico@gmail.com
+Stable-dep-of: aff037078eca ("sched/psi: use kernfs polling functions for PSI trigger polling")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/psi_types.h | 36 ++++-----
+ kernel/sched/psi.c | 163 +++++++++++++++++++-------------------
+ 2 files changed, 100 insertions(+), 99 deletions(-)
+
+diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
+index 1e0a0d7ace3af..1819afa8b1987 100644
+--- a/include/linux/psi_types.h
++++ b/include/linux/psi_types.h
+@@ -175,26 +175,26 @@ struct psi_group {
+ u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
+ unsigned long avg[NR_PSI_STATES - 1][3];
+
+- /* Monitor work control */
+- struct task_struct __rcu *poll_task;
+- struct timer_list poll_timer;
+- wait_queue_head_t poll_wait;
+- atomic_t poll_wakeup;
+- atomic_t poll_scheduled;
++ /* Monitor RT polling work control */
++ struct task_struct __rcu *rtpoll_task;
++ struct timer_list rtpoll_timer;
++ wait_queue_head_t rtpoll_wait;
++ atomic_t rtpoll_wakeup;
++ atomic_t rtpoll_scheduled;
+
+ /* Protects data used by the monitor */
+- struct mutex trigger_lock;
+-
+- /* Configured polling triggers */
+- struct list_head triggers;
+- u32 nr_triggers[NR_PSI_STATES - 1];
+- u32 poll_states;
+- u64 poll_min_period;
+-
+- /* Total stall times at the start of monitor activation */
+- u64 polling_total[NR_PSI_STATES - 1];
+- u64 polling_next_update;
+- u64 polling_until;
++ struct mutex rtpoll_trigger_lock;
++
++ /* Configured RT polling triggers */
++ struct list_head rtpoll_triggers;
++ u32 rtpoll_nr_triggers[NR_PSI_STATES - 1];
++ u32 rtpoll_states;
++ u64 rtpoll_min_period;
++
++ /* Total stall times at the start of RT polling monitor activation */
++ u64 rtpoll_total[NR_PSI_STATES - 1];
++ u64 rtpoll_next_update;
++ u64 rtpoll_until;
+ };
+
+ #else /* CONFIG_PSI */
+diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
+index fe9269f1d2a46..a3d0b5cf797ab 100644
+--- a/kernel/sched/psi.c
++++ b/kernel/sched/psi.c
+@@ -189,14 +189,14 @@ static void group_init(struct psi_group *group)
+ INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work);
+ mutex_init(&group->avgs_lock);
+ /* Init trigger-related members */
+- atomic_set(&group->poll_scheduled, 0);
+- mutex_init(&group->trigger_lock);
+- INIT_LIST_HEAD(&group->triggers);
+- group->poll_min_period = U32_MAX;
+- group->polling_next_update = ULLONG_MAX;
+- init_waitqueue_head(&group->poll_wait);
+- timer_setup(&group->poll_timer, poll_timer_fn, 0);
+- rcu_assign_pointer(group->poll_task, NULL);
++ atomic_set(&group->rtpoll_scheduled, 0);
++ mutex_init(&group->rtpoll_trigger_lock);
++ INIT_LIST_HEAD(&group->rtpoll_triggers);
++ group->rtpoll_min_period = U32_MAX;
++ group->rtpoll_next_update = ULLONG_MAX;
++ init_waitqueue_head(&group->rtpoll_wait);
++ timer_setup(&group->rtpoll_timer, poll_timer_fn, 0);
++ rcu_assign_pointer(group->rtpoll_task, NULL);
+ }
+
+ void __init psi_init(void)
+@@ -440,11 +440,11 @@ static u64 update_triggers(struct psi_group *group, u64 now)
+ * On subsequent updates, calculate growth deltas and let
+ * watchers know when their specified thresholds are exceeded.
+ */
+- list_for_each_entry(t, &group->triggers, node) {
++ list_for_each_entry(t, &group->rtpoll_triggers, node) {
+ u64 growth;
+ bool new_stall;
+
+- new_stall = group->polling_total[t->state] != total[t->state];
++ new_stall = group->rtpoll_total[t->state] != total[t->state];
+
+ /* Check for stall activity or a previous threshold breach */
+ if (!new_stall && !t->pending_event)
+@@ -486,10 +486,10 @@ static u64 update_triggers(struct psi_group *group, u64 now)
+ }
+
+ if (update_total)
+- memcpy(group->polling_total, total,
+- sizeof(group->polling_total));
++ memcpy(group->rtpoll_total, total,
++ sizeof(group->rtpoll_total));
+
+- return now + group->poll_min_period;
++ return now + group->rtpoll_min_period;
+ }
+
+ static u64 update_averages(struct psi_group *group, u64 now)
+@@ -582,53 +582,53 @@ static void init_triggers(struct psi_group *group, u64 now)
+ {
+ struct psi_trigger *t;
+
+- list_for_each_entry(t, &group->triggers, node)
++ list_for_each_entry(t, &group->rtpoll_triggers, node)
+ window_reset(&t->win, now,
+ group->total[PSI_POLL][t->state], 0);
+- memcpy(group->polling_total, group->total[PSI_POLL],
+- sizeof(group->polling_total));
+- group->polling_next_update = now + group->poll_min_period;
++ memcpy(group->rtpoll_total, group->total[PSI_POLL],
++ sizeof(group->rtpoll_total));
++ group->rtpoll_next_update = now + group->rtpoll_min_period;
+ }
+
+ /* Schedule polling if it's not already scheduled or forced. */
+-static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay,
++static void psi_schedule_rtpoll_work(struct psi_group *group, unsigned long delay,
+ bool force)
+ {
+ struct task_struct *task;
+
+ /*
+ * atomic_xchg should be called even when !force to provide a
+- * full memory barrier (see the comment inside psi_poll_work).
++ * full memory barrier (see the comment inside psi_rtpoll_work).
+ */
+- if (atomic_xchg(&group->poll_scheduled, 1) && !force)
++ if (atomic_xchg(&group->rtpoll_scheduled, 1) && !force)
+ return;
+
+ rcu_read_lock();
+
+- task = rcu_dereference(group->poll_task);
++ task = rcu_dereference(group->rtpoll_task);
+ /*
+ * kworker might be NULL in case psi_trigger_destroy races with
+ * psi_task_change (hotpath) which can't use locks
+ */
+ if (likely(task))
+- mod_timer(&group->poll_timer, jiffies + delay);
++ mod_timer(&group->rtpoll_timer, jiffies + delay);
+ else
+- atomic_set(&group->poll_scheduled, 0);
++ atomic_set(&group->rtpoll_scheduled, 0);
+
+ rcu_read_unlock();
+ }
+
+-static void psi_poll_work(struct psi_group *group)
++static void psi_rtpoll_work(struct psi_group *group)
+ {
+ bool force_reschedule = false;
+ u32 changed_states;
+ u64 now;
+
+- mutex_lock(&group->trigger_lock);
++ mutex_lock(&group->rtpoll_trigger_lock);
+
+ now = sched_clock();
+
+- if (now > group->polling_until) {
++ if (now > group->rtpoll_until) {
+ /*
+ * We are either about to start or might stop polling if no
+ * state change was recorded. Resetting poll_scheduled leaves
+@@ -638,7 +638,7 @@ static void psi_poll_work(struct psi_group *group)
+ * should be negligible and polling_next_update still keeps
+ * updates correctly on schedule.
+ */
+- atomic_set(&group->poll_scheduled, 0);
++ atomic_set(&group->rtpoll_scheduled, 0);
+ /*
+ * A task change can race with the poll worker that is supposed to
+ * report on it. To avoid missing events, ensure ordering between
+@@ -667,9 +667,9 @@ static void psi_poll_work(struct psi_group *group)
+
+ collect_percpu_times(group, PSI_POLL, &changed_states);
+
+- if (changed_states & group->poll_states) {
++ if (changed_states & group->rtpoll_states) {
+ /* Initialize trigger windows when entering polling mode */
+- if (now > group->polling_until)
++ if (now > group->rtpoll_until)
+ init_triggers(group, now);
+
+ /*
+@@ -677,50 +677,50 @@ static void psi_poll_work(struct psi_group *group)
+ * minimum tracking window as long as monitor states are
+ * changing.
+ */
+- group->polling_until = now +
+- group->poll_min_period * UPDATES_PER_WINDOW;
++ group->rtpoll_until = now +
++ group->rtpoll_min_period * UPDATES_PER_WINDOW;
+ }
+
+- if (now > group->polling_until) {
+- group->polling_next_update = ULLONG_MAX;
++ if (now > group->rtpoll_until) {
++ group->rtpoll_next_update = ULLONG_MAX;
+ goto out;
+ }
+
+- if (now >= group->polling_next_update)
+- group->polling_next_update = update_triggers(group, now);
++ if (now >= group->rtpoll_next_update)
++ group->rtpoll_next_update = update_triggers(group, now);
+
+- psi_schedule_poll_work(group,
+- nsecs_to_jiffies(group->polling_next_update - now) + 1,
++ psi_schedule_rtpoll_work(group,
++ nsecs_to_jiffies(group->rtpoll_next_update - now) + 1,
+ force_reschedule);
+
+ out:
+- mutex_unlock(&group->trigger_lock);
++ mutex_unlock(&group->rtpoll_trigger_lock);
+ }
+
+-static int psi_poll_worker(void *data)
++static int psi_rtpoll_worker(void *data)
+ {
+ struct psi_group *group = (struct psi_group *)data;
+
+ sched_set_fifo_low(current);
+
+ while (true) {
+- wait_event_interruptible(group->poll_wait,
+- atomic_cmpxchg(&group->poll_wakeup, 1, 0) ||
++ wait_event_interruptible(group->rtpoll_wait,
++ atomic_cmpxchg(&group->rtpoll_wakeup, 1, 0) ||
+ kthread_should_stop());
+ if (kthread_should_stop())
+ break;
+
+- psi_poll_work(group);
++ psi_rtpoll_work(group);
+ }
+ return 0;
+ }
+
+ static void poll_timer_fn(struct timer_list *t)
+ {
+- struct psi_group *group = from_timer(group, t, poll_timer);
++ struct psi_group *group = from_timer(group, t, rtpoll_timer);
+
+- atomic_set(&group->poll_wakeup, 1);
+- wake_up_interruptible(&group->poll_wait);
++ atomic_set(&group->rtpoll_wakeup, 1);
++ wake_up_interruptible(&group->rtpoll_wait);
+ }
+
+ static void record_times(struct psi_group_cpu *groupc, u64 now)
+@@ -851,8 +851,8 @@ static void psi_group_change(struct psi_group *group, int cpu,
+
+ write_seqcount_end(&groupc->seq);
+
+- if (state_mask & group->poll_states)
+- psi_schedule_poll_work(group, 1, false);
++ if (state_mask & group->rtpoll_states)
++ psi_schedule_rtpoll_work(group, 1, false);
+
+ if (wake_clock && !delayed_work_pending(&group->avgs_work))
+ schedule_delayed_work(&group->avgs_work, PSI_FREQ);
+@@ -1005,8 +1005,8 @@ void psi_account_irqtime(struct task_struct *task, u32 delta)
+
+ write_seqcount_end(&groupc->seq);
+
+- if (group->poll_states & (1 << PSI_IRQ_FULL))
+- psi_schedule_poll_work(group, 1, false);
++ if (group->rtpoll_states & (1 << PSI_IRQ_FULL))
++ psi_schedule_rtpoll_work(group, 1, false);
+ } while ((group = group->parent));
+ }
+ #endif
+@@ -1101,7 +1101,7 @@ void psi_cgroup_free(struct cgroup *cgroup)
+ cancel_delayed_work_sync(&cgroup->psi->avgs_work);
+ free_percpu(cgroup->psi->pcpu);
+ /* All triggers must be removed by now */
+- WARN_ONCE(cgroup->psi->poll_states, "psi: trigger leak\n");
++ WARN_ONCE(cgroup->psi->rtpoll_states, "psi: trigger leak\n");
+ kfree(cgroup->psi);
+ }
+
+@@ -1302,29 +1302,29 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+ init_waitqueue_head(&t->event_wait);
+ t->pending_event = false;
+
+- mutex_lock(&group->trigger_lock);
++ mutex_lock(&group->rtpoll_trigger_lock);
+
+- if (!rcu_access_pointer(group->poll_task)) {
++ if (!rcu_access_pointer(group->rtpoll_task)) {
+ struct task_struct *task;
+
+- task = kthread_create(psi_poll_worker, group, "psimon");
++ task = kthread_create(psi_rtpoll_worker, group, "psimon");
+ if (IS_ERR(task)) {
+ kfree(t);
+- mutex_unlock(&group->trigger_lock);
++ mutex_unlock(&group->rtpoll_trigger_lock);
+ return ERR_CAST(task);
+ }
+- atomic_set(&group->poll_wakeup, 0);
++ atomic_set(&group->rtpoll_wakeup, 0);
+ wake_up_process(task);
+- rcu_assign_pointer(group->poll_task, task);
++ rcu_assign_pointer(group->rtpoll_task, task);
+ }
+
+- list_add(&t->node, &group->triggers);
+- group->poll_min_period = min(group->poll_min_period,
++ list_add(&t->node, &group->rtpoll_triggers);
++ group->rtpoll_min_period = min(group->rtpoll_min_period,
+ div_u64(t->win.size, UPDATES_PER_WINDOW));
+- group->nr_triggers[t->state]++;
+- group->poll_states |= (1 << t->state);
++ group->rtpoll_nr_triggers[t->state]++;
++ group->rtpoll_states |= (1 << t->state);
+
+- mutex_unlock(&group->trigger_lock);
++ mutex_unlock(&group->rtpoll_trigger_lock);
+
+ return t;
+ }
+@@ -1349,51 +1349,52 @@ void psi_trigger_destroy(struct psi_trigger *t)
+ */
+ wake_up_pollfree(&t->event_wait);
+
+- mutex_lock(&group->trigger_lock);
++ mutex_lock(&group->rtpoll_trigger_lock);
+
+ if (!list_empty(&t->node)) {
+ struct psi_trigger *tmp;
+ u64 period = ULLONG_MAX;
+
+ list_del(&t->node);
+- group->nr_triggers[t->state]--;
+- if (!group->nr_triggers[t->state])
+- group->poll_states &= ~(1 << t->state);
++ group->rtpoll_nr_triggers[t->state]--;
++ if (!group->rtpoll_nr_triggers[t->state])
++ group->rtpoll_states &= ~(1 << t->state);
+ /* reset min update period for the remaining triggers */
+- list_for_each_entry(tmp, &group->triggers, node)
++ list_for_each_entry(tmp, &group->rtpoll_triggers, node)
+ period = min(period, div_u64(tmp->win.size,
+ UPDATES_PER_WINDOW));
+- group->poll_min_period = period;
+- /* Destroy poll_task when the last trigger is destroyed */
+- if (group->poll_states == 0) {
+- group->polling_until = 0;
++ group->rtpoll_min_period = period;
++ /* Destroy rtpoll_task when the last trigger is destroyed */
++ if (group->rtpoll_states == 0) {
++ group->rtpoll_until = 0;
+ task_to_destroy = rcu_dereference_protected(
+- group->poll_task,
+- lockdep_is_held(&group->trigger_lock));
+- rcu_assign_pointer(group->poll_task, NULL);
+- del_timer(&group->poll_timer);
++ group->rtpoll_task,
++ lockdep_is_held(&group->rtpoll_trigger_lock));
++ rcu_assign_pointer(group->rtpoll_task, NULL);
++ del_timer(&group->rtpoll_timer);
+ }
+ }
+
+- mutex_unlock(&group->trigger_lock);
++ mutex_unlock(&group->rtpoll_trigger_lock);
+
+ /*
+- * Wait for psi_schedule_poll_work RCU to complete its read-side
++ * Wait for psi_schedule_rtpoll_work RCU to complete its read-side
+ * critical section before destroying the trigger and optionally the
+- * poll_task.
++ * rtpoll_task.
+ */
+ synchronize_rcu();
+ /*
+- * Stop kthread 'psimon' after releasing trigger_lock to prevent a
+- * deadlock while waiting for psi_poll_work to acquire trigger_lock
++ * Stop kthread 'psimon' after releasing rtpoll_trigger_lock to prevent
++ * a deadlock while waiting for psi_rtpoll_work to acquire
++ * rtpoll_trigger_lock
+ */
+ if (task_to_destroy) {
+ /*
+ * After the RCU grace period has expired, the worker
+- * can no longer be found through group->poll_task.
++ * can no longer be found through group->rtpoll_task.
+ */
+ kthread_stop(task_to_destroy);
+- atomic_set(&group->poll_scheduled, 0);
++ atomic_set(&group->rtpoll_scheduled, 0);
+ }
+ kfree(t);
+ }
+--
+2.39.2
+
--- /dev/null
+From cc4a5d27580aad5472ec624bab19f12d4556982c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Jun 2023 17:56:12 -0700
+Subject: sched/psi: use kernfs polling functions for PSI trigger polling
+
+From: Suren Baghdasaryan <surenb@google.com>
+
+[ Upstream commit aff037078ecaecf34a7c2afab1341815f90fba5e ]
+
+Destroying a psi trigger in cgroup_file_release causes UAF issues when
+a cgroup is removed from under a polling process. This happens because
+cgroup removal calls cgroup_file_release while the actual file is still
+alive. Destroying the trigger at this point also destroys its waitqueue
+head, and if a polling process is still accessing that waitqueue through
+the file, it will step on the freed pointer:
+
+do_select
+ vfs_poll
+ do_rmdir
+ cgroup_rmdir
+ kernfs_drain_open_files
+ cgroup_file_release
+ cgroup_pressure_release
+ psi_trigger_destroy
+ wake_up_pollfree(&t->event_wait)
+// vfs_poll is unblocked
+ synchronize_rcu
+ kfree(t)
+ poll_freewait -> UAF access to the trigger's waitqueue head
+
+Patch [1] fixed this issue for epoll() case using wake_up_pollfree(),
+however the same issue exists for synchronous poll() case.
+The root cause of this issue is that the lifecycles of the psi trigger's
+waitqueue and of the file associated with the trigger are different. Fix
+this by using kernfs_generic_poll function when polling on cgroup-specific
+psi triggers. It internally uses kernfs_open_node->poll waitqueue head
+with its lifecycle tied to the file's lifecycle. This also renders the
+fix in [1] obsolete, so revert it.
+
+[1] commit c2dbe32d5db5 ("sched/psi: Fix use-after-free in ep_remove_wait_queue()")
+
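+As a rough sketch (editorial, mirroring the diff below), cgroup triggers
+now poll and notify through the kernfs open file instead of the trigger's
+own waitqueue:
+
+	/* poll path */
+	if (t->of)
+		kernfs_generic_poll(t->of, wait);
+	else
+		poll_wait(file, &t->event_wait, wait);
+
+	/* event notification / teardown */
+	if (t->of)
+		kernfs_notify(t->of->kn);
+	else
+		wake_up_interruptible(&t->event_wait);
+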
+Fixes: 0e94682b73bf ("psi: introduce psi monitor")
+Closes: https://lore.kernel.org/all/20230613062306.101831-1-lujialin4@huawei.com/
+Reported-by: Lu Jialin <lujialin4@huawei.com>
+Signed-off-by: Suren Baghdasaryan <surenb@google.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20230630005612.1014540-1-surenb@google.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/psi.h | 5 +++--
+ include/linux/psi_types.h | 3 +++
+ kernel/cgroup/cgroup.c | 2 +-
+ kernel/sched/psi.c | 29 +++++++++++++++++++++--------
+ 4 files changed, 28 insertions(+), 11 deletions(-)
+
+diff --git a/include/linux/psi.h b/include/linux/psi.h
+index ab26200c28033..e0745873e3f26 100644
+--- a/include/linux/psi.h
++++ b/include/linux/psi.h
+@@ -23,8 +23,9 @@ void psi_memstall_enter(unsigned long *flags);
+ void psi_memstall_leave(unsigned long *flags);
+
+ int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
+-struct psi_trigger *psi_trigger_create(struct psi_group *group,
+- char *buf, enum psi_res res, struct file *file);
++struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
++ enum psi_res res, struct file *file,
++ struct kernfs_open_file *of);
+ void psi_trigger_destroy(struct psi_trigger *t);
+
+ __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
+diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
+index 040c089581c6c..f1fd3a8044e0e 100644
+--- a/include/linux/psi_types.h
++++ b/include/linux/psi_types.h
+@@ -137,6 +137,9 @@ struct psi_trigger {
+ /* Wait queue for polling */
+ wait_queue_head_t event_wait;
+
++ /* Kernfs file for cgroup triggers */
++ struct kernfs_open_file *of;
++
+ /* Pending event flag */
+ int event;
+
+diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
+index c35efae566a4b..73f11e4db3a4d 100644
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -3771,7 +3771,7 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
+ }
+
+ psi = cgroup_psi(cgrp);
+- new = psi_trigger_create(psi, buf, res, of->file);
++ new = psi_trigger_create(psi, buf, res, of->file, of);
+ if (IS_ERR(new)) {
+ cgroup_put(cgrp);
+ return PTR_ERR(new);
+diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
+index e072f6b31bf30..80d8c10e93638 100644
+--- a/kernel/sched/psi.c
++++ b/kernel/sched/psi.c
+@@ -494,8 +494,12 @@ static u64 update_triggers(struct psi_group *group, u64 now, bool *update_total,
+ continue;
+
+ /* Generate an event */
+- if (cmpxchg(&t->event, 0, 1) == 0)
+- wake_up_interruptible(&t->event_wait);
++ if (cmpxchg(&t->event, 0, 1) == 0) {
++ if (t->of)
++ kernfs_notify(t->of->kn);
++ else
++ wake_up_interruptible(&t->event_wait);
++ }
+ t->last_event_time = now;
+ /* Reset threshold breach flag once event got generated */
+ t->pending_event = false;
+@@ -1272,8 +1276,9 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
+ return 0;
+ }
+
+-struct psi_trigger *psi_trigger_create(struct psi_group *group,
+- char *buf, enum psi_res res, struct file *file)
++struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
++ enum psi_res res, struct file *file,
++ struct kernfs_open_file *of)
+ {
+ struct psi_trigger *t;
+ enum psi_states state;
+@@ -1333,7 +1338,9 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+
+ t->event = 0;
+ t->last_event_time = 0;
+- init_waitqueue_head(&t->event_wait);
++ t->of = of;
++ if (!of)
++ init_waitqueue_head(&t->event_wait);
+ t->pending_event = false;
+ t->aggregator = privileged ? PSI_POLL : PSI_AVGS;
+
+@@ -1390,7 +1397,10 @@ void psi_trigger_destroy(struct psi_trigger *t)
+ * being accessed later. Can happen if cgroup is deleted from under a
+ * polling process.
+ */
+- wake_up_pollfree(&t->event_wait);
++ if (t->of)
++ kernfs_notify(t->of->kn);
++ else
++ wake_up_interruptible(&t->event_wait);
+
+ if (t->aggregator == PSI_AVGS) {
+ mutex_lock(&group->avgs_lock);
+@@ -1462,7 +1472,10 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
+ if (!t)
+ return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
+
+- poll_wait(file, &t->event_wait, wait);
++ if (t->of)
++ kernfs_generic_poll(t->of, wait);
++ else
++ poll_wait(file, &t->event_wait, wait);
+
+ if (cmpxchg(&t->event, 1, 0) == 1)
+ ret |= EPOLLPRI;
+@@ -1532,7 +1545,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
+ return -EBUSY;
+ }
+
+- new = psi_trigger_create(&psi_system, buf, res, file);
++ new = psi_trigger_create(&psi_system, buf, res, file, NULL);
+ if (IS_ERR(new)) {
+ mutex_unlock(&seq->lock);
+ return PTR_ERR(new);
+--
+2.39.2
+
--- /dev/null
+From d5bcc1aba8ad5267a2fd8d1da3794a97630d9c16 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Jun 2023 10:18:25 +0800
+Subject: security: keys: Modify mismatched function name
+
+From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
+
+[ Upstream commit 2a4152742025c5f21482e8cebc581702a0fa5b01 ]
+
+No functional modification involved.
+
+security/keys/trusted-keys/trusted_tpm2.c:203: warning: expecting prototype for tpm_buf_append_auth(). Prototype was for tpm2_buf_append_auth() instead.
+
+Fixes: 2e19e10131a0 ("KEYS: trusted: Move TPM2 trusted keys code")
+Reported-by: Abaci Robot <abaci@linux.alibaba.com>
+Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=5524
+Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
+Reviewed-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ security/keys/trusted-keys/trusted_tpm2.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c
+index 2b2c8eb258d5b..bc700f85f80be 100644
+--- a/security/keys/trusted-keys/trusted_tpm2.c
++++ b/security/keys/trusted-keys/trusted_tpm2.c
+@@ -186,7 +186,7 @@ int tpm2_key_priv(void *context, size_t hdrlen,
+ }
+
+ /**
+- * tpm_buf_append_auth() - append TPMS_AUTH_COMMAND to the buffer.
++ * tpm2_buf_append_auth() - append TPMS_AUTH_COMMAND to the buffer.
+ *
+ * @buf: an allocated tpm_buf instance
+ * @session_handle: session handle
+--
+2.39.2
+
asoc-codecs-wcd938x-fix-codec-initialisation-race.patch
asoc-codecs-wcd938x-fix-soundwire-initialisation-race.patch
ext4-correct-inline-offset-when-handling-xattrs-in-inode-body.patch
+drm-radeon-fix-integer-overflow-in-radeon_cs_parser_.patch
+alsa-emu10k1-roll-up-loops-in-dsp-setup-code-for-aud.patch
+quota-properly-disable-quotas-when-add_dquot_ref-fai.patch
+quota-fix-warning-in-dqgrab.patch
+hid-add-quirk-for-03f0-464a-hp-elite-presenter-mouse.patch
+ovl-check-type-and-offset-of-struct-vfsmount-in-ovl_.patch
+udf-fix-uninitialized-array-access-for-some-pathname.patch
+fs-jfs-fix-ubsan-array-index-out-of-bounds-in-dballo.patch
+mips-dec-prom-address-warray-bounds-warning.patch
+fs-jfs-fix-null-ptr-deref-read-in-txbegin.patch
+fs-jfs-check-for-read-only-mounted-filesystem-in-txb.patch
+acpi-video-add-backlight-native-dmi-quirk-for-dell-s.patch
+rcu-tasks-avoid-pr_info-with-spin-lock-in-cblist_ini.patch
+rcu-mark-additional-concurrent-load-from-cpu_no_qs.b.patch
+sched-fair-don-t-balance-task-to-its-current-running.patch
+wifi-ath11k-fix-registration-of-6ghz-only-phy-withou.patch
+bpf-print-a-warning-only-if-writing-to-unprivileged_.patch
+bpf-address-kcsan-report-on-bpf_lru_list.patch
+bpf-tcp-avoid-taking-fast-sock-lock-in-iterator.patch
+wifi-ath11k-add-support-default-regdb-while-searchin.patch
+wifi-mac80211_hwsim-fix-possible-null-dereference.patch
+spi-dw-add-compatible-for-intel-mount-evans-soc.patch
+wifi-ath11k-fix-memory-leak-in-wmi-firmware-stats.patch
+net-ethernet-litex-add-support-for-64-bit-stats.patch
+devlink-report-devlink_port_type_warn-source-device.patch
+wifi-wext-core-fix-wstringop-overflow-warning-in-ioc.patch
+wifi-iwlwifi-add-support-for-new-pci-id.patch
+wifi-iwlwifi-mvm-avoid-baid-size-integer-overflow.patch
+wifi-iwlwifi-pcie-add-device-id-51f1-for-killer-1675.patch
+igb-fix-igb_down-hung-on-surprise-removal.patch
+net-hns3-fix-strncpy-not-using-dest-buf-length-as-le.patch
+asoc-amd-acp-fix-for-invalid-dai-id-handling-in-acp_.patch
+asoc-codecs-wcd938x-fix-mbhc-impedance-loglevel.patch
+asoc-codecs-wcd938x-fix-db-range-for-hphl-and-hphr.patch
+asoc-qcom-q6apm-do-not-close-gpr-port-before-closing.patch
+sched-fair-use-recent_used_cpu-to-test-p-cpus_ptr.patch
+sched-psi-fix-avgs_work-re-arm-in-psi_avgs_work.patch
+sched-psi-rearrange-polling-code-in-preparation.patch
+sched-psi-rename-existing-poll-members-in-preparatio.patch
+sched-psi-extract-update_triggers-side-effect.patch
+sched-psi-allow-unprivileged-polling-of-n-2s-period.patch
+sched-psi-use-kernfs-polling-functions-for-psi-trigg.patch
+pinctrl-renesas-rzv2m-handle-non-unique-subnode-name.patch
+pinctrl-renesas-rzg2l-handle-non-unique-subnode-name.patch
+spi-bcm63xx-fix-max-prepend-length.patch
+fbdev-imxfb-warn-about-invalid-left-right-margin.patch
+fbdev-imxfb-removed-unneeded-release_mem_region.patch
+perf-build-fix-library-not-found-error-when-using-cs.patch
+btrfs-be-a-bit-more-careful-when-setting-mirror_num_.patch
+spi-s3c64xx-clear-loopback-bit-after-loopback-test.patch
+kallsyms-improve-the-performance-of-kallsyms_lookup_.patch
+kallsyms-correctly-sequence-symbols-when-config_lto_.patch
+kallsyms-strip-lto-only-suffixes-from-promoted-globa.patch
+dsa-mv88e6xxx-do-a-final-check-before-timing-out.patch
+net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field-cpsw.patch
+bridge-add-extack-warning-when-enabling-stp-in-netns.patch
+net-ethernet-mtk_eth_soc-handle-probe-deferral.patch
+cifs-fix-mid-leak-during-reconnection-after-timeout-.patch
+asoc-sof-ipc3-dtrace-uninitialized-data-in-dfsentry_.patch
+net-sched-cls_matchall-undo-tcf_bind_filter-in-case-.patch
+net-sched-cls_u32-undo-tcf_bind_filter-if-u32_replac.patch
+net-sched-cls_u32-undo-refcount-decrement-in-case-up.patch
+net-sched-cls_bpf-undo-tcf_bind_filter-in-case-of-an.patch
+net-dsa-microchip-ksz8-separate-static-mac-table-ope.patch
+net-dsa-microchip-ksz8-make-ksz8_r_sta_mac_table-sta.patch
+net-dsa-microchip-ksz8_r_sta_mac_table-avoid-using-e.patch
+net-dsa-microchip-correct-ksz8795-static-mac-table-a.patch
+iavf-fix-use-after-free-in-free_netdev.patch
+iavf-fix-out-of-bounds-when-setting-channels-on-remo.patch
+iavf-use-internal-state-to-free-traffic-irqs.patch
+iavf-move-netdev_update_features-into-watchdog-task.patch
+iavf-send-vlan-offloading-caps-once-after-vfr.patch
+iavf-make-functions-static-where-possible.patch
+iavf-wait-for-reset-in-callbacks-which-trigger-it.patch
+iavf-fix-a-deadlock-caused-by-rtnl-and-driver-s-lock.patch
+iavf-fix-reset-task-race-with-iavf_remove.patch
+security-keys-modify-mismatched-function-name.patch
+octeontx2-pf-dont-allocate-bpids-for-lbk-interfaces.patch
+bpf-fix-subprog-idx-logic-in-check_max_stack_depth.patch
+bpf-repeat-check_max_stack_depth-for-async-callbacks.patch
+bpf-arm64-fix-bti-type-used-for-freplace-attached-fu.patch
+igc-avoid-transmit-queue-timeout-for-xdp.patch
+igc-prevent-garbled-tx-queue-with-xdp-zerocopy.patch
+net-ipv4-use-consistent-txhash-in-time_wait-and-syn_.patch
+tcp-annotate-data-races-around-tcp_rsk-req-txhash.patch
+tcp-annotate-data-races-around-tcp_rsk-req-ts_recent.patch
+net-ipv4-use-kfree_sensitive-instead-of-kfree.patch
+net-ipv6-check-return-value-of-pskb_trim.patch
+revert-tcp-avoid-the-lookup-process-failing-to-get-s.patch
+fbdev-au1200fb-fix-missing-irq-check-in-au1200fb_drv.patch
+llc-don-t-drop-packet-from-non-root-netns.patch
+alsa-hda-realtek-fix-generic-fixup-definition-for-cs.patch
+netfilter-nf_tables-fix-spurious-set-element-inserti.patch
+netfilter-nf_tables-can-t-schedule-in-nft_chain_vali.patch
+netfilter-nft_set_pipapo-fix-improper-element-remova.patch
+netfilter-nf_tables-skip-bound-chain-in-netns-releas.patch
+netfilter-nf_tables-skip-bound-chain-on-rule-flush.patch
+bluetooth-use-rcu-for-hci_conn_params-and-iterate-sa.patch
+bluetooth-hci_event-call-disconnect-callback-before-.patch
+bluetooth-iso-fix-iso_conn-related-locking-and-valid.patch
+bluetooth-hci_sync-avoid-use-after-free-in-dbg-for-h.patch
+tcp-annotate-data-races-around-tp-tcp_tx_delay.patch
+tcp-annotate-data-races-around-tp-tsoffset.patch
+tcp-annotate-data-races-around-tp-keepalive_time.patch
+tcp-annotate-data-races-around-tp-keepalive_intvl.patch
+tcp-annotate-data-races-around-tp-keepalive_probes.patch
+tcp-annotate-data-races-around-icsk-icsk_syn_retries.patch
+tcp-annotate-data-races-around-tp-linger2.patch
+tcp-annotate-data-races-around-rskq_defer_accept.patch
+tcp-annotate-data-races-around-tp-notsent_lowat.patch
+tcp-annotate-data-races-around-icsk-icsk_user_timeou.patch
+tcp-annotate-data-races-around-fastopenq.max_qlen.patch
+net-phy-prevent-stale-pointer-dereference-in-phy_ini.patch
--- /dev/null
+From cf5e36388cb882c6653cd3159ae15b19b12d882e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Jun 2023 09:14:52 +0200
+Subject: spi: bcm63xx: fix max prepend length
+
+From: Jonas Gorski <jonas.gorski@gmail.com>
+
+[ Upstream commit 5158814cbb37bbb38344b3ecddc24ba2ed0365f2 ]
+
+The command word is defined as following:
+
+ /* Command */
+ #define SPI_CMD_COMMAND_SHIFT 0
+ #define SPI_CMD_DEVICE_ID_SHIFT 4
+ #define SPI_CMD_PREPEND_BYTE_CNT_SHIFT 8
+ #define SPI_CMD_ONE_BYTE_SHIFT 11
+ #define SPI_CMD_ONE_WIRE_SHIFT 12
+
+If the prepend byte count field starts at bit 8, and the next defined
+bit is SPI_CMD_ONE_BYTE at bit 11, it can be at most 3 bits wide, and
+thus the max value is 7, not 15.
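+
+As a quick editorial check of that arithmetic, the field width is bounded
+by the neighbouring bit definitions:
+
+	/* width = SPI_CMD_ONE_BYTE_SHIFT - SPI_CMD_PREPEND_BYTE_CNT_SHIFT = 11 - 8 = 3 bits */
+	/* max   = (1 << 3) - 1 = 7, hence BCM63XX_SPI_MAX_PREPEND must be 7 */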
+
+Fixes: b17de076062a ("spi/bcm63xx: work around inability to keep CS up")
+Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
+Link: https://lore.kernel.org/r/20230629071453.62024-1-jonas.gorski@gmail.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/spi/spi-bcm63xx.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c
+index 80fa0ef8909ca..147199002df1e 100644
+--- a/drivers/spi/spi-bcm63xx.c
++++ b/drivers/spi/spi-bcm63xx.c
+@@ -126,7 +126,7 @@ enum bcm63xx_regs_spi {
+ SPI_MSG_DATA_SIZE,
+ };
+
+-#define BCM63XX_SPI_MAX_PREPEND 15
++#define BCM63XX_SPI_MAX_PREPEND 7
+
+ #define BCM63XX_SPI_MAX_CS 8
+ #define BCM63XX_SPI_BUS_NUM 0
+--
+2.39.2
+
--- /dev/null
+From a47a909fedf766372d2d6e58a2e2e2694d9e1dfe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Jun 2023 07:54:01 -0700
+Subject: spi: dw: Add compatible for Intel Mount Evans SoC
+
+From: Abe Kohandel <abe.kohandel@intel.com>
+
+[ Upstream commit 0760d5d0e9f0c0e2200a0323a61d1995bb745dee ]
+
+The Intel Mount Evans SoC's Integrated Management Complex uses the SPI
+controller for access to a NOR SPI FLASH. However, the SoC doesn't
+provide a mechanism to override the native chip select signal.
+
+This driver doesn't use DMA for memory operations when a chip select
+override is not provided due to the native chip select timing behavior.
+As a result no DMA configuration is done for the controller and this
+configuration is not tested.
+
+The controller also has an errata where a full TX FIFO can result in
+data corruption. The suggested workaround is to never completely fill
+the FIFO. The TX FIFO has a size of 32 so the fifo_len is set to 31.
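+
+A minimal sketch of the workaround (mirroring the init callback added in
+the diff below): the controller-specific init simply caps the usable FIFO
+depth one below its physical size:
+
+	/* TX FIFO holds 32 entries; never fill it completely (errata) */
+	dwsmmio->dws.fifo_len = 31;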
+
+Signed-off-by: Abe Kohandel <abe.kohandel@intel.com>
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Link: https://lore.kernel.org/r/20230606145402.474866-2-abe.kohandel@intel.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/spi/spi-dw-mmio.c | 29 +++++++++++++++++++++++++++++
+ 1 file changed, 29 insertions(+)
+
+diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c
+index 26c40ea6dd129..7e8478ad74e55 100644
+--- a/drivers/spi/spi-dw-mmio.c
++++ b/drivers/spi/spi-dw-mmio.c
+@@ -222,6 +222,31 @@ static int dw_spi_intel_init(struct platform_device *pdev,
+ return 0;
+ }
+
++/*
++ * The Intel Mount Evans SoC's Integrated Management Complex uses the
++ * SPI controller for access to a NOR SPI FLASH. However, the SoC doesn't
++ * provide a mechanism to override the native chip select signal.
++ *
++ * This driver doesn't use DMA for memory operations when a chip select
++ * override is not provided due to the native chip select timing behavior.
++ * As a result no DMA configuration is done for the controller and this
++ * configuration is not tested.
++ */
++static int dw_spi_mountevans_imc_init(struct platform_device *pdev,
++ struct dw_spi_mmio *dwsmmio)
++{
++ /*
++ * The Intel Mount Evans SoC's Integrated Management Complex DW
++ * apb_ssi_v4.02a controller has an errata where a full TX FIFO can
++ * result in data corruption. The suggested workaround is to never
++ * completely fill the FIFO. The TX FIFO has a size of 32 so the
++ * fifo_len is set to 31.
++ */
++ dwsmmio->dws.fifo_len = 31;
++
++ return 0;
++}
++
+ static int dw_spi_canaan_k210_init(struct platform_device *pdev,
+ struct dw_spi_mmio *dwsmmio)
+ {
+@@ -350,6 +375,10 @@ static const struct of_device_id dw_spi_mmio_of_match[] = {
+ { .compatible = "snps,dwc-ssi-1.01a", .data = dw_spi_hssi_init},
+ { .compatible = "intel,keembay-ssi", .data = dw_spi_intel_init},
+ { .compatible = "intel,thunderbay-ssi", .data = dw_spi_intel_init},
++ {
++ .compatible = "intel,mountevans-imc-ssi",
++ .data = dw_spi_mountevans_imc_init,
++ },
+ { .compatible = "microchip,sparx5-spi", dw_spi_mscc_sparx5_init},
+ { .compatible = "canaan,k210-spi", dw_spi_canaan_k210_init},
+ { /* end of table */}
+--
+2.39.2
+
--- /dev/null
+From f832b5453eead49443949271d5828c464703455b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Jul 2023 17:20:20 +0900
+Subject: spi: s3c64xx: clear loopback bit after loopback test
+
+From: Jaewon Kim <jaewon02.kim@samsung.com>
+
+[ Upstream commit 9ec3c5517e22a12d2ff1b71e844f7913641460c6 ]
+
+When an SPI loopback transfer is performed, the
+S3C64XX_SPI_MODE_SELF_LOOPBACK bit remains set, so the controller keeps
+operating in loopback mode even if the next transfer is not a loopback
+transfer. If SPI_LOOP is not set, the S3C64XX_SPI_MODE_SELF_LOOPBACK bit
+needs to be cleared.
+
+Signed-off-by: Jaewon Kim <jaewon02.kim@samsung.com>
+Fixes: ffb7bcd3b27e ("spi: s3c64xx: support loopback mode")
+Reviewed-by: Chanho Park <chanho61.park@samsung.com>
+Link: https://lore.kernel.org/r/20230711082020.138165-1-jaewon02.kim@samsung.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/spi/spi-s3c64xx.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
+index 71d324ec9a70a..1480df7b43b3f 100644
+--- a/drivers/spi/spi-s3c64xx.c
++++ b/drivers/spi/spi-s3c64xx.c
+@@ -668,6 +668,8 @@ static int s3c64xx_spi_config(struct s3c64xx_spi_driver_data *sdd)
+
+ if ((sdd->cur_mode & SPI_LOOP) && sdd->port_conf->has_loopback)
+ val |= S3C64XX_SPI_MODE_SELF_LOOPBACK;
++ else
++ val &= ~S3C64XX_SPI_MODE_SELF_LOOPBACK;
+
+ writel(val, regs + S3C64XX_SPI_MODE_CFG);
+
+--
+2.39.2
+
--- /dev/null
+From 7035bedf31a88876c025d69b93d6ebb0256f36f7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:57 +0000
+Subject: tcp: annotate data-races around fastopenq.max_qlen
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 70f360dd7042cb843635ece9d28335a4addff9eb ]
+
+This field can be read locklessly.
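+
+A minimal sketch of the annotation pattern used throughout this series
+(editorial, both lines taken from the diff below): the writer publishes
+the value with WRITE_ONCE() and lockless readers use READ_ONCE():
+
+	/* writer, fastopen_queue_tune() */
+	WRITE_ONCE(queue->fastopenq.max_qlen, min_t(unsigned int, backlog, somaxconn));
+
+	/* lockless reader, tcp_fastopen_queue_check() */
+	max_qlen = READ_ONCE(fastopenq->max_qlen);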
+
+Fixes: 1536e2857bd3 ("tcp: Add a TCP_FASTOPEN socket option to get a max backlog on its listner")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-12-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/tcp.h | 2 +-
+ net/ipv4/tcp.c | 2 +-
+ net/ipv4/tcp_fastopen.c | 6 ++++--
+ 3 files changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/include/linux/tcp.h b/include/linux/tcp.h
+index 41b1da621a458..9cd289ad3f5b5 100644
+--- a/include/linux/tcp.h
++++ b/include/linux/tcp.h
+@@ -510,7 +510,7 @@ static inline void fastopen_queue_tune(struct sock *sk, int backlog)
+ struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+ int somaxconn = READ_ONCE(sock_net(sk)->core.sysctl_somaxconn);
+
+- queue->fastopenq.max_qlen = min_t(unsigned int, backlog, somaxconn);
++ WRITE_ONCE(queue->fastopenq.max_qlen, min_t(unsigned int, backlog, somaxconn));
+ }
+
+ static inline void tcp_move_syn(struct tcp_sock *tp,
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index b3a5ff311567b..fab25d4f3a6f1 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -4247,7 +4247,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
+ break;
+
+ case TCP_FASTOPEN:
+- val = icsk->icsk_accept_queue.fastopenq.max_qlen;
++ val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen);
+ break;
+
+ case TCP_FASTOPEN_CONNECT:
+diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
+index 45cc7f1ca2961..85e4953f11821 100644
+--- a/net/ipv4/tcp_fastopen.c
++++ b/net/ipv4/tcp_fastopen.c
+@@ -296,6 +296,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
+ static bool tcp_fastopen_queue_check(struct sock *sk)
+ {
+ struct fastopen_queue *fastopenq;
++ int max_qlen;
+
+ /* Make sure the listener has enabled fastopen, and we don't
+ * exceed the max # of pending TFO requests allowed before trying
+@@ -308,10 +309,11 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
+ * temporarily vs a server not supporting Fast Open at all.
+ */
+ fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
+- if (fastopenq->max_qlen == 0)
++ max_qlen = READ_ONCE(fastopenq->max_qlen);
++ if (max_qlen == 0)
+ return false;
+
+- if (fastopenq->qlen >= fastopenq->max_qlen) {
++ if (fastopenq->qlen >= max_qlen) {
+ struct request_sock *req1;
+ spin_lock(&fastopenq->lock);
+ req1 = fastopenq->rskq_rst_head;
+--
+2.39.2
+
--- /dev/null
+From ae744dd736807b48f042d785128b2d771387f69c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:52 +0000
+Subject: tcp: annotate data-races around icsk->icsk_syn_retries
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3a037f0f3c4bfe44518f2fbb478aa2f99a9cd8bb ]
+
+do_tcp_getsockopt() and reqsk_timer_handler() read
+icsk->icsk_syn_retries while another cpu might change its value.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-7-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/inet_connection_sock.c | 2 +-
+ net/ipv4/tcp.c | 6 +++---
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
+index 8e35ea66d930a..62a3b103f258a 100644
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -1016,7 +1016,7 @@ static void reqsk_timer_handler(struct timer_list *t)
+
+ icsk = inet_csk(sk_listener);
+ net = sock_net(sk_listener);
+- max_syn_ack_retries = icsk->icsk_syn_retries ? :
++ max_syn_ack_retries = READ_ONCE(icsk->icsk_syn_retries) ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_synack_retries);
+ /* Normally all the openreqs are young and become mature
+ * (i.e. converted to established socket) for first timeout.
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 7d75928ea0f9c..ffa9717293358 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3397,7 +3397,7 @@ int tcp_sock_set_syncnt(struct sock *sk, int val)
+ return -EINVAL;
+
+ lock_sock(sk);
+- inet_csk(sk)->icsk_syn_retries = val;
++ WRITE_ONCE(inet_csk(sk)->icsk_syn_retries, val);
+ release_sock(sk);
+ return 0;
+ }
+@@ -3678,7 +3678,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ if (val < 1 || val > MAX_TCP_SYNCNT)
+ err = -EINVAL;
+ else
+- icsk->icsk_syn_retries = val;
++ WRITE_ONCE(icsk->icsk_syn_retries, val);
+ break;
+
+ case TCP_SAVE_SYN:
+@@ -4095,7 +4095,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
+ val = keepalive_probes(tp);
+ break;
+ case TCP_SYNCNT:
+- val = icsk->icsk_syn_retries ? :
++ val = READ_ONCE(icsk->icsk_syn_retries) ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
+ break;
+ case TCP_LINGER2:
+--
+2.39.2
+
--- /dev/null
+From 7efbdf0a8a4d26103224e8eb9779b4b5c48a11c6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:56 +0000
+Subject: tcp: annotate data-races around icsk->icsk_user_timeout
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 26023e91e12c68669db416b97234328a03d8e499 ]
+
+This field can be read locklessly from do_tcp_getsockopt().
+
+Fixes: dca43c75e7e5 ("tcp: Add TCP_USER_TIMEOUT socket option.")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-11-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 6f3a494b965ae..b3a5ff311567b 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3406,7 +3406,7 @@ EXPORT_SYMBOL(tcp_sock_set_syncnt);
+ void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
+ {
+ lock_sock(sk);
+- inet_csk(sk)->icsk_user_timeout = val;
++ WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val);
+ release_sock(sk);
+ }
+ EXPORT_SYMBOL(tcp_sock_set_user_timeout);
+@@ -3726,7 +3726,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ if (val < 0)
+ err = -EINVAL;
+ else
+- icsk->icsk_user_timeout = val;
++ WRITE_ONCE(icsk->icsk_user_timeout, val);
+ break;
+
+ case TCP_FASTOPEN:
+@@ -4243,7 +4243,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
+ break;
+
+ case TCP_USER_TIMEOUT:
+- val = icsk->icsk_user_timeout;
++ val = READ_ONCE(icsk->icsk_user_timeout);
+ break;
+
+ case TCP_FASTOPEN:
+--
+2.39.2
+
--- /dev/null
+From 7cb1fa4e8fc2528b3c95ebf4367b85eaf269c0e9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:54 +0000
+Subject: tcp: annotate data-races around rskq_defer_accept
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit ae488c74422fb1dcd807c0201804b3b5e8a322a3 ]
+
+do_tcp_getsockopt() reads rskq_defer_accept while another cpu
+might change its value.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-9-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 363535b6ece83..bc3ad48f92389 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3700,9 +3700,9 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+
+ case TCP_DEFER_ACCEPT:
+ /* Translate value in seconds to number of retransmits */
+- icsk->icsk_accept_queue.rskq_defer_accept =
+- secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
+- TCP_RTO_MAX / HZ);
++ WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept,
++ secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
++ TCP_RTO_MAX / HZ));
+ break;
+
+ case TCP_WINDOW_CLAMP:
+@@ -4104,8 +4104,9 @@ int do_tcp_getsockopt(struct sock *sk, int level,
+ val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
+ break;
+ case TCP_DEFER_ACCEPT:
+- val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
+- TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
++ val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept);
++ val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ,
++ TCP_RTO_MAX / HZ);
+ break;
+ case TCP_WINDOW_CLAMP:
+ val = tp->window_clamp;
+--
+2.39.2
+
--- /dev/null
+From 2a19bb80f620e9115ee081f89944c9fc3882cceb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 14:44:45 +0000
+Subject: tcp: annotate data-races around tcp_rsk(req)->ts_recent
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit eba20811f32652bc1a52d5e7cc403859b86390d9 ]
+
+TCP request sockets are lockless; tcp_rsk(req)->ts_recent
+can change while being read by another cpu, as syzbot noticed.
+
+This is harmless, but we should annotate the known races.
+
+Note that tcp_check_req() changes req->ts_recent a bit early;
+we might change this in the future.
+
+BUG: KCSAN: data-race in tcp_check_req / tcp_check_req
+
+write to 0xffff88813c8afb84 of 4 bytes by interrupt on cpu 1:
+tcp_check_req+0x694/0xc70 net/ipv4/tcp_minisocks.c:762
+tcp_v4_rcv+0x12db/0x1b70 net/ipv4/tcp_ipv4.c:2071
+ip_protocol_deliver_rcu+0x356/0x6d0 net/ipv4/ip_input.c:205
+ip_local_deliver_finish+0x13c/0x1a0 net/ipv4/ip_input.c:233
+NF_HOOK include/linux/netfilter.h:303 [inline]
+ip_local_deliver+0xec/0x1c0 net/ipv4/ip_input.c:254
+dst_input include/net/dst.h:468 [inline]
+ip_rcv_finish net/ipv4/ip_input.c:449 [inline]
+NF_HOOK include/linux/netfilter.h:303 [inline]
+ip_rcv+0x197/0x270 net/ipv4/ip_input.c:569
+__netif_receive_skb_one_core net/core/dev.c:5493 [inline]
+__netif_receive_skb+0x90/0x1b0 net/core/dev.c:5607
+process_backlog+0x21f/0x380 net/core/dev.c:5935
+__napi_poll+0x60/0x3b0 net/core/dev.c:6498
+napi_poll net/core/dev.c:6565 [inline]
+net_rx_action+0x32b/0x750 net/core/dev.c:6698
+__do_softirq+0xc1/0x265 kernel/softirq.c:571
+do_softirq+0x7e/0xb0 kernel/softirq.c:472
+__local_bh_enable_ip+0x64/0x70 kernel/softirq.c:396
+local_bh_enable+0x1f/0x20 include/linux/bottom_half.h:33
+rcu_read_unlock_bh include/linux/rcupdate.h:843 [inline]
+__dev_queue_xmit+0xabb/0x1d10 net/core/dev.c:4271
+dev_queue_xmit include/linux/netdevice.h:3088 [inline]
+neigh_hh_output include/net/neighbour.h:528 [inline]
+neigh_output include/net/neighbour.h:542 [inline]
+ip_finish_output2+0x700/0x840 net/ipv4/ip_output.c:229
+ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:317
+NF_HOOK_COND include/linux/netfilter.h:292 [inline]
+ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:431
+dst_output include/net/dst.h:458 [inline]
+ip_local_out net/ipv4/ip_output.c:126 [inline]
+__ip_queue_xmit+0xa4d/0xa70 net/ipv4/ip_output.c:533
+ip_queue_xmit+0x38/0x40 net/ipv4/ip_output.c:547
+__tcp_transmit_skb+0x1194/0x16e0 net/ipv4/tcp_output.c:1399
+tcp_transmit_skb net/ipv4/tcp_output.c:1417 [inline]
+tcp_write_xmit+0x13ff/0x2fd0 net/ipv4/tcp_output.c:2693
+__tcp_push_pending_frames+0x6a/0x1a0 net/ipv4/tcp_output.c:2877
+tcp_push_pending_frames include/net/tcp.h:1952 [inline]
+__tcp_sock_set_cork net/ipv4/tcp.c:3336 [inline]
+tcp_sock_set_cork+0xe8/0x100 net/ipv4/tcp.c:3343
+rds_tcp_xmit_path_complete+0x3b/0x40 net/rds/tcp_send.c:52
+rds_send_xmit+0xf8d/0x1420 net/rds/send.c:422
+rds_send_worker+0x42/0x1d0 net/rds/threads.c:200
+process_one_work+0x3e6/0x750 kernel/workqueue.c:2408
+worker_thread+0x5f2/0xa10 kernel/workqueue.c:2555
+kthread+0x1d7/0x210 kernel/kthread.c:379
+ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308
+
+read to 0xffff88813c8afb84 of 4 bytes by interrupt on cpu 0:
+tcp_check_req+0x32a/0xc70 net/ipv4/tcp_minisocks.c:622
+tcp_v4_rcv+0x12db/0x1b70 net/ipv4/tcp_ipv4.c:2071
+ip_protocol_deliver_rcu+0x356/0x6d0 net/ipv4/ip_input.c:205
+ip_local_deliver_finish+0x13c/0x1a0 net/ipv4/ip_input.c:233
+NF_HOOK include/linux/netfilter.h:303 [inline]
+ip_local_deliver+0xec/0x1c0 net/ipv4/ip_input.c:254
+dst_input include/net/dst.h:468 [inline]
+ip_rcv_finish net/ipv4/ip_input.c:449 [inline]
+NF_HOOK include/linux/netfilter.h:303 [inline]
+ip_rcv+0x197/0x270 net/ipv4/ip_input.c:569
+__netif_receive_skb_one_core net/core/dev.c:5493 [inline]
+__netif_receive_skb+0x90/0x1b0 net/core/dev.c:5607
+process_backlog+0x21f/0x380 net/core/dev.c:5935
+__napi_poll+0x60/0x3b0 net/core/dev.c:6498
+napi_poll net/core/dev.c:6565 [inline]
+net_rx_action+0x32b/0x750 net/core/dev.c:6698
+__do_softirq+0xc1/0x265 kernel/softirq.c:571
+run_ksoftirqd+0x17/0x20 kernel/softirq.c:939
+smpboot_thread_fn+0x30a/0x4a0 kernel/smpboot.c:164
+kthread+0x1d7/0x210 kernel/kthread.c:379
+ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308
+
+value changed: 0x1cd237f1 -> 0x1cd237f2
+
+Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230717144445.653164-3-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_ipv4.c | 2 +-
+ net/ipv4/tcp_minisocks.c | 9 ++++++---
+ net/ipv4/tcp_output.c | 2 +-
+ net/ipv6/tcp_ipv6.c | 2 +-
+ 4 files changed, 9 insertions(+), 6 deletions(-)
+
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index e5df50b3e23a0..d49a66b271d52 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -988,7 +988,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+ tcp_rsk(req)->rcv_nxt,
+ req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+ tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
+- req->ts_recent,
++ READ_ONCE(req->ts_recent),
+ 0,
+ tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
+ inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
+diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
+index f281eab7fd125..42844d20da020 100644
+--- a/net/ipv4/tcp_minisocks.c
++++ b/net/ipv4/tcp_minisocks.c
+@@ -537,7 +537,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
+ newtp->max_window = newtp->snd_wnd;
+
+ if (newtp->rx_opt.tstamp_ok) {
+- newtp->rx_opt.ts_recent = req->ts_recent;
++ newtp->rx_opt.ts_recent = READ_ONCE(req->ts_recent);
+ newtp->rx_opt.ts_recent_stamp = ktime_get_seconds();
+ newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
+ } else {
+@@ -601,7 +601,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
+ tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL);
+
+ if (tmp_opt.saw_tstamp) {
+- tmp_opt.ts_recent = req->ts_recent;
++ tmp_opt.ts_recent = READ_ONCE(req->ts_recent);
+ if (tmp_opt.rcv_tsecr)
+ tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off;
+ /* We do not store true stamp, but it is not required,
+@@ -740,8 +740,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
+
+ /* In sequence, PAWS is OK. */
+
++ /* TODO: We probably should defer ts_recent change once
++ * we take ownership of @req.
++ */
+ if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt))
+- req->ts_recent = tmp_opt.rcv_tsval;
++ WRITE_ONCE(req->ts_recent, tmp_opt.rcv_tsval);
+
+ if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
+ /* Truncate SYN, it is out of window starting
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 693a29d3f43bd..26bd039f9296f 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -876,7 +876,7 @@ static unsigned int tcp_synack_options(const struct sock *sk,
+ if (likely(ireq->tstamp_ok)) {
+ opts->options |= OPTION_TS;
+ opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
+- opts->tsecr = req->ts_recent;
++ opts->tsecr = READ_ONCE(req->ts_recent);
+ remaining -= TCPOLEN_TSTAMP_ALIGNED;
+ }
+ if (likely(ireq->sack_ok)) {
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index 0dcb06a1fe044..d9253aa764fae 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1130,7 +1130,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+ tcp_rsk(req)->rcv_nxt,
+ req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+ tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
+- req->ts_recent, sk->sk_bound_dev_if,
++ READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
+ tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
+ ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
+ READ_ONCE(tcp_rsk(req)->txhash));
+--
+2.39.2
+
--- /dev/null
+From d29e41820d443947afb2314e6e9891e047903726 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 14:44:44 +0000
+Subject: tcp: annotate data-races around tcp_rsk(req)->txhash
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 5e5265522a9a7f91d1b0bd411d634bdaf16c80cd ]
+
+TCP request sockets are lockless; some of their fields
+can change while being read by another cpu, as syzbot noticed.
+
+This is usually harmless, but we should annotate the known
+races.
+
+This patch takes care of tcp_rsk(req)->txhash;
+a separate one is needed for tcp_rsk(req)->ts_recent.
+
+BUG: KCSAN: data-race in tcp_make_synack / tcp_rtx_synack
+
+write to 0xffff8881362304bc of 4 bytes by task 32083 on cpu 1:
+tcp_rtx_synack+0x9d/0x2a0 net/ipv4/tcp_output.c:4213
+inet_rtx_syn_ack+0x38/0x80 net/ipv4/inet_connection_sock.c:880
+tcp_check_req+0x379/0xc70 net/ipv4/tcp_minisocks.c:665
+tcp_v6_rcv+0x125b/0x1b20 net/ipv6/tcp_ipv6.c:1673
+ip6_protocol_deliver_rcu+0x92f/0xf30 net/ipv6/ip6_input.c:437
+ip6_input_finish net/ipv6/ip6_input.c:482 [inline]
+NF_HOOK include/linux/netfilter.h:303 [inline]
+ip6_input+0xbd/0x1b0 net/ipv6/ip6_input.c:491
+dst_input include/net/dst.h:468 [inline]
+ip6_rcv_finish+0x1e2/0x2e0 net/ipv6/ip6_input.c:79
+NF_HOOK include/linux/netfilter.h:303 [inline]
+ipv6_rcv+0x74/0x150 net/ipv6/ip6_input.c:309
+__netif_receive_skb_one_core net/core/dev.c:5452 [inline]
+__netif_receive_skb+0x90/0x1b0 net/core/dev.c:5566
+netif_receive_skb_internal net/core/dev.c:5652 [inline]
+netif_receive_skb+0x4a/0x310 net/core/dev.c:5711
+tun_rx_batched+0x3bf/0x400
+tun_get_user+0x1d24/0x22b0 drivers/net/tun.c:1997
+tun_chr_write_iter+0x18e/0x240 drivers/net/tun.c:2043
+call_write_iter include/linux/fs.h:1871 [inline]
+new_sync_write fs/read_write.c:491 [inline]
+vfs_write+0x4ab/0x7d0 fs/read_write.c:584
+ksys_write+0xeb/0x1a0 fs/read_write.c:637
+__do_sys_write fs/read_write.c:649 [inline]
+__se_sys_write fs/read_write.c:646 [inline]
+__x64_sys_write+0x42/0x50 fs/read_write.c:646
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+read to 0xffff8881362304bc of 4 bytes by task 32078 on cpu 0:
+tcp_make_synack+0x367/0xb40 net/ipv4/tcp_output.c:3663
+tcp_v6_send_synack+0x72/0x420 net/ipv6/tcp_ipv6.c:544
+tcp_conn_request+0x11a8/0x1560 net/ipv4/tcp_input.c:7059
+tcp_v6_conn_request+0x13f/0x180 net/ipv6/tcp_ipv6.c:1175
+tcp_rcv_state_process+0x156/0x1de0 net/ipv4/tcp_input.c:6494
+tcp_v6_do_rcv+0x98a/0xb70 net/ipv6/tcp_ipv6.c:1509
+tcp_v6_rcv+0x17b8/0x1b20 net/ipv6/tcp_ipv6.c:1735
+ip6_protocol_deliver_rcu+0x92f/0xf30 net/ipv6/ip6_input.c:437
+ip6_input_finish net/ipv6/ip6_input.c:482 [inline]
+NF_HOOK include/linux/netfilter.h:303 [inline]
+ip6_input+0xbd/0x1b0 net/ipv6/ip6_input.c:491
+dst_input include/net/dst.h:468 [inline]
+ip6_rcv_finish+0x1e2/0x2e0 net/ipv6/ip6_input.c:79
+NF_HOOK include/linux/netfilter.h:303 [inline]
+ipv6_rcv+0x74/0x150 net/ipv6/ip6_input.c:309
+__netif_receive_skb_one_core net/core/dev.c:5452 [inline]
+__netif_receive_skb+0x90/0x1b0 net/core/dev.c:5566
+netif_receive_skb_internal net/core/dev.c:5652 [inline]
+netif_receive_skb+0x4a/0x310 net/core/dev.c:5711
+tun_rx_batched+0x3bf/0x400
+tun_get_user+0x1d24/0x22b0 drivers/net/tun.c:1997
+tun_chr_write_iter+0x18e/0x240 drivers/net/tun.c:2043
+call_write_iter include/linux/fs.h:1871 [inline]
+new_sync_write fs/read_write.c:491 [inline]
+vfs_write+0x4ab/0x7d0 fs/read_write.c:584
+ksys_write+0xeb/0x1a0 fs/read_write.c:637
+__do_sys_write fs/read_write.c:649 [inline]
+__se_sys_write fs/read_write.c:646 [inline]
+__x64_sys_write+0x42/0x50 fs/read_write.c:646
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+value changed: 0x91d25731 -> 0xe79325cd
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 32078 Comm: syz-executor.4 Not tainted 6.5.0-rc1-syzkaller-00033-geb26cbb1a754 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/03/2023
+
+Fixes: 58d607d3e52f ("tcp: provide skb->hash to synack packets")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230717144445.653164-2-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_ipv4.c | 3 ++-
+ net/ipv4/tcp_minisocks.c | 2 +-
+ net/ipv4/tcp_output.c | 4 ++--
+ net/ipv6/tcp_ipv6.c | 2 +-
+ 4 files changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index ef740983a1222..e5df50b3e23a0 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -992,7 +992,8 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+ 0,
+ tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
+ inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
+- ip_hdr(skb)->tos, tcp_rsk(req)->txhash);
++ ip_hdr(skb)->tos,
++ READ_ONCE(tcp_rsk(req)->txhash));
+ }
+
+ /*
+diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
+index 7f37e7da64671..f281eab7fd125 100644
+--- a/net/ipv4/tcp_minisocks.c
++++ b/net/ipv4/tcp_minisocks.c
+@@ -510,7 +510,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
+ newicsk->icsk_ack.lrcvtime = tcp_jiffies32;
+
+ newtp->lsndtime = tcp_jiffies32;
+- newsk->sk_txhash = treq->txhash;
++ newsk->sk_txhash = READ_ONCE(treq->txhash);
+ newtp->total_retrans = req->num_retrans;
+
+ tcp_init_xmit_timers(newsk);
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 925594dbeb929..693a29d3f43bd 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -3581,7 +3581,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
+ rcu_read_lock();
+ md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
+ #endif
+- skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
++ skb_set_hash(skb, READ_ONCE(tcp_rsk(req)->txhash), PKT_HASH_TYPE_L4);
+ /* bpf program will be interested in the tcp_flags */
+ TCP_SKB_CB(skb)->tcp_flags = TCPHDR_SYN | TCPHDR_ACK;
+ tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
+@@ -4124,7 +4124,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
+
+ /* Paired with WRITE_ONCE() in sock_setsockopt() */
+ if (READ_ONCE(sk->sk_txrehash) == SOCK_TXREHASH_ENABLED)
+- tcp_rsk(req)->txhash = net_tx_rndhash();
++ WRITE_ONCE(tcp_rsk(req)->txhash, net_tx_rndhash());
+ res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL,
+ NULL);
+ if (!res) {
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index 8d61efeab9c99..0dcb06a1fe044 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1133,7 +1133,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+ req->ts_recent, sk->sk_bound_dev_if,
+ tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
+ ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
+- tcp_rsk(req)->txhash);
++ READ_ONCE(tcp_rsk(req)->txhash));
+ }
+
+
+--
+2.39.2
+
--- /dev/null
+From 078902bb3940caf45e1f58470e88e8184a16486d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:50 +0000
+Subject: tcp: annotate data-races around tp->keepalive_intvl
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 5ecf9d4f52ff2f1d4d44c9b68bc75688e82f13b4 ]
+
+do_tcp_getsockopt() reads tp->keepalive_intvl while another cpu
+might change its value.
+
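+The same pattern is used throughout this series; as a rough sketch
+(illustrative only, mirroring the hunks below), the setsockopt path
+becomes the annotated writer and the lockless getsockopt/helper path
+the annotated reader:
+
+  /* writer, under lock_sock(sk) */
+  WRITE_ONCE(tp->keepalive_intvl, val * HZ);
+
+  /* lockless reader, e.g. keepalive_intvl_when() */
+  int intvl = READ_ONCE(tp->keepalive_intvl);
+
+The annotations document the intentional lockless access and keep
+KCSAN from flagging it, without changing the locking itself.
+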
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-5-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/tcp.h | 9 +++++++--
+ net/ipv4/tcp.c | 4 ++--
+ 2 files changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 397c248102415..f39c44cbdfe62 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1511,9 +1511,14 @@ void tcp_leave_memory_pressure(struct sock *sk);
+ static inline int keepalive_intvl_when(const struct tcp_sock *tp)
+ {
+ struct net *net = sock_net((struct sock *)tp);
++ int val;
++
++ /* Paired with WRITE_ONCE() in tcp_sock_set_keepintvl()
++ * and do_tcp_setsockopt().
++ */
++ val = READ_ONCE(tp->keepalive_intvl);
+
+- return tp->keepalive_intvl ? :
+- READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
++ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
+ }
+
+ static inline int keepalive_time_when(const struct tcp_sock *tp)
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index c0d7b226bca1a..d19cfeb78392d 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3451,7 +3451,7 @@ int tcp_sock_set_keepintvl(struct sock *sk, int val)
+ return -EINVAL;
+
+ lock_sock(sk);
+- tcp_sk(sk)->keepalive_intvl = val * HZ;
++ WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ);
+ release_sock(sk);
+ return 0;
+ }
+@@ -3665,7 +3665,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ if (val < 1 || val > MAX_TCP_KEEPINTVL)
+ err = -EINVAL;
+ else
+- tp->keepalive_intvl = val * HZ;
++ WRITE_ONCE(tp->keepalive_intvl, val * HZ);
+ break;
+ case TCP_KEEPCNT:
+ if (val < 1 || val > MAX_TCP_KEEPCNT)
+--
+2.39.2
+
--- /dev/null
+From 8b50db4f550c9b4fa395cb961dd7c9ab6b4ac010 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:51 +0000
+Subject: tcp: annotate data-races around tp->keepalive_probes
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 6e5e1de616bf5f3df1769abc9292191dfad9110a ]
+
+do_tcp_getsockopt() reads tp->keepalive_probes while another cpu
+might change its value.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-6-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/tcp.h | 9 +++++++--
+ net/ipv4/tcp.c | 5 +++--
+ 2 files changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index f39c44cbdfe62..9733d8e4f10af 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1535,9 +1535,14 @@ static inline int keepalive_time_when(const struct tcp_sock *tp)
+ static inline int keepalive_probes(const struct tcp_sock *tp)
+ {
+ struct net *net = sock_net((struct sock *)tp);
++ int val;
++
++ /* Paired with WRITE_ONCE() in tcp_sock_set_keepcnt()
++ * and do_tcp_setsockopt().
++ */
++ val = READ_ONCE(tp->keepalive_probes);
+
+- return tp->keepalive_probes ? :
+- READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
++ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
+ }
+
+ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index d19cfeb78392d..7d75928ea0f9c 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3463,7 +3463,8 @@ int tcp_sock_set_keepcnt(struct sock *sk, int val)
+ return -EINVAL;
+
+ lock_sock(sk);
+- tcp_sk(sk)->keepalive_probes = val;
++ /* Paired with READ_ONCE() in keepalive_probes() */
++ WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val);
+ release_sock(sk);
+ return 0;
+ }
+@@ -3671,7 +3672,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ if (val < 1 || val > MAX_TCP_KEEPCNT)
+ err = -EINVAL;
+ else
+- tp->keepalive_probes = val;
++ WRITE_ONCE(tp->keepalive_probes, val);
+ break;
+ case TCP_SYNCNT:
+ if (val < 1 || val > MAX_TCP_SYNCNT)
+--
+2.39.2
+
--- /dev/null
+From 9121aedbe1355d93c6f3ab514d0878a9099021f0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:49 +0000
+Subject: tcp: annotate data-races around tp->keepalive_time
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 4164245c76ff906c9086758e1c3f87082a7f5ef5 ]
+
+do_tcp_getsockopt() reads tp->keepalive_time while another cpu
+might change its value.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-4-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/tcp.h | 7 +++++--
+ net/ipv4/tcp.c | 3 ++-
+ 2 files changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 5eedd476a38d7..397c248102415 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1519,9 +1519,12 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp)
+ static inline int keepalive_time_when(const struct tcp_sock *tp)
+ {
+ struct net *net = sock_net((struct sock *)tp);
++ int val;
+
+- return tp->keepalive_time ? :
+- READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
++ /* Paired with WRITE_ONCE() in tcp_sock_set_keepidle_locked() */
++ val = READ_ONCE(tp->keepalive_time);
++
++ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
+ }
+
+ static inline int keepalive_probes(const struct tcp_sock *tp)
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 3edf7a1c5cbd2..c0d7b226bca1a 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3418,7 +3418,8 @@ int tcp_sock_set_keepidle_locked(struct sock *sk, int val)
+ if (val < 1 || val > MAX_TCP_KEEPIDLE)
+ return -EINVAL;
+
+- tp->keepalive_time = val * HZ;
++ /* Paired with WRITE_ONCE() in keepalive_time_when() */
++ WRITE_ONCE(tp->keepalive_time, val * HZ);
+ if (sock_flag(sk, SOCK_KEEPOPEN) &&
+ !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
+ u32 elapsed = keepalive_time_elapsed(tp);
+--
+2.39.2
+
--- /dev/null
+From 3d98c816d1920605a924d0ead6bf2be144e81749 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:53 +0000
+Subject: tcp: annotate data-races around tp->linger2
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 9df5335ca974e688389c875546e5819778a80d59 ]
+
+do_tcp_getsockopt() reads tp->linger2 while another cpu
+might change its value.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-8-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index ffa9717293358..363535b6ece83 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3691,11 +3691,11 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+
+ case TCP_LINGER2:
+ if (val < 0)
+- tp->linger2 = -1;
++ WRITE_ONCE(tp->linger2, -1);
+ else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
+- tp->linger2 = TCP_FIN_TIMEOUT_MAX;
++ WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX);
+ else
+- tp->linger2 = val * HZ;
++ WRITE_ONCE(tp->linger2, val * HZ);
+ break;
+
+ case TCP_DEFER_ACCEPT:
+@@ -4099,7 +4099,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
+ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
+ break;
+ case TCP_LINGER2:
+- val = tp->linger2;
++ val = READ_ONCE(tp->linger2);
+ if (val >= 0)
+ val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
+ break;
+--
+2.39.2
+
--- /dev/null
+From e13aeaa389758176f64c75eeb7dd1bf6ebee1871 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:55 +0000
+Subject: tcp: annotate data-races around tp->notsent_lowat
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 1aeb87bc1440c5447a7fa2d6e3c2cca52cbd206b ]
+
+tp->notsent_lowat can be read locklessly from do_tcp_getsockopt()
+and tcp_poll().
+
+Fixes: c9bee3b7fdec ("tcp: TCP_NOTSENT_LOWAT socket option")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-10-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/tcp.h | 6 +++++-
+ net/ipv4/tcp.c | 4 ++--
+ 2 files changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 9733d8e4f10af..e9c8f88f47696 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -2059,7 +2059,11 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
+ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
+ {
+ struct net *net = sock_net((struct sock *)tp);
+- return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
++ u32 val;
++
++ val = READ_ONCE(tp->notsent_lowat);
++
++ return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
+ }
+
+ bool tcp_stream_memory_free(const struct sock *sk, int wake);
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index bc3ad48f92389..6f3a494b965ae 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3770,7 +3770,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ err = tcp_repair_set_window(tp, optval, optlen);
+ break;
+ case TCP_NOTSENT_LOWAT:
+- tp->notsent_lowat = val;
++ WRITE_ONCE(tp->notsent_lowat, val);
+ sk->sk_write_space(sk);
+ break;
+ case TCP_INQ:
+@@ -4266,7 +4266,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
+ val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset);
+ break;
+ case TCP_NOTSENT_LOWAT:
+- val = tp->notsent_lowat;
++ val = READ_ONCE(tp->notsent_lowat);
+ break;
+ case TCP_INQ:
+ val = tp->recvmsg_inq;
+--
+2.39.2
+
--- /dev/null
+From acc05127977764c50f101313e03fed5dd0b7728e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:47 +0000
+Subject: tcp: annotate data-races around tp->tcp_tx_delay
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 348b81b68b13ebd489a3e6a46aa1c384c731c919 ]
+
+do_tcp_getsockopt() reads tp->tcp_tx_delay while another cpu
+might change its value.
+
+Fixes: a842fe1425cb ("tcp: add optional per socket transmit delay")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-2-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 0bd0be3c63d22..5e4bc80dc0ae5 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3780,7 +3780,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ case TCP_TX_DELAY:
+ if (val)
+ tcp_enable_tx_delay();
+- tp->tcp_tx_delay = val;
++ WRITE_ONCE(tp->tcp_tx_delay, val);
+ break;
+ default:
+ err = -ENOPROTOOPT;
+@@ -4256,7 +4256,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
+ break;
+
+ case TCP_TX_DELAY:
+- val = tp->tcp_tx_delay;
++ val = READ_ONCE(tp->tcp_tx_delay);
+ break;
+
+ case TCP_TIMESTAMP:
+--
+2.39.2
+
--- /dev/null
+From 5cb5df7c5c218e8bc062747711555eb97a17ceb0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:48 +0000
+Subject: tcp: annotate data-races around tp->tsoffset
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit dd23c9f1e8d5c1d2e3d29393412385ccb9c7a948 ]
+
+do_tcp_getsockopt() reads tp->tsoffset while another cpu
+might change its value.
+
+Fixes: 93be6ce0e91b ("tcp: set and get per-socket timestamp")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-3-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 4 ++--
+ net/ipv4/tcp_ipv4.c | 5 +++--
+ 2 files changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 5e4bc80dc0ae5..3edf7a1c5cbd2 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3762,7 +3762,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ if (!tp->repair)
+ err = -EPERM;
+ else
+- tp->tsoffset = val - tcp_time_stamp_raw();
++ WRITE_ONCE(tp->tsoffset, val - tcp_time_stamp_raw());
+ break;
+ case TCP_REPAIR_WINDOW:
+ err = tcp_repair_set_window(tp, optval, optlen);
+@@ -4260,7 +4260,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
+ break;
+
+ case TCP_TIMESTAMP:
+- val = tcp_time_stamp_raw() + tp->tsoffset;
++ val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset);
+ break;
+ case TCP_NOTSENT_LOWAT:
+ val = tp->notsent_lowat;
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index d49a66b271d52..9a8d59e9303a0 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -307,8 +307,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+ inet->inet_daddr,
+ inet->inet_sport,
+ usin->sin_port));
+- tp->tsoffset = secure_tcp_ts_off(net, inet->inet_saddr,
+- inet->inet_daddr);
++ WRITE_ONCE(tp->tsoffset,
++ secure_tcp_ts_off(net, inet->inet_saddr,
++ inet->inet_daddr));
+ }
+
+ inet->inet_id = get_random_u16();
+--
+2.39.2
+
--- /dev/null
+From 3af33ea1ad72a1fc6ed5074f0ce9e16cc52c818e Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Wed, 21 Jun 2023 11:32:35 +0200
+Subject: [PATCH AUTOSEL 4.19 07/11] udf: Fix uninitialized array access for
+ some pathnames
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit 028f6055c912588e6f72722d89c30b401bbcf013 ]
+
+For filenames that begin with . and are between 2 and 5 characters long,
+UDF charset conversion code would read uninitialized memory in the
+output buffer. The only practical impact is that a "unification hash" may
+be prepended to the name when it is not actually needed, but it is still
+good to fix this.
+
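+The problem is visible in the translate check itself; as a sketch of
+the condition in udf_name_from_CS0() before and after (taken from the
+hunk below), with str_o_len == 0 the first conjunct of the old test is
+still true, so str_o[0] is read before anything was written to the
+output buffer:
+
+  /* before: str_o_len == 0 still evaluates str_o[0] */
+  if (str_o_len <= 2 && str_o[0] == '.' &&
+      (str_o_len == 1 || str_o[1] == '.'))
+
+  /* after: the dereference is guarded */
+  if (str_o_len > 0 && str_o_len <= 2 && str_o[0] == '.' &&
+      (str_o_len == 1 || str_o[1] == '.'))
+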
+Reported-by: syzbot+cd311b1e43cc25f90d18@syzkaller.appspotmail.com
+Link: https://lore.kernel.org/all/000000000000e2638a05fe9dc8f9@google.com
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/udf/unicode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
+index 5fcfa96463ebb..85521d6b02370 100644
+--- a/fs/udf/unicode.c
++++ b/fs/udf/unicode.c
+@@ -247,7 +247,7 @@ static int udf_name_from_CS0(struct super_block *sb,
+ }
+
+ if (translate) {
+- if (str_o_len <= 2 && str_o[0] == '.' &&
++ if (str_o_len > 0 && str_o_len <= 2 && str_o[0] == '.' &&
+ (str_o_len == 1 || str_o[1] == '.'))
+ needsCRC = 1;
+ if (needsCRC) {
+--
+2.39.2
+
--- /dev/null
+From 1c0a043a5b5d55b841bdb8e72a4e7dbded64e33b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 May 2023 12:41:06 +0300
+Subject: wifi: ath11k: add support default regdb while searching board-2.bin
+ for WCN6855
+
+From: Wen Gong <quic_wgong@quicinc.com>
+
+[ Upstream commit 88ca89202f8e8afb5225eb5244d79cd67c15d744 ]
+
+Sometimes board-2.bin does not have regdb data matching parameters such
+as vendor, device, subsystem-vendor, subsystem-device, etc. Add default
+regdb data keyed by 'bus=%s' to board-2.bin for WCN6855, so that ath11k
+can fall back to 'bus=pci' when searching for regdb data in board-2.bin
+for WCN6855.
+
+kernel: [ 122.515808] ath11k_pci 0000:03:00.0: boot using board name 'bus=pci,vendor=17cb,device=1103,subsystem-vendor=17cb,subsystem-device=3374,qmi-chip-id=2,qmi-board-id=262'
+kernel: [ 122.517240] ath11k_pci 0000:03:00.0: boot firmware request ath11k/WCN6855/hw2.0/board-2.bin size 6179564
+kernel: [ 122.517280] ath11k_pci 0000:03:00.0: failed to fetch regdb data for bus=pci,vendor=17cb,device=1103,subsystem-vendor=17cb,subsystem-device=3374,qmi-chip-id=2,qmi-board-id=262 from ath11k/WCN6855/hw2.0/board-2.bin
+kernel: [ 122.517464] ath11k_pci 0000:03:00.0: boot using board name 'bus=pci'
+kernel: [ 122.518901] ath11k_pci 0000:03:00.0: boot firmware request ath11k/WCN6855/hw2.0/board-2.bin size 6179564
+kernel: [ 122.518915] ath11k_pci 0000:03:00.0: board name
+kernel: [ 122.518917] ath11k_pci 0000:03:00.0: 00000000: 62 75 73 3d 70 63 69 bus=pci
+kernel: [ 122.518918] ath11k_pci 0000:03:00.0: boot found match regdb data for name 'bus=pci'
+kernel: [ 122.518920] ath11k_pci 0000:03:00.0: boot found regdb data for 'bus=pci'
+kernel: [ 122.518921] ath11k_pci 0000:03:00.0: fetched regdb
+
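+The resulting lookup order in ath11k_core_fetch_regdb() is, roughly
+(simplified sketch based on the hunks below; arguments abbreviated):
+
+  /* 1. exact board name: bus, vendor/device, subsystem ids, qmi ids */
+  ath11k_core_create_board_name(ab, boardname, BOARD_NAME_SIZE);
+  ret = ath11k_core_fetch_board_data_api_n(ab, bd, boardname, ...);
+
+  /* 2. new fallback: bus type only, e.g. "bus=pci" */
+  ath11k_core_create_bus_type_board_name(ab, default_boardname,
+                                         BOARD_NAME_SIZE);
+  ret = ath11k_core_fetch_board_data_api_n(ab, bd, default_boardname, ...);
+
+  /* 3. last resort: the standalone regdb file */
+  ret = ath11k_core_fetch_board_data_api_1(ab, bd, ATH11K_REGDB_FILE_NAME);
+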
+Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3
+
+Signed-off-by: Wen Gong <quic_wgong@quicinc.com>
+Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
+Link: https://lore.kernel.org/r/20230517133959.8224-1-quic_wgong@quicinc.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/ath/ath11k/core.c | 53 +++++++++++++++++++-------
+ 1 file changed, 40 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
+index b99180bc81723..893fefadbba96 100644
+--- a/drivers/net/wireless/ath/ath11k/core.c
++++ b/drivers/net/wireless/ath/ath11k/core.c
+@@ -870,7 +870,8 @@ int ath11k_core_check_dt(struct ath11k_base *ab)
+ }
+
+ static int __ath11k_core_create_board_name(struct ath11k_base *ab, char *name,
+- size_t name_len, bool with_variant)
++ size_t name_len, bool with_variant,
++ bool bus_type_mode)
+ {
+ /* strlen(',variant=') + strlen(ab->qmi.target.bdf_ext) */
+ char variant[9 + ATH11K_QMI_BDF_EXT_STR_LENGTH] = { 0 };
+@@ -881,15 +882,20 @@ static int __ath11k_core_create_board_name(struct ath11k_base *ab, char *name,
+
+ switch (ab->id.bdf_search) {
+ case ATH11K_BDF_SEARCH_BUS_AND_BOARD:
+- scnprintf(name, name_len,
+- "bus=%s,vendor=%04x,device=%04x,subsystem-vendor=%04x,subsystem-device=%04x,qmi-chip-id=%d,qmi-board-id=%d%s",
+- ath11k_bus_str(ab->hif.bus),
+- ab->id.vendor, ab->id.device,
+- ab->id.subsystem_vendor,
+- ab->id.subsystem_device,
+- ab->qmi.target.chip_id,
+- ab->qmi.target.board_id,
+- variant);
++ if (bus_type_mode)
++ scnprintf(name, name_len,
++ "bus=%s",
++ ath11k_bus_str(ab->hif.bus));
++ else
++ scnprintf(name, name_len,
++ "bus=%s,vendor=%04x,device=%04x,subsystem-vendor=%04x,subsystem-device=%04x,qmi-chip-id=%d,qmi-board-id=%d%s",
++ ath11k_bus_str(ab->hif.bus),
++ ab->id.vendor, ab->id.device,
++ ab->id.subsystem_vendor,
++ ab->id.subsystem_device,
++ ab->qmi.target.chip_id,
++ ab->qmi.target.board_id,
++ variant);
+ break;
+ default:
+ scnprintf(name, name_len,
+@@ -908,13 +914,19 @@ static int __ath11k_core_create_board_name(struct ath11k_base *ab, char *name,
+ static int ath11k_core_create_board_name(struct ath11k_base *ab, char *name,
+ size_t name_len)
+ {
+- return __ath11k_core_create_board_name(ab, name, name_len, true);
++ return __ath11k_core_create_board_name(ab, name, name_len, true, false);
+ }
+
+ static int ath11k_core_create_fallback_board_name(struct ath11k_base *ab, char *name,
+ size_t name_len)
+ {
+- return __ath11k_core_create_board_name(ab, name, name_len, false);
++ return __ath11k_core_create_board_name(ab, name, name_len, false, false);
++}
++
++static int ath11k_core_create_bus_type_board_name(struct ath11k_base *ab, char *name,
++ size_t name_len)
++{
++ return __ath11k_core_create_board_name(ab, name, name_len, false, true);
+ }
+
+ const struct firmware *ath11k_core_firmware_request(struct ath11k_base *ab,
+@@ -1218,7 +1230,7 @@ int ath11k_core_fetch_bdf(struct ath11k_base *ab, struct ath11k_board_data *bd)
+
+ int ath11k_core_fetch_regdb(struct ath11k_base *ab, struct ath11k_board_data *bd)
+ {
+- char boardname[BOARD_NAME_SIZE];
++ char boardname[BOARD_NAME_SIZE], default_boardname[BOARD_NAME_SIZE];
+ int ret;
+
+ ret = ath11k_core_create_board_name(ab, boardname, BOARD_NAME_SIZE);
+@@ -1235,6 +1247,21 @@ int ath11k_core_fetch_regdb(struct ath11k_base *ab, struct ath11k_board_data *bd
+ if (!ret)
+ goto exit;
+
++ ret = ath11k_core_create_bus_type_board_name(ab, default_boardname,
++ BOARD_NAME_SIZE);
++ if (ret) {
++ ath11k_dbg(ab, ATH11K_DBG_BOOT,
++ "failed to create default board name for regdb: %d", ret);
++ goto exit;
++ }
++
++ ret = ath11k_core_fetch_board_data_api_n(ab, bd, default_boardname,
++ ATH11K_BD_IE_REGDB,
++ ATH11K_BD_IE_REGDB_NAME,
++ ATH11K_BD_IE_REGDB_DATA);
++ if (!ret)
++ goto exit;
++
+ ret = ath11k_core_fetch_board_data_api_1(ab, bd, ATH11K_REGDB_FILE_NAME);
+ if (ret)
+ ath11k_dbg(ab, ATH11K_DBG_BOOT, "failed to fetch %s from %s\n",
+--
+2.39.2
+
--- /dev/null
+From d4bcf71d3c456ca0656ec111454eda83581a3d2c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Jun 2023 14:41:28 +0530
+Subject: wifi: ath11k: fix memory leak in WMI firmware stats
+
+From: P Praneesh <quic_ppranees@quicinc.com>
+
+[ Upstream commit 6aafa1c2d3e3fea2ebe84c018003f2a91722e607 ]
+
+Memory allocated for firmware pdev, vdev and beacon statistics
+is not released during rmmod.
+
+Fix it by calling ath11k_fw_stats_free() function before hardware
+unregister.
+
+While at it, avoid calling ath11k_fw_stats_free() while processing
+the firmware stats received in the WMI event: the local lists are
+spliced and reinitialised there, so they hold no elements by the time
+the free would run.
+
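+As a hedged illustration of why the free in the event handler is dead
+code (assuming the handler hands the locally built lists over with
+list_splice_tail_init(), which re-initialises the source list):
+
+  /* local stats lists are spliced into the per-radio copy ... */
+  list_splice_tail_init(&stats.pdevs, &ar->fw_stats.pdevs);
+  /* ... so stats.pdevs is empty again here and
+   * ath11k_fw_stats_free(&stats) would free nothing; the entries now
+   * belong to ar->fw_stats and are released in ath11k_mac_destroy().
+   */
+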
+Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1
+
+Signed-off-by: P Praneesh <quic_ppranees@quicinc.com>
+Signed-off-by: Aditya Kumar Singh <quic_adisi@quicinc.com>
+Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
+Link: https://lore.kernel.org/r/20230606091128.14202-1-quic_adisi@quicinc.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/ath/ath11k/mac.c | 1 +
+ drivers/net/wireless/ath/ath11k/wmi.c | 5 +++++
+ 2 files changed, 6 insertions(+)
+
+diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
+index b19d44b3f5dfb..cb77dd6ce9665 100644
+--- a/drivers/net/wireless/ath/ath11k/mac.c
++++ b/drivers/net/wireless/ath/ath11k/mac.c
+@@ -9279,6 +9279,7 @@ void ath11k_mac_destroy(struct ath11k_base *ab)
+ if (!ar)
+ continue;
+
++ ath11k_fw_stats_free(&ar->fw_stats);
+ ieee80211_free_hw(ar->hw);
+ pdev->ar = NULL;
+ }
+diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
+index fad9f8d308a20..3e0a47f4a3ebd 100644
+--- a/drivers/net/wireless/ath/ath11k/wmi.c
++++ b/drivers/net/wireless/ath/ath11k/wmi.c
+@@ -7590,6 +7590,11 @@ static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *sk
+ rcu_read_unlock();
+ spin_unlock_bh(&ar->data_lock);
+
++ /* Since the stats's pdev, vdev and beacon list are spliced and reinitialised
++ * at this point, no need to free the individual list.
++ */
++ return;
++
+ free:
+ ath11k_fw_stats_free(&stats);
+ }
+--
+2.39.2
+
--- /dev/null
+From 885bcbfa0c9659fa068668223c2f45c63640b4c2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Apr 2023 16:54:45 +0200
+Subject: wifi: ath11k: fix registration of 6Ghz-only phy without the full
+ channel range
+
+From: Maxime Bizon <mbizon@freebox.fr>
+
+[ Upstream commit e2ceb1de2f83aafd8003f0b72dfd4b7441e97d14 ]
+
+Because of what seems to be a typo, a 6 GHz-only phy for which the BDF
+does not allow the 7115 MHz channel will fail to register:
+
+ WARNING: CPU: 2 PID: 106 at net/wireless/core.c:907 wiphy_register+0x914/0x954
+ Modules linked in: ath11k_pci sbsa_gwdt
+ CPU: 2 PID: 106 Comm: kworker/u8:5 Not tainted 6.3.0-rc7-next-20230418-00549-g1e096a17625a-dirty #9
+ Hardware name: Freebox V7R Board (DT)
+ Workqueue: ath11k_qmi_driver_event ath11k_qmi_driver_event_work
+ pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+ pc : wiphy_register+0x914/0x954
+ lr : ieee80211_register_hw+0x67c/0xc10
+ sp : ffffff800b123aa0
+ x29: ffffff800b123aa0 x28: 0000000000000000 x27: 0000000000000000
+ x26: 0000000000000000 x25: 0000000000000006 x24: ffffffc008d51418
+ x23: ffffffc008cb0838 x22: ffffff80176c2460 x21: 0000000000000168
+ x20: ffffff80176c0000 x19: ffffff80176c03e0 x18: 0000000000000014
+ x17: 00000000cbef338c x16: 00000000d2a26f21 x15: 00000000ad6bb85f
+ x14: 0000000000000020 x13: 0000000000000020 x12: 00000000ffffffbd
+ x11: 0000000000000208 x10: 00000000fffffdf7 x9 : ffffffc009394718
+ x8 : ffffff80176c0528 x7 : 000000007fffffff x6 : 0000000000000006
+ x5 : 0000000000000005 x4 : ffffff800b304284 x3 : ffffff800b304284
+ x2 : ffffff800b304d98 x1 : 0000000000000000 x0 : 0000000000000000
+ Call trace:
+ wiphy_register+0x914/0x954
+ ieee80211_register_hw+0x67c/0xc10
+ ath11k_mac_register+0x7c4/0xe10
+ ath11k_core_qmi_firmware_ready+0x1f4/0x570
+ ath11k_qmi_driver_event_work+0x198/0x590
+ process_one_work+0x1b8/0x328
+ worker_thread+0x6c/0x414
+ kthread+0x100/0x104
+ ret_from_fork+0x10/0x20
+ ---[ end trace 0000000000000000 ]---
+ ath11k_pci 0002:01:00.0: ieee80211 registration failed: -22
+ ath11k_pci 0002:01:00.0: failed register the radio with mac80211: -22
+ ath11k_pci 0002:01:00.0: failed to create pdev core: -22
+
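+In other words (a sketch of the check in ath11k_mac_setup_channels_rates();
+the two constants are assumed to be the driver's 6 GHz band bounds, with
+ATH11K_MAX_6G_FREQ at 7115 MHz):
+
+  /* old: the 6 GHz channel table is only used when the BDF allows the
+   * very top of the band (7115 MHz)
+   */
+  if (reg_cap->high_5ghz_chan >= ATH11K_MAX_6G_FREQ) { ... }
+
+  /* new: any phy whose upper limit reaches into the 6 GHz band gets
+   * the 6 GHz table
+   */
+  if (reg_cap->high_5ghz_chan >= ATH11K_MIN_6G_FREQ) { ... }
+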
+Signed-off-by: Maxime Bizon <mbizon@freebox.fr>
+Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
+Link: https://lore.kernel.org/r/20230421145445.2612280-1-mbizon@freebox.fr
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/ath/ath11k/mac.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
+index ef7617802491e..b19d44b3f5dfb 100644
+--- a/drivers/net/wireless/ath/ath11k/mac.c
++++ b/drivers/net/wireless/ath/ath11k/mac.c
+@@ -8715,7 +8715,7 @@ static int ath11k_mac_setup_channels_rates(struct ath11k *ar,
+ }
+
+ if (supported_bands & WMI_HOST_WLAN_5G_CAP) {
+- if (reg_cap->high_5ghz_chan >= ATH11K_MAX_6G_FREQ) {
++ if (reg_cap->high_5ghz_chan >= ATH11K_MIN_6G_FREQ) {
+ channels = kmemdup(ath11k_6ghz_channels,
+ sizeof(ath11k_6ghz_channels), GFP_KERNEL);
+ if (!channels) {
+--
+2.39.2
+
--- /dev/null
+From 1a37162f09f199864048ac62ae05cc6310aef58f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Jun 2023 13:03:59 +0300
+Subject: wifi: iwlwifi: Add support for new PCI Id
+
+From: Mukesh Sisodiya <mukesh.sisodiya@intel.com>
+
+[ Upstream commit 35bd6f1d043d089fcb60450e1287cc65f0095787 ]
+
+Add support for the PCI Id 51F1 without IMR support.
+
+Signed-off-by: Mukesh Sisodiya <mukesh.sisodiya@intel.com>
+Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
+Link: https://lore.kernel.org/r/20230620125813.9800e652e789.Ic06a085832ac3f988c8ef07d856c8e281563295d@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+index f6872b2a0d9d0..d5bd869086458 100644
+--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
++++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+@@ -495,6 +495,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
+ {IWL_PCI_DEVICE(0x7AF0, PCI_ANY_ID, iwl_so_trans_cfg)},
+ {IWL_PCI_DEVICE(0x51F0, PCI_ANY_ID, iwl_so_long_latency_trans_cfg)},
+ {IWL_PCI_DEVICE(0x51F1, PCI_ANY_ID, iwl_so_long_latency_imr_trans_cfg)},
++ {IWL_PCI_DEVICE(0x51F1, PCI_ANY_ID, iwl_so_long_latency_trans_cfg)},
+ {IWL_PCI_DEVICE(0x54F0, PCI_ANY_ID, iwl_so_long_latency_trans_cfg)},
+ {IWL_PCI_DEVICE(0x7F70, PCI_ANY_ID, iwl_so_trans_cfg)},
+
+@@ -543,6 +544,7 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
+ IWL_DEV_INFO(0x51F0, 0x1551, iwl9560_2ac_cfg_soc, iwl9560_killer_1550i_160_name),
+ IWL_DEV_INFO(0x51F0, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name),
+ IWL_DEV_INFO(0x51F0, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name),
++ IWL_DEV_INFO(0x51F1, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name),
+ IWL_DEV_INFO(0x54F0, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name),
+ IWL_DEV_INFO(0x54F0, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name),
+ IWL_DEV_INFO(0x7A70, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name),
+--
+2.39.2
+
--- /dev/null
+From dd01d6d149a5c58b8f2f7d9e9211ce28c8befd64 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Jun 2023 13:04:02 +0300
+Subject: wifi: iwlwifi: mvm: avoid baid size integer overflow
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+[ Upstream commit 1a528ab1da324d078ec60283c34c17848580df24 ]
+
+Roee reported various hard-to-debug crashes with pings in
+EHT aggregation scenarios. Enabling KASAN showed that we
+access the BAID allocation out of bounds. A look at the code shows
+that the reorder buffer entry (struct iwl_mvm_reorder_buf_entry) is
+128 bytes when debug options such as lockdep are enabled, so starting
+from an aggregation size of 512 the size calculation overflows and we
+allocate a much smaller structure than we should, causing slab
+corruption once it is initialized.
+
+Fix this by simply using u32 instead of u16.
+
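+The arithmetic, as a worked example (the 128-byte entry size is the
+debug/lockdep case described above; variable names are illustrative):
+
+  u16 bad  = 512 * 128;   /* 65536 truncated to 0 -> undersized alloc */
+  u32 good = 512 * 128;   /* 65536, the intended reorder buffer size  */
+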
+Reported-by: Roee Goldfiner <roee.h.goldfiner@intel.com>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
+Link: https://lore.kernel.org/r/20230620125813.f428c856030d.I2c2bb808e945adb71bc15f5b2bac2d8957ea90eb@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+index 013aca70c3d3b..6b52afcf02721 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+@@ -2738,7 +2738,7 @@ int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
+ }
+
+ if (iwl_mvm_has_new_rx_api(mvm) && start) {
+- u16 reorder_buf_size = buf_size * sizeof(baid_data->entries[0]);
++ u32 reorder_buf_size = buf_size * sizeof(baid_data->entries[0]);
+
+ /* sparse doesn't like the __align() so don't check */
+ #ifndef __CHECKER__
+--
+2.39.2
+
--- /dev/null
+From 80c181a4bc2b86eb00ab6e09dcbcdda26aa6fc13 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Jun 2023 13:12:20 +0300
+Subject: wifi: iwlwifi: pcie: add device id 51F1 for killer 1675
+
+From: Yi Kuo <yi@yikuo.dev>
+
+[ Upstream commit f4daceae4087bbb3e9a56044b44601d520d009d2 ]
+
+Intel Killer AX1675i/s with device id 51f1 would show
+"No config found for PCI dev 51f1/1672" in dmesg and refuse to work.
+Add the new device id 51F1 for 1675i/s to fix the issue.
+
+Signed-off-by: Yi Kuo <yi@yikuo.dev>
+Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
+Link: https://lore.kernel.org/r/20230621130444.ee224675380b.I921c905e21e8d041ad808def8f454f27b5ebcd8b@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+index d5bd869086458..4d4db5f6836be 100644
+--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
++++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+@@ -683,6 +683,8 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
+ IWL_DEV_INFO(0x2726, 0x1672, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675i_name),
+ IWL_DEV_INFO(0x51F0, 0x1671, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675s_name),
+ IWL_DEV_INFO(0x51F0, 0x1672, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675i_name),
++ IWL_DEV_INFO(0x51F1, 0x1671, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675s_name),
++ IWL_DEV_INFO(0x51F1, 0x1672, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675i_name),
+ IWL_DEV_INFO(0x54F0, 0x1671, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675s_name),
+ IWL_DEV_INFO(0x54F0, 0x1672, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675i_name),
+ IWL_DEV_INFO(0x7A70, 0x1671, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675s_name),
+--
+2.39.2
+
--- /dev/null
+From a7163d690f5af8b426d97da0807e07b334cb5bdb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 4 Jun 2023 12:11:27 +0300
+Subject: wifi: mac80211_hwsim: Fix possible NULL dereference
+
+From: Ilan Peer <ilan.peer@intel.com>
+
+[ Upstream commit 0cc80943ef518a1c51a1111e9346d1daf11dd545 ]
+
+In a call to mac80211_hwsim_select_tx_link() the sta pointer might
+be NULL, so check that it is not NULL before accessing it.
+
+Signed-off-by: Ilan Peer <ilan.peer@intel.com>
+Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
+Link: https://lore.kernel.org/r/20230604120651.f4d889fc98c4.Iae85f527ed245a37637a874bb8b8c83d79812512@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/mac80211_hwsim.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
+index 0d81098c7b45c..da5c355405f68 100644
+--- a/drivers/net/wireless/mac80211_hwsim.c
++++ b/drivers/net/wireless/mac80211_hwsim.c
+@@ -4,7 +4,7 @@
+ * Copyright (c) 2008, Jouni Malinen <j@w1.fi>
+ * Copyright (c) 2011, Javier Lopez <jlopex@gmail.com>
+ * Copyright (c) 2016 - 2017 Intel Deutschland GmbH
+- * Copyright (C) 2018 - 2022 Intel Corporation
++ * Copyright (C) 2018 - 2023 Intel Corporation
+ */
+
+ /*
+@@ -1753,7 +1753,7 @@ mac80211_hwsim_select_tx_link(struct mac80211_hwsim_data *data,
+
+ WARN_ON(is_multicast_ether_addr(hdr->addr1));
+
+- if (WARN_ON_ONCE(!sta->valid_links))
++ if (WARN_ON_ONCE(!sta || !sta->valid_links))
+ return &vif->bss_conf;
+
+ for (i = 0; i < ARRAY_SIZE(vif->link_conf); i++) {
+--
+2.39.2
+
--- /dev/null
+From 683ebdf526ff6b7d1a58030e79ed32ee6779a0ac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Jun 2023 12:04:07 -0600
+Subject: wifi: wext-core: Fix -Wstringop-overflow warning in
+ ioctl_standard_iw_point()
+
+From: Gustavo A. R. Silva <gustavoars@kernel.org>
+
+[ Upstream commit 71e7552c90db2a2767f5c17c7ec72296b0d92061 ]
+
+-Wstringop-overflow is legitimately warning us about extra_size
+potentially being zero at some point, hence potentially ending
+up _allocating_ zero bytes of memory for the extra pointer and then
+trying to access that object in a call to copy_from_user().
+
+Fix this by adding a sanity check to ensure we never end up
+trying to allocate zero bytes of data for the extra pointer, before
+continuing to execute the rest of the code in the function.
+
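+A minimal sketch of the guarded path (simplified from
+ioctl_standard_iw_point(); how extra_size is computed is omitted):
+
+  if (extra_size <= 0)
+          return -EFAULT;   /* new sanity check */
+
+  /* kzalloc(0) returns ZERO_SIZE_PTR rather than NULL, so the !extra
+   * check below would not catch a zero-byte request and the later
+   * copy_from_user() would target a zero-sized object.
+   */
+  extra = kzalloc(extra_size, GFP_KERNEL);
+  if (!extra)
+          return -ENOMEM;
+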
+Address the following -Wstringop-overflow warning seen when building
+the m68k architecture with an allyesconfig configuration:
+ from net/wireless/wext-core.c:11:
+In function '_copy_from_user',
+ inlined from 'copy_from_user' at include/linux/uaccess.h:183:7,
+ inlined from 'ioctl_standard_iw_point' at net/wireless/wext-core.c:825:7:
+arch/m68k/include/asm/string.h:48:25: warning: '__builtin_memset' writing 1 or more bytes into a region of size 0 overflows the destination [-Wstringop-overflow=]
+ 48 | #define memset(d, c, n) __builtin_memset(d, c, n)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~
+include/linux/uaccess.h:153:17: note: in expansion of macro 'memset'
+ 153 | memset(to + (n - res), 0, res);
+ | ^~~~~~
+In function 'kmalloc',
+ inlined from 'kzalloc' at include/linux/slab.h:694:9,
+ inlined from 'ioctl_standard_iw_point' at net/wireless/wext-core.c:819:10:
+include/linux/slab.h:577:16: note: at offset 1 into destination object of size 0 allocated by '__kmalloc'
+ 577 | return __kmalloc(size, flags);
+ | ^~~~~~~~~~~~~~~~~~~~~~
+
+This helps with the ongoing efforts to globally enable
+-Wstringop-overflow.
+
+Link: https://github.com/KSPP/linux/issues/315
+Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Link: https://lore.kernel.org/r/ZItSlzvIpjdjNfd8@work
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/wireless/wext-core.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
+index fe8765c4075d3..8a4b85f96a13a 100644
+--- a/net/wireless/wext-core.c
++++ b/net/wireless/wext-core.c
+@@ -799,6 +799,12 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd,
+ }
+ }
+
++ /* Sanity-check to ensure we never end up _allocating_ zero
++ * bytes of data for extra.
++ */
++ if (extra_size <= 0)
++ return -EFAULT;
++
+ /* kzalloc() ensures NULL-termination for essid_compat. */
+ extra = kzalloc(extra_size, GFP_KERNEL);
+ if (!extra)
+--
+2.39.2
+