--- /dev/null
+From 92bf9e7e60ec477f33e9520a2f8ed58c717a4f9b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Jun 2023 20:45:04 +0200
+Subject: ACPI: video: Add backlight=native DMI quirk for Dell Studio 1569
+
+From: Hans de Goede <hdegoede@redhat.com>
+
+[ Upstream commit 23d28cc0444be3f694eb986cd653b6888b78431d ]
+
+The Dell Studio 1569 predates Windows 8, so it defaults to using
+acpi_video# for backlight control, but this is non-functional on
+this model.
+
+Add a DMI quirk to use the native intel_backlight interface which
+does work properly.
+
+Reported-by: raycekarneal <raycekarneal@gmail.com>
+Signed-off-by: Hans de Goede <hdegoede@redhat.com>
+Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/acpi/video_detect.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/drivers/acpi/video_detect.c b/drivers/acpi/video_detect.c
+index 4a77e7e6e3fa0..c8dd7f7407da2 100644
+--- a/drivers/acpi/video_detect.c
++++ b/drivers/acpi/video_detect.c
+@@ -526,6 +526,14 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
+ DMI_MATCH(DMI_PRODUCT_NAME, "Precision 7510"),
+ },
+ },
++ {
++ .callback = video_detect_force_native,
++ /* Dell Studio 1569 */
++ .matches = {
++ DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
++ DMI_MATCH(DMI_PRODUCT_NAME, "Studio 1569"),
++ },
++ },
+ {
+ .callback = video_detect_force_native,
+ /* Acer Aspire 3830TG */
+--
+2.39.2
+
--- /dev/null
+From af0f59a65f332284ca2bf7579e4158dff37dc62d Mon Sep 17 00:00:00 2001
+From: Oswald Buddenhagen <oswald.buddenhagen@gmx.de>
+Date: Wed, 10 May 2023 19:39:05 +0200
+Subject: [PATCH AUTOSEL 4.19 02/11] ALSA: emu10k1: roll up loops in DSP setup
+ code for Audigy
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit 8cabf83c7aa54530e699be56249fb44f9505c4f3 ]
+
+There is no apparent reason for the massive code duplication.
+
+Signed-off-by: Oswald Buddenhagen <oswald.buddenhagen@gmx.de>
+Link: https://lore.kernel.org/r/20230510173917.3073107-3-oswald.buddenhagen@gmx.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/emu10k1/emufx.c | 112 +++-----------------------------------
+ 1 file changed, 9 insertions(+), 103 deletions(-)
+
+diff --git a/sound/pci/emu10k1/emufx.c b/sound/pci/emu10k1/emufx.c
+index 1f25e6d029d82..84d98c098b744 100644
+--- a/sound/pci/emu10k1/emufx.c
++++ b/sound/pci/emu10k1/emufx.c
+@@ -1550,14 +1550,8 @@ A_OP(icode, &ptr, iMAC0, A_GPR(var), A_GPR(var), A_GPR(vol), A_EXTIN(input))
+ gpr += 2;
+
+ /* Master volume (will be renamed later) */
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+0+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+0+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+1+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+1+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+2+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+2+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+3+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+3+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+4+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+4+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+5+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+5+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+6+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+6+SND_EMU10K1_PLAYBACK_CHANNELS));
+- A_OP(icode, &ptr, iMAC0, A_GPR(playback+7+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+7+SND_EMU10K1_PLAYBACK_CHANNELS));
++ for (z = 0; z < 8; z++)
++ A_OP(icode, &ptr, iMAC0, A_GPR(playback+z+SND_EMU10K1_PLAYBACK_CHANNELS), A_C_00000000, A_GPR(gpr), A_GPR(playback+z+SND_EMU10K1_PLAYBACK_CHANNELS));
+ snd_emu10k1_init_mono_control(&controls[nctl++], "Wave Master Playback Volume", gpr, 0);
+ gpr += 2;
+
+@@ -1641,102 +1635,14 @@ A_OP(icode, &ptr, iMAC0, A_GPR(var), A_GPR(var), A_GPR(vol), A_EXTIN(input))
+ dev_dbg(emu->card->dev, "emufx.c: gpr=0x%x, tmp=0x%x\n",
+ gpr, tmp);
+ */
+- /* For the EMU1010: How to get 32bit values from the DSP. High 16bits into L, low 16bits into R. */
+- /* A_P16VIN(0) is delayed by one sample,
+- * so all other A_P16VIN channels will need to also be delayed
+- */
+- /* Left ADC in. 1 of 2 */
+ snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_P16VIN(0x0), A_FXBUS2(0) );
+- /* Right ADC in 1 of 2 */
+- gpr_map[gpr++] = 0x00000000;
+- /* Delaying by one sample: instead of copying the input
+- * value A_P16VIN to output A_FXBUS2 as in the first channel,
+- * we use an auxiliary register, delaying the value by one
+- * sample
+- */
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(2) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x1), A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(4) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x2), A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(6) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x3), A_C_00000000, A_C_00000000);
+- /* For 96kHz mode */
+- /* Left ADC in. 2 of 2 */
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(0x8) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x4), A_C_00000000, A_C_00000000);
+- /* Right ADC in 2 of 2 */
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(0xa) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x5), A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(0xc) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x6), A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr - 1), A_FXBUS2(0xe) );
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x7), A_C_00000000, A_C_00000000);
+- /* Pavel Hofman - we still have voices, A_FXBUS2s, and
+- * A_P16VINs available -
+- * let's add 8 more capture channels - total of 16
+- */
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x10));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x8),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x12));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0x9),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x14));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xa),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x16));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xb),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x18));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xc),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x1a));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xd),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x1c));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xe),
+- A_C_00000000, A_C_00000000);
+- gpr_map[gpr++] = 0x00000000;
+- snd_emu10k1_audigy_dsp_convert_32_to_2x16(icode, &ptr, tmp,
+- bit_shifter16,
+- A_GPR(gpr - 1),
+- A_FXBUS2(0x1e));
+- A_OP(icode, &ptr, iACC3, A_GPR(gpr - 1), A_P16VIN(0xf),
+- A_C_00000000, A_C_00000000);
++ /* A_P16VIN(0) is delayed by one sample, so all other A_P16VIN channels
++ * will need to also be delayed; we use an auxiliary register for that. */
++ for (z = 1; z < 0x10; z++) {
++ snd_emu10k1_audigy_dsp_convert_32_to_2x16( icode, &ptr, tmp, bit_shifter16, A_GPR(gpr), A_FXBUS2(z * 2) );
++ A_OP(icode, &ptr, iACC3, A_GPR(gpr), A_P16VIN(z), A_C_00000000, A_C_00000000);
++ gpr_map[gpr++] = 0x00000000;
++ }
+ }
+
+ #if 0
+--
+2.39.2
+
--- /dev/null
+From 3d60fd0a504a6c9938b831d63bf6bc1a74979fdf Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jul 2023 09:20:21 +0100
+Subject: ALSA: hda/realtek: Fix generic fixup definition for cs35l41 amp
+
+From: Vitaly Rodionov <vitalyr@opensource.cirrus.com>
+
+[ Upstream commit f7b069cf08816252f494d193b9ecdff172bf9aa1 ]
+
+The generic fixup for CS35L41 amplifiers should not have a vendor-specific
+chained fixup. For the ThinkPad laptops with the LED issue, we can just add
+a dedicated fixup.
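+
+With HDA fixups, ".chained = true" makes the core also apply the fixup
+named by ".chain_id" after the current one, so the generic entry was
+pulling the ThinkPad ACPI handling in for every user of it. Condensed
+from the hunks below, the fixup table ends up with:
+
+  [ALC287_FIXUP_CS35L41_I2C_2] = {
+          .type = HDA_FIXUP_FUNC,
+          .v.func = cs35l41_fixup_i2c_two,        /* no chain any more */
+  },
+  [ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI] = {
+          .type = HDA_FIXUP_FUNC,
+          .v.func = cs35l41_fixup_i2c_two,
+          .chained = true,
+          .chain_id = ALC269_FIXUP_THINKPAD_ACPI, /* keeps the LED fix */
+  },
+
+and the ThinkPad entries in alc269_fixup_tbl[] are switched to the new
+chained variant.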
+
+Fixes: a6ac60b36dade ("ALSA: hda/realtek: Fix mute led issue on thinkpad with cs35l41 s-codec")
+Signed-off-by: Vitaly Rodionov <vitalyr@opensource.cirrus.com>
+Link: https://lore.kernel.org/r/20230720082022.13033-1-vitalyr@opensource.cirrus.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 25 +++++++++++++++----------
+ 1 file changed, 15 insertions(+), 10 deletions(-)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 1a8ca119ffe45..cb34a62075b13 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -7220,6 +7220,7 @@ enum {
+ ALC287_FIXUP_YOGA9_14IAP7_BASS_SPK_PIN,
+ ALC295_FIXUP_DELL_INSPIRON_TOP_SPEAKERS,
+ ALC236_FIXUP_DELL_DUAL_CODECS,
++ ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI,
+ };
+
+ /* A special fixup for Lenovo C940 and Yoga Duet 7;
+@@ -9090,8 +9091,6 @@ static const struct hda_fixup alc269_fixups[] = {
+ [ALC287_FIXUP_CS35L41_I2C_2] = {
+ .type = HDA_FIXUP_FUNC,
+ .v.func = cs35l41_fixup_i2c_two,
+- .chained = true,
+- .chain_id = ALC269_FIXUP_THINKPAD_ACPI,
+ },
+ [ALC287_FIXUP_CS35L41_I2C_2_HP_GPIO_LED] = {
+ .type = HDA_FIXUP_FUNC,
+@@ -9228,6 +9227,12 @@ static const struct hda_fixup alc269_fixups[] = {
+ .chained = true,
+ .chain_id = ALC255_FIXUP_DELL1_MIC_NO_PRESENCE,
+ },
++ [ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI] = {
++ .type = HDA_FIXUP_FUNC,
++ .v.func = cs35l41_fixup_i2c_two,
++ .chained = true,
++ .chain_id = ALC269_FIXUP_THINKPAD_ACPI,
++ },
+ };
+
+ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+@@ -9750,14 +9755,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
+ SND_PCI_QUIRK(0x17aa, 0x22be, "Thinkpad X1 Carbon 8th", ALC285_FIXUP_THINKPAD_HEADSET_JACK),
+ SND_PCI_QUIRK(0x17aa, 0x22c1, "Thinkpad P1 Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK),
+ SND_PCI_QUIRK(0x17aa, 0x22c2, "Thinkpad X1 Extreme Gen 3", ALC285_FIXUP_THINKPAD_NO_BASS_SPK_HEADSET_JACK),
+- SND_PCI_QUIRK(0x17aa, 0x22f1, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2),
+- SND_PCI_QUIRK(0x17aa, 0x22f2, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2),
+- SND_PCI_QUIRK(0x17aa, 0x22f3, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2),
+- SND_PCI_QUIRK(0x17aa, 0x2316, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2),
+- SND_PCI_QUIRK(0x17aa, 0x2317, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2),
+- SND_PCI_QUIRK(0x17aa, 0x2318, "Thinkpad Z13 Gen2", ALC287_FIXUP_CS35L41_I2C_2),
+- SND_PCI_QUIRK(0x17aa, 0x2319, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2),
+- SND_PCI_QUIRK(0x17aa, 0x231a, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2),
++ SND_PCI_QUIRK(0x17aa, 0x22f1, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
++ SND_PCI_QUIRK(0x17aa, 0x22f2, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
++ SND_PCI_QUIRK(0x17aa, 0x22f3, "Thinkpad", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
++ SND_PCI_QUIRK(0x17aa, 0x2316, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
++ SND_PCI_QUIRK(0x17aa, 0x2317, "Thinkpad P1 Gen 6", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
++ SND_PCI_QUIRK(0x17aa, 0x2318, "Thinkpad Z13 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
++ SND_PCI_QUIRK(0x17aa, 0x2319, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
++ SND_PCI_QUIRK(0x17aa, 0x231a, "Thinkpad Z16 Gen2", ALC287_FIXUP_CS35L41_I2C_2_THINKPAD_ACPI),
+ SND_PCI_QUIRK(0x17aa, 0x30bb, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
+ SND_PCI_QUIRK(0x17aa, 0x30e2, "ThinkCentre AIO", ALC233_FIXUP_LENOVO_LINE2_MIC_HOTKEY),
+ SND_PCI_QUIRK(0x17aa, 0x310c, "ThinkCentre Station", ALC294_FIXUP_LENOVO_MIC_LOCATION),
+--
+2.39.2
+
--- /dev/null
+From 01fe45bc121655c2ea7d823e3442f3c388fb23b1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 26 Jun 2023 16:23:54 +0530
+Subject: ASoC: amd: acp: fix for invalid dai id handling in
+ acp_get_byte_count()
+
+From: Vijendar Mukunda <Vijendar.Mukunda@amd.com>
+
+[ Upstream commit 85aeab362201cf52c34cd429e4f6c75a0b42f9a3 ]
+
+For an invalid dai id, return a byte count of zero from the
+acp_get_byte_count() function instead of -EINVAL.
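+
+Since acp_get_byte_count() returns a u64, a negative errno would in any
+case be seen by callers as an enormous byte count. A hypothetical,
+simplified sketch of the idea (not the driver code; the real cases read
+the high/low position registers):
+
+  static inline u64 get_byte_count(struct acp_dev_data *adata, int dai_id)
+  {
+          u64 byte_count = 0;     /* default for unknown dai ids */
+
+          switch (dai_id) {
+          /* ... real cases fill byte_count from the count registers ... */
+          default:
+                  dev_err(adata->dev, "Invalid dai id %x\n", dai_id);
+                  /* returning -EINVAL here would be read back as
+                   * 0xffffffffffffffea by the u64 caller */
+                  break;
+          }
+          return byte_count;
+  }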
+
+Fixes: 623621a9f9e1 ("ASoC: amd: Add common framework to support I2S on ACP SOC")
+
+Signed-off-by: Vijendar Mukunda <Vijendar.Mukunda@amd.com>
+Link: https://lore.kernel.org/r/20230626105356.2580125-6-Vijendar.Mukunda@amd.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/amd/acp/amd.h | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/sound/soc/amd/acp/amd.h b/sound/soc/amd/acp/amd.h
+index 5f2119f422715..12a176a50fd6e 100644
+--- a/sound/soc/amd/acp/amd.h
++++ b/sound/soc/amd/acp/amd.h
+@@ -173,7 +173,7 @@ int snd_amd_acp_find_config(struct pci_dev *pci);
+
+ static inline u64 acp_get_byte_count(struct acp_dev_data *adata, int dai_id, int direction)
+ {
+- u64 byte_count, low = 0, high = 0;
++ u64 byte_count = 0, low = 0, high = 0;
+
+ if (direction == SNDRV_PCM_STREAM_PLAYBACK) {
+ switch (dai_id) {
+@@ -191,7 +191,7 @@ static inline u64 acp_get_byte_count(struct acp_dev_data *adata, int dai_id, int
+ break;
+ default:
+ dev_err(adata->dev, "Invalid dai id %x\n", dai_id);
+- return -EINVAL;
++ goto POINTER_RETURN_BYTES;
+ }
+ } else {
+ switch (dai_id) {
+@@ -213,12 +213,13 @@ static inline u64 acp_get_byte_count(struct acp_dev_data *adata, int dai_id, int
+ break;
+ default:
+ dev_err(adata->dev, "Invalid dai id %x\n", dai_id);
+- return -EINVAL;
++ goto POINTER_RETURN_BYTES;
+ }
+ }
+ /* Get 64 bit value from two 32 bit registers */
+ byte_count = (high << 32) | low;
+
++POINTER_RETURN_BYTES:
+ return byte_count;
+ }
+
+--
+2.39.2
+
--- /dev/null
+From 8fdb4c209948ee94e6e06e178741f29d84f4e4d5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Jul 2023 13:57:23 +0100
+Subject: ASoC: codecs: wcd938x: fix dB range for HPHL and HPHR
+
+From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+[ Upstream commit c03226ba15fe3c42d13907ec7d8536396602557b ]
+
+The dB range for the HPHL and HPHR gains is from +6dB to -30dB in steps
+of 1.5dB, with register values ranging from 0 to 24.
+
+The current code maps this dB range incorrectly; fix it to allow proper
+volume setting.
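+
+As a sanity check of the new mapping (assuming the usual ALSA TLV
+semantics, where DECLARE_TLV_DB_SCALE() takes the minimum and step in
+0.01 dB units):
+
+  DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, -3000);
+  /* control value 0  -> -3000 + 0  * 150 = -30.0 dB */
+  /* control value 24 -> -3000 + 24 * 150 =  +6.0 dB */
+
+With the invert flag now set in SOC_SINGLE_TLV(), control value 0 maps to
+raw register value 0x18 and control value 24 to raw 0, which presumably
+matches how the hardware encodes -30 dB and +6 dB.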
+
+Fixes: e8ba1e05bdc0 ("ASoC: codecs: wcd938x: add basic controls")
+Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+Link: https://lore.kernel.org/r/20230705125723.40464-1-srinivas.kandagatla@linaro.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/codecs/wcd938x.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c
+index 7715040383840..2316481c2541b 100644
+--- a/sound/soc/codecs/wcd938x.c
++++ b/sound/soc/codecs/wcd938x.c
+@@ -210,7 +210,7 @@ struct wcd938x_priv {
+ };
+
+ static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(ear_pa_gain, 600, -1800);
+-static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(line_gain, 600, -3000);
++static const DECLARE_TLV_DB_SCALE(line_gain, -3000, 150, -3000);
+ static const SNDRV_CTL_TLVD_DECLARE_DB_MINMAX(analog_gain, 0, 3000);
+
+ struct wcd938x_mbhc_zdet_param {
+@@ -2662,8 +2662,8 @@ static const struct snd_kcontrol_new wcd938x_snd_controls[] = {
+ wcd938x_get_swr_port, wcd938x_set_swr_port),
+ SOC_SINGLE_EXT("DSD_R Switch", WCD938X_DSD_R, 0, 1, 0,
+ wcd938x_get_swr_port, wcd938x_set_swr_port),
+- SOC_SINGLE_TLV("HPHL Volume", WCD938X_HPH_L_EN, 0, 0x18, 0, line_gain),
+- SOC_SINGLE_TLV("HPHR Volume", WCD938X_HPH_R_EN, 0, 0x18, 0, line_gain),
++ SOC_SINGLE_TLV("HPHL Volume", WCD938X_HPH_L_EN, 0, 0x18, 1, line_gain),
++ SOC_SINGLE_TLV("HPHR Volume", WCD938X_HPH_R_EN, 0, 0x18, 1, line_gain),
+ WCD938X_EAR_PA_GAIN_TLV("EAR_PA Volume", WCD938X_ANA_EAR_COMPANDER_CTL,
+ 2, 0x10, 0, ear_pa_gain),
+ SOC_SINGLE_EXT("ADC1 Switch", WCD938X_ADC1, 1, 1, 0,
+--
+2.39.2
+
--- /dev/null
+From 6837fd2094a0338619e2fbd26039c39ad53d3cf8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Jun 2023 16:27:13 +0200
+Subject: ASoC: codecs: wcd938x: fix mbhc impedance loglevel
+
+From: Johan Hovold <johan+linaro@kernel.org>
+
+[ Upstream commit e5ce198bd5c6923b6a51e1493b1401f84c24b26d ]
+
+Demote the MBHC impedance measurement printk, which is not an error
+message, from error to debug level.
+
+While at it, fix the capitalisation of "ohm" and add the missing space
+before the opening parenthesis.
+
+Fixes: bcee7ed09b8e ("ASoC: codecs: wcd938x: add Multi Button Headset Control support")
+Signed-off-by: Johan Hovold <johan+linaro@kernel.org>
+Reviewed-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+Link: https://lore.kernel.org/r/20230630142717.5314-2-johan+linaro@kernel.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/codecs/wcd938x.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/sound/soc/codecs/wcd938x.c b/sound/soc/codecs/wcd938x.c
+index df0b3ac7f1321..7715040383840 100644
+--- a/sound/soc/codecs/wcd938x.c
++++ b/sound/soc/codecs/wcd938x.c
+@@ -2165,8 +2165,8 @@ static inline void wcd938x_mbhc_get_result_params(struct wcd938x_priv *wcd938x,
+ else if (x1 < minCode_param[noff])
+ *zdet = WCD938X_ZDET_FLOATING_IMPEDANCE;
+
+- pr_err("%s: d1=%d, c1=%d, x1=0x%x, z_val=%d(milliOhm)\n",
+- __func__, d1, c1, x1, *zdet);
++ pr_debug("%s: d1=%d, c1=%d, x1=0x%x, z_val=%d (milliohm)\n",
++ __func__, d1, c1, x1, *zdet);
+ ramp_down:
+ i = 0;
+ while (x1) {
+--
+2.39.2
+
--- /dev/null
+From 4b2b48aa8c43caaeef24802e4265e3ba2daa7ba5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 5 Jul 2023 14:18:42 +0100
+Subject: ASoC: qcom: q6apm: do not close GPR port before closing graph
+
+From: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+
+[ Upstream commit c1be62923d4d86e7c06b1224626e27eb8d9ab32e ]
+
+Closing the GPR port before the graph close can result in unhandled
+notifications from the DSP. This leads to a spam of errors from the GPR
+driver, as there is no one left to handle these notifications at that
+point in time.
+
+Fix this by closing the GPR port after the graph close has finished.
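+
+The teardown order after the change is (condensed from the hunk below):
+
+  graph->ar_graph = NULL;       /* data callbacks now ignore stray events */
+  kref_put(&ar_graph->refcount, q6apm_put_audioreach_graph); /* close graph */
+  gpr_free_port(graph->port);   /* only now drop the GPR port */
+  kfree(graph);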
+
+Fixes: 5477518b8a0e ("ASoC: qdsp6: audioreach: add q6apm support")
+Signed-off-by: Srinivas Kandagatla <srinivas.kandagatla@linaro.org>
+Link: https://lore.kernel.org/r/20230705131842.41584-1-srinivas.kandagatla@linaro.org
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/qcom/qdsp6/q6apm.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/sound/soc/qcom/qdsp6/q6apm.c b/sound/soc/qcom/qdsp6/q6apm.c
+index 794019286c704..16acdf3a99e1c 100644
+--- a/sound/soc/qcom/qdsp6/q6apm.c
++++ b/sound/soc/qcom/qdsp6/q6apm.c
+@@ -515,6 +515,8 @@ static int graph_callback(struct gpr_resp_pkt *data, void *priv, int op)
+
+ switch (hdr->opcode) {
+ case DATA_CMD_RSP_WR_SH_MEM_EP_DATA_BUFFER_DONE_V2:
++ if (!graph->ar_graph)
++ break;
+ client_event = APM_CLIENT_EVENT_DATA_WRITE_DONE;
+ mutex_lock(&graph->lock);
+ token = hdr->token & APM_WRITE_TOKEN_MASK;
+@@ -548,6 +550,8 @@ static int graph_callback(struct gpr_resp_pkt *data, void *priv, int op)
+ wake_up(&graph->cmd_wait);
+ break;
+ case DATA_CMD_RSP_RD_SH_MEM_EP_DATA_BUFFER_V2:
++ if (!graph->ar_graph)
++ break;
+ client_event = APM_CLIENT_EVENT_DATA_READ_DONE;
+ mutex_lock(&graph->lock);
+ rd_done = data->payload;
+@@ -650,8 +654,9 @@ int q6apm_graph_close(struct q6apm_graph *graph)
+ {
+ struct audioreach_graph *ar_graph = graph->ar_graph;
+
+- gpr_free_port(graph->port);
++ graph->ar_graph = NULL;
+ kref_put(&ar_graph->refcount, q6apm_put_audioreach_graph);
++ gpr_free_port(graph->port);
+ kfree(graph);
+
+ return 0;
+--
+2.39.2
+
--- /dev/null
+From f51906ec30b0242c56247bae4862008fd7ae2eeb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Jul 2023 14:25:23 +0300
+Subject: ASoC: SOF: ipc3-dtrace: uninitialized data in
+ dfsentry_trace_filter_write()
+
+From: Dan Carpenter <dan.carpenter@linaro.org>
+
+[ Upstream commit 469e2f28c2cbee2430058c1c9bb6d1675d7195fb ]
+
+The code doesn't check how many bytes simple_write_to_buffer() writes to
+the buffer. The only thing we know is that the first byte is initialized
+and the last byte of the buffer is set to NUL; the bytes in between could
+be uninitialized.
+
+There is no need to use simple_write_to_buffer(). This code does not
+support partial writes but instead passes "pos = 0" as the starting
+offset regardless of what the user passed as "*ppos". Just use the
+copy_from_user() function and initialize the whole buffer.
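+
+The root of the problem is that simple_write_to_buffer() may legitimately
+copy fewer than "count" bytes; roughly (a simplified sketch of its
+behaviour, not a verbatim copy of fs/libfs.c):
+
+  res = copy_from_user(to + pos, from, count);
+  if (res == count)
+          return -EFAULT;       /* only fails if nothing was copied */
+  return count - res;           /* may be less than the caller's count */
+
+The old code only checked for a negative return, so after a short copy it
+would go on to parse a buffer whose tail, between the copied bytes and the
+NUL at string[count], was never written.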
+
+Fixes: 671e0b90051e ("ASoC: SOF: Clone the trace code to ipc3-dtrace as fw_tracing implementation")
+Signed-off-by: Dan Carpenter <dan.carpenter@linaro.org>
+Link: https://lore.kernel.org/r/74148292-ce4d-4e01-a1a7-921e6767da14@moroto.mountain
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/sof/ipc3-dtrace.c | 9 ++++-----
+ 1 file changed, 4 insertions(+), 5 deletions(-)
+
+diff --git a/sound/soc/sof/ipc3-dtrace.c b/sound/soc/sof/ipc3-dtrace.c
+index b815b0244d9e4..8cf421577378c 100644
+--- a/sound/soc/sof/ipc3-dtrace.c
++++ b/sound/soc/sof/ipc3-dtrace.c
+@@ -187,7 +187,6 @@ static ssize_t dfsentry_trace_filter_write(struct file *file, const char __user
+ struct snd_sof_dfsentry *dfse = file->private_data;
+ struct sof_ipc_trace_filter_elem *elems = NULL;
+ struct snd_sof_dev *sdev = dfse->sdev;
+- loff_t pos = 0;
+ int num_elems;
+ char *string;
+ int ret;
+@@ -202,11 +201,11 @@ static ssize_t dfsentry_trace_filter_write(struct file *file, const char __user
+ if (!string)
+ return -ENOMEM;
+
+- /* assert null termination */
+- string[count] = 0;
+- ret = simple_write_to_buffer(string, count, &pos, from, count);
+- if (ret < 0)
++ if (copy_from_user(string, from, count)) {
++ ret = -EFAULT;
+ goto error;
++ }
++ string[count] = '\0';
+
+ ret = trace_filter_parse(sdev, string, &num_elems, &elems);
+ if (ret < 0)
+--
+2.39.2
+
--- /dev/null
+From f56314f8f520be77c9344013ed73653e992d3600 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Jun 2023 01:04:32 +0300
+Subject: Bluetooth: hci_event: call disconnect callback before deleting conn
+
+From: Pauli Virtanen <pav@iki.fi>
+
+[ Upstream commit 7f7cfcb6f0825652973b780f248603e23f16ee90 ]
+
+In hci_cs_disconnect, we do hci_conn_del even if disconnection failed.
+
+ISO, L2CAP and SCO connections refer to the hci_conn without
+hci_conn_get, so disconn_cfm must be called so they can clean up their
+conn, otherwise use-after-free occurs.
+
+ISO:
+==========================================================
+iso_sock_connect:880: sk 00000000eabd6557
+iso_connect_cis:356: 70:1a:b8:98:ff:a2 -> 28:3d:c2:4a:7e:da
+...
+iso_conn_add:140: hcon 000000001696f1fd conn 00000000b6251073
+hci_dev_put:1487: hci0 orig refcnt 17
+__iso_chan_add:214: conn 00000000b6251073
+iso_sock_clear_timer:117: sock 00000000eabd6557 state 3
+...
+hci_rx_work:4085: hci0 Event packet
+hci_event_packet:7601: hci0: event 0x0f
+hci_cmd_status_evt:4346: hci0: opcode 0x0406
+hci_cs_disconnect:2760: hci0: status 0x0c
+hci_sent_cmd_data:3107: hci0 opcode 0x0406
+hci_conn_del:1151: hci0 hcon 000000001696f1fd handle 2560
+hci_conn_unlink:1102: hci0: hcon 000000001696f1fd
+hci_conn_drop:1451: hcon 00000000d8521aaf orig refcnt 2
+hci_chan_list_flush:2780: hcon 000000001696f1fd
+hci_dev_put:1487: hci0 orig refcnt 21
+hci_dev_put:1487: hci0 orig refcnt 20
+hci_req_cmd_complete:3978: opcode 0x0406 status 0x0c
+... <no iso_* activity on sk/conn> ...
+iso_sock_sendmsg:1098: sock 00000000dea5e2e0, sk 00000000eabd6557
+BUG: kernel NULL pointer dereference, address: 0000000000000668
+PGD 0 P4D 0
+Oops: 0000 [#1] PREEMPT SMP PTI
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014
+RIP: 0010:iso_sock_sendmsg (net/bluetooth/iso.c:1112) bluetooth
+==========================================================
+
+L2CAP:
+==================================================================
+hci_cmd_status_evt:4359: hci0: opcode 0x0406
+hci_cs_disconnect:2760: hci0: status 0x0c
+hci_sent_cmd_data:3085: hci0 opcode 0x0406
+hci_conn_del:1151: hci0 hcon ffff88800c999000 handle 3585
+hci_conn_unlink:1102: hci0: hcon ffff88800c999000
+hci_chan_list_flush:2780: hcon ffff88800c999000
+hci_chan_del:2761: hci0 hcon ffff88800c999000 chan ffff888018ddd280
+...
+BUG: KASAN: slab-use-after-free in hci_send_acl+0x2d/0x540 [bluetooth]
+Read of size 8 at addr ffff888018ddd298 by task bluetoothd/1175
+
+CPU: 0 PID: 1175 Comm: bluetoothd Tainted: G E 6.4.0-rc4+ #2
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014
+Call Trace:
+ <TASK>
+ dump_stack_lvl+0x5b/0x90
+ print_report+0xcf/0x670
+ ? __virt_addr_valid+0xf8/0x180
+ ? hci_send_acl+0x2d/0x540 [bluetooth]
+ kasan_report+0xa8/0xe0
+ ? hci_send_acl+0x2d/0x540 [bluetooth]
+ hci_send_acl+0x2d/0x540 [bluetooth]
+ ? __pfx___lock_acquire+0x10/0x10
+ l2cap_chan_send+0x1fd/0x1300 [bluetooth]
+ ? l2cap_sock_sendmsg+0xf2/0x170 [bluetooth]
+ ? __pfx_l2cap_chan_send+0x10/0x10 [bluetooth]
+ ? lock_release+0x1d5/0x3c0
+ ? mark_held_locks+0x1a/0x90
+ l2cap_sock_sendmsg+0x100/0x170 [bluetooth]
+ sock_write_iter+0x275/0x280
+ ? __pfx_sock_write_iter+0x10/0x10
+ ? __pfx___lock_acquire+0x10/0x10
+ do_iter_readv_writev+0x176/0x220
+ ? __pfx_do_iter_readv_writev+0x10/0x10
+ ? find_held_lock+0x83/0xa0
+ ? selinux_file_permission+0x13e/0x210
+ do_iter_write+0xda/0x340
+ vfs_writev+0x1b4/0x400
+ ? __pfx_vfs_writev+0x10/0x10
+ ? __seccomp_filter+0x112/0x750
+ ? populate_seccomp_data+0x182/0x220
+ ? __fget_light+0xdf/0x100
+ ? do_writev+0x19d/0x210
+ do_writev+0x19d/0x210
+ ? __pfx_do_writev+0x10/0x10
+ ? mark_held_locks+0x1a/0x90
+ do_syscall_64+0x60/0x90
+ ? lockdep_hardirqs_on_prepare+0x149/0x210
+ ? do_syscall_64+0x6c/0x90
+ ? lockdep_hardirqs_on_prepare+0x149/0x210
+ entry_SYSCALL_64_after_hwframe+0x72/0xdc
+RIP: 0033:0x7ff45cb23e64
+Code: 15 d1 1f 0d 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 80 3d 9d a7 0d 00 00 74 13 b8 14 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 54 c3 0f 1f 00 48 83 ec 28 89 54 24 1c 48 89
+RSP: 002b:00007fff21ae09b8 EFLAGS: 00000202 ORIG_RAX: 0000000000000014
+RAX: ffffffffffffffda RBX: 0000000000000001 RCX: 00007ff45cb23e64
+RDX: 0000000000000001 RSI: 00007fff21ae0aa0 RDI: 0000000000000017
+RBP: 00007fff21ae0aa0 R08: 000000000095a8a0 R09: 0000607000053f40
+R10: 0000000000000001 R11: 0000000000000202 R12: 00007fff21ae0ac0
+R13: 00000fffe435c150 R14: 00007fff21ae0a80 R15: 000060f000000040
+ </TASK>
+
+Allocated by task 771:
+ kasan_save_stack+0x33/0x60
+ kasan_set_track+0x25/0x30
+ __kasan_kmalloc+0xaa/0xb0
+ hci_chan_create+0x67/0x1b0 [bluetooth]
+ l2cap_conn_add.part.0+0x17/0x590 [bluetooth]
+ l2cap_connect_cfm+0x266/0x6b0 [bluetooth]
+ hci_le_remote_feat_complete_evt+0x167/0x310 [bluetooth]
+ hci_event_packet+0x38d/0x800 [bluetooth]
+ hci_rx_work+0x287/0xb20 [bluetooth]
+ process_one_work+0x4f7/0x970
+ worker_thread+0x8f/0x620
+ kthread+0x17f/0x1c0
+ ret_from_fork+0x2c/0x50
+
+Freed by task 771:
+ kasan_save_stack+0x33/0x60
+ kasan_set_track+0x25/0x30
+ kasan_save_free_info+0x2e/0x50
+ ____kasan_slab_free+0x169/0x1c0
+ slab_free_freelist_hook+0x9e/0x1c0
+ __kmem_cache_free+0xc0/0x310
+ hci_chan_list_flush+0x46/0x90 [bluetooth]
+ hci_conn_cleanup+0x7d/0x330 [bluetooth]
+ hci_cs_disconnect+0x35d/0x530 [bluetooth]
+ hci_cmd_status_evt+0xef/0x2b0 [bluetooth]
+ hci_event_packet+0x38d/0x800 [bluetooth]
+ hci_rx_work+0x287/0xb20 [bluetooth]
+ process_one_work+0x4f7/0x970
+ worker_thread+0x8f/0x620
+ kthread+0x17f/0x1c0
+ ret_from_fork+0x2c/0x50
+==================================================================
+
+Fixes: b8d290525e39 ("Bluetooth: clean up connection in hci_cs_disconnect")
+Signed-off-by: Pauli Virtanen <pav@iki.fi>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/hci_event.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
+index ec9b0612f2761..83eaf25ece465 100644
+--- a/net/bluetooth/hci_event.c
++++ b/net/bluetooth/hci_event.c
+@@ -2789,6 +2789,9 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
+ hci_enable_advertising(hdev);
+ }
+
++ /* Inform sockets conn is gone before we delete it */
++ hci_disconn_cfm(conn, HCI_ERROR_UNSPECIFIED);
++
+ goto done;
+ }
+
+--
+2.39.2
+
--- /dev/null
+From 37d8d1ea773870a99ffb70e4fb61facc4b296dfc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 30 Jun 2023 15:33:14 -0700
+Subject: Bluetooth: hci_sync: Avoid use-after-free in dbg for
+ hci_remove_adv_monitor()
+
+From: Douglas Anderson <dianders@chromium.org>
+
+[ Upstream commit de6dfcefd107667ce2dbedf4d9337f5ed557a4a1 ]
+
+KASAN reports that there's a use-after-free in
+hci_remove_adv_monitor(). Trawling through the disassembly, you can
+see that the complaint is from the access in bt_dev_dbg() under the
+HCI_ADV_MONITOR_EXT_MSFT case. The problem case happens because
+msft_remove_monitor() can end up freeing the monitor
+structure. Specifically:
+ hci_remove_adv_monitor() ->
+ msft_remove_monitor() ->
+ msft_remove_monitor_sync() ->
+ msft_le_cancel_monitor_advertisement_cb() ->
+ hci_free_adv_monitor()
+
+Let's fix the problem by just stashing the relevant data when it's
+still valid.
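+
+The change is the usual "read it while it is still valid" pattern
+(condensed from the hunk below):
+
+  handle = monitor->handle;                     /* stash before the call */
+  status = msft_remove_monitor(hdev, monitor);  /* may free "monitor" */
+  bt_dev_dbg(hdev, "%s remove monitor %d msft status %d",
+             hdev->name, handle, status);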
+
+Fixes: 7cf5c2978f23 ("Bluetooth: hci_sync: Refactor remove Adv Monitor")
+Signed-off-by: Douglas Anderson <dianders@chromium.org>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/hci_core.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
+index be0e6865b340f..d034bf2a999e1 100644
+--- a/net/bluetooth/hci_core.c
++++ b/net/bluetooth/hci_core.c
+@@ -1972,6 +1972,7 @@ static int hci_remove_adv_monitor(struct hci_dev *hdev,
+ struct adv_monitor *monitor)
+ {
+ int status = 0;
++ int handle;
+
+ switch (hci_get_adv_monitor_offload_ext(hdev)) {
+ case HCI_ADV_MONITOR_EXT_NONE: /* also goes here when powered off */
+@@ -1980,9 +1981,10 @@ static int hci_remove_adv_monitor(struct hci_dev *hdev,
+ goto free_monitor;
+
+ case HCI_ADV_MONITOR_EXT_MSFT:
++ handle = monitor->handle;
+ status = msft_remove_monitor(hdev, monitor);
+ bt_dev_dbg(hdev, "%s remove monitor %d msft status %d",
+- hdev->name, monitor->handle, status);
++ hdev->name, handle, status);
+ break;
+ }
+
+--
+2.39.2
+
--- /dev/null
+From 1bba473b620234ccdcf3a2b08e021f5b27202ce4 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Jun 2023 01:04:33 +0300
+Subject: Bluetooth: ISO: fix iso_conn related locking and validity issues
+
+From: Pauli Virtanen <pav@iki.fi>
+
+[ Upstream commit d40ae85ee62e3666f45bc61864b22121346f88ef ]
+
+sk->sk_state indicates whether iso_pi(sk)->conn is valid. Operations
+that check/update sk_state and access conn should hold lock_sock,
+otherwise they can race.
+
+The order of taking locks is hci_dev_lock > lock_sock > iso_conn_lock,
+which is how it is in connect/disconnect_cfm -> iso_conn_del ->
+iso_chan_del.
+
+Fix locking in iso_connect_cis/bis and sendmsg/recvmsg to take lock_sock
+around updating sk_state and conn.
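+
+For sendmsg, the resulting pattern is roughly (condensed from the hunk
+below):
+
+  lock_sock(sk);
+  if (sk->sk_state != BT_CONNECTED) {
+          release_sock(sk);
+          return -ENOTCONN;
+  }
+  mtu = iso_pi(sk)->conn->hcon->hdev->iso_mtu;  /* conn valid under lock_sock */
+  release_sock(sk);
+
+  skb = bt_skb_sendmsg(sk, msg, len, mtu, HCI_ISO_DATA_HDR_SIZE, 0);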
+
+iso_conn_del must not occur during iso_connect_cis/bis, as it frees the
+iso_conn. Hold hdev->lock longer to prevent that.
+
+This should not reintroduce the issue fixed in commit 241f51931c35
+("Bluetooth: ISO: Avoid circular locking dependency"), since the we
+acquire locks in order. We retain the fix in iso_sock_connect to release
+lock_sock before iso_connect_* acquires hdev->lock.
+
+Similarly for commit 6a5ad251b7cd ("Bluetooth: ISO: Fix possible
+circular locking dependency"). We retain the fix in iso_conn_ready to
+not acquire iso_conn_lock before lock_sock.
+
+iso_conn_add shall return an iso_conn with a valid hcon. Make this so also
+when reusing an old CIS connection waiting for its disconnect timeout (see
+__iso_sock_close where conn->hcon is set to NULL).
+
+Trace with iso_conn_del after iso_chan_add in iso_connect_cis:
+===============================================================
+iso_sock_create:771: sock 00000000be9b69b7
+iso_sock_init:693: sk 000000004dff667e
+iso_sock_bind:827: sk 000000004dff667e 70:1a:b8:98:ff:a2 type 1
+iso_sock_setsockopt:1289: sk 000000004dff667e
+iso_sock_setsockopt:1289: sk 000000004dff667e
+iso_sock_setsockopt:1289: sk 000000004dff667e
+iso_sock_connect:875: sk 000000004dff667e
+iso_connect_cis:353: 70:1a:b8:98:ff:a2 -> 28:3d:c2:4a:7e:da
+hci_get_route:1199: 70:1a:b8:98:ff:a2 -> 28:3d:c2:4a:7e:da
+hci_conn_add:1005: hci0 dst 28:3d:c2:4a:7e:da
+iso_conn_add:140: hcon 000000007b65d182 conn 00000000daf8625e
+__iso_chan_add:214: conn 00000000daf8625e
+iso_connect_cfm:1700: hcon 000000007b65d182 bdaddr 28:3d:c2:4a:7e:da status 12
+iso_conn_del:187: hcon 000000007b65d182 conn 00000000daf8625e, err 16
+iso_sock_clear_timer:117: sock 000000004dff667e state 3
+ <Note: sk_state is BT_BOUND (3), so iso_connect_cis is still
+ running at this point>
+iso_chan_del:153: sk 000000004dff667e, conn 00000000daf8625e, err 16
+hci_conn_del:1151: hci0 hcon 000000007b65d182 handle 65535
+hci_conn_unlink:1102: hci0: hcon 000000007b65d182
+hci_chan_list_flush:2780: hcon 000000007b65d182
+iso_sock_getsockopt:1376: sk 000000004dff667e
+iso_sock_getname:1070: sock 00000000be9b69b7, sk 000000004dff667e
+iso_sock_getname:1070: sock 00000000be9b69b7, sk 000000004dff667e
+iso_sock_getsockopt:1376: sk 000000004dff667e
+iso_sock_getname:1070: sock 00000000be9b69b7, sk 000000004dff667e
+iso_sock_getname:1070: sock 00000000be9b69b7, sk 000000004dff667e
+iso_sock_shutdown:1434: sock 00000000be9b69b7, sk 000000004dff667e, how 1
+__iso_sock_close:632: sk 000000004dff667e state 5 socket 00000000be9b69b7
+ <Note: sk_state is BT_CONNECT (5), even though iso_chan_del sets
+ BT_CLOSED (6). Only iso_connect_cis sets it to BT_CONNECT, so it
+ must be that iso_chan_del occurred between iso_chan_add and end of
+ iso_connect_cis.>
+BUG: kernel NULL pointer dereference, address: 0000000000000000
+PGD 8000000006467067 P4D 8000000006467067 PUD 3f5f067 PMD 0
+Oops: 0000 [#1] PREEMPT SMP PTI
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014
+RIP: 0010:__iso_sock_close (net/bluetooth/iso.c:664) bluetooth
+===============================================================
+
+Trace with iso_conn_del before iso_chan_add in iso_connect_cis:
+===============================================================
+iso_connect_cis:356: 70:1a:b8:98:ff:a2 -> 28:3d:c2:4a:7e:da
+...
+iso_conn_add:140: hcon 0000000093bc551f conn 00000000768ae504
+hci_dev_put:1487: hci0 orig refcnt 21
+hci_event_packet:7607: hci0: event 0x0e
+hci_cmd_complete_evt:4231: hci0: opcode 0x2062
+hci_cc_le_set_cig_params:3846: hci0: status 0x07
+hci_sent_cmd_data:3107: hci0 opcode 0x2062
+iso_connect_cfm:1703: hcon 0000000093bc551f bdaddr 28:3d:c2:4a:7e:da status 7
+iso_conn_del:187: hcon 0000000093bc551f conn 00000000768ae504, err 12
+hci_conn_del:1151: hci0 hcon 0000000093bc551f handle 65535
+hci_conn_unlink:1102: hci0: hcon 0000000093bc551f
+hci_chan_list_flush:2780: hcon 0000000093bc551f
+__iso_chan_add:214: conn 00000000768ae504
+ <Note: this conn was already freed in iso_conn_del above>
+iso_sock_clear_timer:117: sock 0000000098323f95 state 3
+general protection fault, probably for non-canonical address 0x30b29c630930aec8: 0000 [#1] PREEMPT SMP PTI
+CPU: 1 PID: 1920 Comm: bluetoothd Tainted: G E 6.3.0-rc7+ #4
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014
+RIP: 0010:detach_if_pending+0x28/0xd0
+Code: 90 90 0f 1f 44 00 00 48 8b 47 08 48 85 c0 0f 84 ad 00 00 00 55 89 d5 53 48 83 3f 00 48 89 fb 74 7d 66 90 48 8b 03 48 8b 53 08 <>
+RSP: 0018:ffffb90841a67d08 EFLAGS: 00010007
+RAX: 0000000000000000 RBX: ffff9141bd5061b8 RCX: 0000000000000000
+RDX: 30b29c630930aec8 RSI: ffff9141fdd21e80 RDI: ffff9141bd5061b8
+RBP: 0000000000000001 R08: 0000000000000000 R09: ffffb90841a67b88
+R10: 0000000000000003 R11: ffffffff8613f558 R12: ffff9141fdd21e80
+R13: 0000000000000000 R14: ffff9141b5976010 R15: ffff914185755338
+FS: 00007f45768bd840(0000) GS:ffff9141fdd00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000619000424074 CR3: 0000000009f5e005 CR4: 0000000000170ee0
+Call Trace:
+ <TASK>
+ timer_delete+0x48/0x80
+ try_to_grab_pending+0xdf/0x170
+ __cancel_work+0x37/0xb0
+ iso_connect_cis+0x141/0x400 [bluetooth]
+===============================================================
+
+Trace with NULL conn->hcon in state BT_CONNECT:
+===============================================================
+__iso_sock_close:619: sk 00000000f7c71fc5 state 1 socket 00000000d90c5fe5
+...
+__iso_sock_close:619: sk 00000000f7c71fc5 state 8 socket 00000000d90c5fe5
+iso_chan_del:153: sk 00000000f7c71fc5, conn 0000000022c03a7e, err 104
+...
+iso_sock_connect:862: sk 00000000129b56c3
+iso_connect_cis:348: 70:1a:b8:98:ff:a2 -> 28:3d:c2:4a:7d:2a
+hci_get_route:1199: 70:1a:b8:98:ff:a2 -> 28:3d:c2:4a:7d:2a
+hci_dev_hold:1495: hci0 orig refcnt 19
+__iso_chan_add:214: conn 0000000022c03a7e
+ <Note: reusing old conn>
+iso_sock_clear_timer:117: sock 00000000129b56c3 state 3
+...
+iso_sock_ready:1485: sk 00000000129b56c3
+...
+iso_sock_sendmsg:1077: sock 00000000e5013966, sk 00000000129b56c3
+BUG: kernel NULL pointer dereference, address: 00000000000006a8
+PGD 0 P4D 0
+Oops: 0000 [#1] PREEMPT SMP PTI
+CPU: 1 PID: 1403 Comm: wireplumber Tainted: G E 6.3.0-rc7+ #4
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014
+RIP: 0010:iso_sock_sendmsg+0x63/0x2a0 [bluetooth]
+===============================================================
+
+Fixes: 241f51931c35 ("Bluetooth: ISO: Avoid circular locking dependency")
+Fixes: 6a5ad251b7cd ("Bluetooth: ISO: Fix possible circular locking dependency")
+Signed-off-by: Pauli Virtanen <pav@iki.fi>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bluetooth/iso.c | 53 ++++++++++++++++++++++++++-------------------
+ 1 file changed, 31 insertions(+), 22 deletions(-)
+
+diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
+index cb959e8eac185..699e4f400df29 100644
+--- a/net/bluetooth/iso.c
++++ b/net/bluetooth/iso.c
+@@ -116,8 +116,11 @@ static struct iso_conn *iso_conn_add(struct hci_conn *hcon)
+ {
+ struct iso_conn *conn = hcon->iso_data;
+
+- if (conn)
++ if (conn) {
++ if (!conn->hcon)
++ conn->hcon = hcon;
+ return conn;
++ }
+
+ conn = kzalloc(sizeof(*conn), GFP_KERNEL);
+ if (!conn)
+@@ -285,14 +288,13 @@ static int iso_connect_bis(struct sock *sk)
+ goto unlock;
+ }
+
+- hci_dev_unlock(hdev);
+- hci_dev_put(hdev);
++ lock_sock(sk);
+
+ err = iso_chan_add(conn, sk, NULL);
+- if (err)
+- return err;
+-
+- lock_sock(sk);
++ if (err) {
++ release_sock(sk);
++ goto unlock;
++ }
+
+ /* Update source addr of the socket */
+ bacpy(&iso_pi(sk)->src, &hcon->src);
+@@ -306,7 +308,6 @@ static int iso_connect_bis(struct sock *sk)
+ }
+
+ release_sock(sk);
+- return err;
+
+ unlock:
+ hci_dev_unlock(hdev);
+@@ -367,14 +368,13 @@ static int iso_connect_cis(struct sock *sk)
+ goto unlock;
+ }
+
+- hci_dev_unlock(hdev);
+- hci_dev_put(hdev);
++ lock_sock(sk);
+
+ err = iso_chan_add(conn, sk, NULL);
+- if (err)
+- return err;
+-
+- lock_sock(sk);
++ if (err) {
++ release_sock(sk);
++ goto unlock;
++ }
+
+ /* Update source addr of the socket */
+ bacpy(&iso_pi(sk)->src, &hcon->src);
+@@ -391,7 +391,6 @@ static int iso_connect_cis(struct sock *sk)
+ }
+
+ release_sock(sk);
+- return err;
+
+ unlock:
+ hci_dev_unlock(hdev);
+@@ -1036,8 +1035,8 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ size_t len)
+ {
+ struct sock *sk = sock->sk;
+- struct iso_conn *conn = iso_pi(sk)->conn;
+ struct sk_buff *skb, **frag;
++ size_t mtu;
+ int err;
+
+ BT_DBG("sock %p, sk %p", sock, sk);
+@@ -1049,11 +1048,18 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ if (msg->msg_flags & MSG_OOB)
+ return -EOPNOTSUPP;
+
+- if (sk->sk_state != BT_CONNECTED)
++ lock_sock(sk);
++
++ if (sk->sk_state != BT_CONNECTED) {
++ release_sock(sk);
+ return -ENOTCONN;
++ }
++
++ mtu = iso_pi(sk)->conn->hcon->hdev->iso_mtu;
++
++ release_sock(sk);
+
+- skb = bt_skb_sendmsg(sk, msg, len, conn->hcon->hdev->iso_mtu,
+- HCI_ISO_DATA_HDR_SIZE, 0);
++ skb = bt_skb_sendmsg(sk, msg, len, mtu, HCI_ISO_DATA_HDR_SIZE, 0);
+ if (IS_ERR(skb))
+ return PTR_ERR(skb);
+
+@@ -1066,8 +1072,7 @@ static int iso_sock_sendmsg(struct socket *sock, struct msghdr *msg,
+ while (len) {
+ struct sk_buff *tmp;
+
+- tmp = bt_skb_sendmsg(sk, msg, len, conn->hcon->hdev->iso_mtu,
+- 0, 0);
++ tmp = bt_skb_sendmsg(sk, msg, len, mtu, 0, 0);
+ if (IS_ERR(tmp)) {
+ kfree_skb(skb);
+ return PTR_ERR(tmp);
+@@ -1122,15 +1127,19 @@ static int iso_sock_recvmsg(struct socket *sock, struct msghdr *msg,
+ BT_DBG("sk %p", sk);
+
+ if (test_and_clear_bit(BT_SK_DEFER_SETUP, &bt_sk(sk)->flags)) {
++ lock_sock(sk);
+ switch (sk->sk_state) {
+ case BT_CONNECT2:
+- lock_sock(sk);
+ iso_conn_defer_accept(pi->conn->hcon);
+ sk->sk_state = BT_CONFIG;
+ release_sock(sk);
+ return 0;
+ case BT_CONNECT:
++ release_sock(sk);
+ return iso_connect_cis(sk);
++ default:
++ release_sock(sk);
++ break;
+ }
+ }
+
+--
+2.39.2
+
--- /dev/null
+From 6fa1ac47040a970b9823dd880eeff4a1f5d2c7a1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 19 Jun 2023 01:04:31 +0300
+Subject: Bluetooth: use RCU for hci_conn_params and iterate safely in hci_sync
+
+From: Pauli Virtanen <pav@iki.fi>
+
+[ Upstream commit 195ef75e19287b4bc413da3e3e3722b030ac881e ]
+
+hci_update_accept_list_sync iterates over hdev->pend_le_conns and
+hdev->pend_le_reports, and waits for controller events in the loop body,
+without holding hdev lock.
+
+Meanwhile, these lists and the items in them may be modified, e.g. by
+le_scan_cleanup. This can invalidate the list cursor or any other item
+in the list, resulting in invalid behavior (e.g. use-after-free).
+
+Use RCU for the hci_conn_params action lists. Since the loop bodies in
+hci_sync block and we cannot use RCU or hdev->lock for the whole loop,
+copy list items first and then iterate on the copy. Only the flags field
+is written from elsewhere, so READ_ONCE/WRITE_ONCE should guarantee we
+read valid values.
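+
+A condensed sketch of the copy-then-iterate scheme (generic illustration;
+the real code is conn_params_copy() in the hunks below, and snapshot_of()
+and add_to_accept_list() here only stand in for copying the plain fields
+and for the blocking per-entry work):
+
+  rcu_read_lock();
+  list_for_each_entry_rcu(p, list, action)
+          n++;                            /* count current entries */
+  rcu_read_unlock();
+
+  copy = kvcalloc(n, sizeof(*copy), GFP_KERNEL);
+
+  rcu_read_lock();
+  i = 0;
+  list_for_each_entry_rcu(p, list, action) {
+          if (i >= n)
+                  break;                  /* racing adds: next scan update */
+          copy[i++] = snapshot_of(p);     /* plain data only, no pointers */
+  }
+  rcu_read_unlock();
+
+  for (i = 0; i < n; i++)
+          add_to_accept_list(&copy[i]);   /* may block and wait for events */
+
+  kvfree(copy);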
+
+Free params everywhere with hci_conn_params_free so the cleanup is
+guaranteed to be done properly.
+
+This fixes the following, which can be triggered e.g. by the new BlueZ
+mgmt-tester case "Add + Remove Device Nowait - Success", or by changing
+hci_le_set_cig_params to always return false and running iso-tester:
+
+==================================================================
+BUG: KASAN: slab-use-after-free in hci_update_passive_scan_sync (net/bluetooth/hci_sync.c:2536 net/bluetooth/hci_sync.c:2723 net/bluetooth/hci_sync.c:2841)
+Read of size 8 at addr ffff888001265018 by task kworker/u3:0/32
+
+Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.16.2-1.fc38 04/01/2014
+Workqueue: hci0 hci_cmd_sync_work
+Call Trace:
+<TASK>
+dump_stack_lvl (./arch/x86/include/asm/irqflags.h:134 lib/dump_stack.c:107)
+print_report (mm/kasan/report.c:320 mm/kasan/report.c:430)
+? __virt_addr_valid (./include/linux/mmzone.h:1915 ./include/linux/mmzone.h:2011 arch/x86/mm/physaddr.c:65)
+? hci_update_passive_scan_sync (net/bluetooth/hci_sync.c:2536 net/bluetooth/hci_sync.c:2723 net/bluetooth/hci_sync.c:2841)
+kasan_report (mm/kasan/report.c:538)
+? hci_update_passive_scan_sync (net/bluetooth/hci_sync.c:2536 net/bluetooth/hci_sync.c:2723 net/bluetooth/hci_sync.c:2841)
+hci_update_passive_scan_sync (net/bluetooth/hci_sync.c:2536 net/bluetooth/hci_sync.c:2723 net/bluetooth/hci_sync.c:2841)
+? __pfx_hci_update_passive_scan_sync (net/bluetooth/hci_sync.c:2780)
+? mutex_lock (kernel/locking/mutex.c:282)
+? __pfx_mutex_lock (kernel/locking/mutex.c:282)
+? __pfx_mutex_unlock (kernel/locking/mutex.c:538)
+? __pfx_update_passive_scan_sync (net/bluetooth/hci_sync.c:2861)
+hci_cmd_sync_work (net/bluetooth/hci_sync.c:306)
+process_one_work (./arch/x86/include/asm/preempt.h:27 kernel/workqueue.c:2399)
+worker_thread (./include/linux/list.h:292 kernel/workqueue.c:2538)
+? __pfx_worker_thread (kernel/workqueue.c:2480)
+kthread (kernel/kthread.c:376)
+? __pfx_kthread (kernel/kthread.c:331)
+ret_from_fork (arch/x86/entry/entry_64.S:314)
+</TASK>
+
+Allocated by task 31:
+kasan_save_stack (mm/kasan/common.c:46)
+kasan_set_track (mm/kasan/common.c:52)
+__kasan_kmalloc (mm/kasan/common.c:374 mm/kasan/common.c:383)
+hci_conn_params_add (./include/linux/slab.h:580 ./include/linux/slab.h:720 net/bluetooth/hci_core.c:2277)
+hci_connect_le_scan (net/bluetooth/hci_conn.c:1419 net/bluetooth/hci_conn.c:1589)
+hci_connect_cis (net/bluetooth/hci_conn.c:2266)
+iso_connect_cis (net/bluetooth/iso.c:390)
+iso_sock_connect (net/bluetooth/iso.c:899)
+__sys_connect (net/socket.c:2003 net/socket.c:2020)
+__x64_sys_connect (net/socket.c:2027)
+do_syscall_64 (arch/x86/entry/common.c:50 arch/x86/entry/common.c:80)
+entry_SYSCALL_64_after_hwframe (arch/x86/entry/entry_64.S:120)
+
+Freed by task 15:
+kasan_save_stack (mm/kasan/common.c:46)
+kasan_set_track (mm/kasan/common.c:52)
+kasan_save_free_info (mm/kasan/generic.c:523)
+__kasan_slab_free (mm/kasan/common.c:238 mm/kasan/common.c:200 mm/kasan/common.c:244)
+__kmem_cache_free (mm/slub.c:1807 mm/slub.c:3787 mm/slub.c:3800)
+hci_conn_params_del (net/bluetooth/hci_core.c:2323)
+le_scan_cleanup (net/bluetooth/hci_conn.c:202)
+process_one_work (./arch/x86/include/asm/preempt.h:27 kernel/workqueue.c:2399)
+worker_thread (./include/linux/list.h:292 kernel/workqueue.c:2538)
+kthread (kernel/kthread.c:376)
+ret_from_fork (arch/x86/entry/entry_64.S:314)
+==================================================================
+
+Fixes: e8907f76544f ("Bluetooth: hci_sync: Make use of hci_cmd_sync_queue set 3")
+Signed-off-by: Pauli Virtanen <pav@iki.fi>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/bluetooth/hci_core.h | 5 ++
+ net/bluetooth/hci_conn.c | 10 +--
+ net/bluetooth/hci_core.c | 38 ++++++++--
+ net/bluetooth/hci_event.c | 12 ++--
+ net/bluetooth/hci_sync.c | 117 ++++++++++++++++++++++++++++---
+ net/bluetooth/mgmt.c | 26 +++----
+ 6 files changed, 164 insertions(+), 44 deletions(-)
+
+diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
+index 84c5ce57eab69..ddbcbf9ccb2ce 100644
+--- a/include/net/bluetooth/hci_core.h
++++ b/include/net/bluetooth/hci_core.h
+@@ -807,6 +807,7 @@ struct hci_conn_params {
+
+ struct hci_conn *conn;
+ bool explicit_connect;
++ /* Accessed without hdev->lock: */
+ hci_conn_flags_t flags;
+ u8 privacy_mode;
+ };
+@@ -1536,7 +1537,11 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
+ bdaddr_t *addr, u8 addr_type);
+ void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type);
+ void hci_conn_params_clear_disabled(struct hci_dev *hdev);
++void hci_conn_params_free(struct hci_conn_params *param);
+
++void hci_pend_le_list_del_init(struct hci_conn_params *param);
++void hci_pend_le_list_add(struct hci_conn_params *param,
++ struct list_head *list);
+ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
+ bdaddr_t *addr,
+ u8 addr_type);
+diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
+index fef09d2121384..61059571c8779 100644
+--- a/net/bluetooth/hci_conn.c
++++ b/net/bluetooth/hci_conn.c
+@@ -117,7 +117,7 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status)
+ */
+ params->explicit_connect = false;
+
+- list_del_init(¶ms->action);
++ hci_pend_le_list_del_init(params);
+
+ switch (params->auto_connect) {
+ case HCI_AUTO_CONN_EXPLICIT:
+@@ -126,10 +126,10 @@ static void hci_connect_le_scan_cleanup(struct hci_conn *conn, u8 status)
+ return;
+ case HCI_AUTO_CONN_DIRECT:
+ case HCI_AUTO_CONN_ALWAYS:
+- list_add(¶ms->action, &hdev->pend_le_conns);
++ hci_pend_le_list_add(params, &hdev->pend_le_conns);
+ break;
+ case HCI_AUTO_CONN_REPORT:
+- list_add(¶ms->action, &hdev->pend_le_reports);
++ hci_pend_le_list_add(params, &hdev->pend_le_reports);
+ break;
+ default:
+ break;
+@@ -1398,8 +1398,8 @@ static int hci_explicit_conn_params_set(struct hci_dev *hdev,
+ if (params->auto_connect == HCI_AUTO_CONN_DISABLED ||
+ params->auto_connect == HCI_AUTO_CONN_REPORT ||
+ params->auto_connect == HCI_AUTO_CONN_EXPLICIT) {
+- list_del_init(¶ms->action);
+- list_add(¶ms->action, &hdev->pend_le_conns);
++ hci_pend_le_list_del_init(params);
++ hci_pend_le_list_add(params, &hdev->pend_le_conns);
+ }
+
+ params->explicit_connect = true;
+diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
+index ca42129f8f91a..be0e6865b340f 100644
+--- a/net/bluetooth/hci_core.c
++++ b/net/bluetooth/hci_core.c
+@@ -2249,21 +2249,45 @@ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev,
+ return NULL;
+ }
+
+-/* This function requires the caller holds hdev->lock */
++/* This function requires the caller holds hdev->lock or rcu_read_lock */
+ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list,
+ bdaddr_t *addr, u8 addr_type)
+ {
+ struct hci_conn_params *param;
+
+- list_for_each_entry(param, list, action) {
++ rcu_read_lock();
++
++ list_for_each_entry_rcu(param, list, action) {
+ if (bacmp(¶m->addr, addr) == 0 &&
+- param->addr_type == addr_type)
++ param->addr_type == addr_type) {
++ rcu_read_unlock();
+ return param;
++ }
+ }
+
++ rcu_read_unlock();
++
+ return NULL;
+ }
+
++/* This function requires the caller holds hdev->lock */
++void hci_pend_le_list_del_init(struct hci_conn_params *param)
++{
++ if (list_empty(¶m->action))
++ return;
++
++ list_del_rcu(¶m->action);
++ synchronize_rcu();
++ INIT_LIST_HEAD(¶m->action);
++}
++
++/* This function requires the caller holds hdev->lock */
++void hci_pend_le_list_add(struct hci_conn_params *param,
++ struct list_head *list)
++{
++ list_add_rcu(¶m->action, list);
++}
++
+ /* This function requires the caller holds hdev->lock */
+ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
+ bdaddr_t *addr, u8 addr_type)
+@@ -2297,14 +2321,15 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev,
+ return params;
+ }
+
+-static void hci_conn_params_free(struct hci_conn_params *params)
++void hci_conn_params_free(struct hci_conn_params *params)
+ {
++ hci_pend_le_list_del_init(params);
++
+ if (params->conn) {
+ hci_conn_drop(params->conn);
+ hci_conn_put(params->conn);
+ }
+
+- list_del(¶ms->action);
+ list_del(¶ms->list);
+ kfree(params);
+ }
+@@ -2342,8 +2367,7 @@ void hci_conn_params_clear_disabled(struct hci_dev *hdev)
+ continue;
+ }
+
+- list_del(¶ms->list);
+- kfree(params);
++ hci_conn_params_free(params);
+ }
+
+ BT_DBG("All LE disabled connection parameters were removed");
+diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
+index b272cc1f36481..ec9b0612f2761 100644
+--- a/net/bluetooth/hci_event.c
++++ b/net/bluetooth/hci_event.c
+@@ -1558,7 +1558,7 @@ static u8 hci_cc_le_set_privacy_mode(struct hci_dev *hdev, void *data,
+
+ params = hci_conn_params_lookup(hdev, &cp->bdaddr, cp->bdaddr_type);
+ if (params)
+- params->privacy_mode = cp->mode;
++ WRITE_ONCE(params->privacy_mode, cp->mode);
+
+ hci_dev_unlock(hdev);
+
+@@ -2809,8 +2809,8 @@ static void hci_cs_disconnect(struct hci_dev *hdev, u8 status)
+
+ case HCI_AUTO_CONN_DIRECT:
+ case HCI_AUTO_CONN_ALWAYS:
+- list_del_init(¶ms->action);
+- list_add(¶ms->action, &hdev->pend_le_conns);
++ hci_pend_le_list_del_init(params);
++ hci_pend_le_list_add(params, &hdev->pend_le_conns);
+ break;
+
+ default:
+@@ -3428,8 +3428,8 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, void *data,
+
+ case HCI_AUTO_CONN_DIRECT:
+ case HCI_AUTO_CONN_ALWAYS:
+- list_del_init(¶ms->action);
+- list_add(¶ms->action, &hdev->pend_le_conns);
++ hci_pend_le_list_del_init(params);
++ hci_pend_le_list_add(params, &hdev->pend_le_conns);
+ hci_update_passive_scan(hdev);
+ break;
+
+@@ -5952,7 +5952,7 @@ static void le_conn_complete_evt(struct hci_dev *hdev, u8 status,
+ params = hci_pend_le_action_lookup(&hdev->pend_le_conns, &conn->dst,
+ conn->dst_type);
+ if (params) {
+- list_del_init(¶ms->action);
++ hci_pend_le_list_del_init(params);
+ if (params->conn) {
+ hci_conn_drop(params->conn);
+ hci_conn_put(params->conn);
+diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
+index 37131a36700a1..2ae038dfc39f7 100644
+--- a/net/bluetooth/hci_sync.c
++++ b/net/bluetooth/hci_sync.c
+@@ -2139,15 +2139,23 @@ static int hci_le_del_accept_list_sync(struct hci_dev *hdev,
+ return 0;
+ }
+
++struct conn_params {
++ bdaddr_t addr;
++ u8 addr_type;
++ hci_conn_flags_t flags;
++ u8 privacy_mode;
++};
++
+ /* Adds connection to resolve list if needed.
+ * Setting params to NULL programs local hdev->irk
+ */
+ static int hci_le_add_resolve_list_sync(struct hci_dev *hdev,
+- struct hci_conn_params *params)
++ struct conn_params *params)
+ {
+ struct hci_cp_le_add_to_resolv_list cp;
+ struct smp_irk *irk;
+ struct bdaddr_list_with_irk *entry;
++ struct hci_conn_params *p;
+
+ if (!use_ll_privacy(hdev))
+ return 0;
+@@ -2182,6 +2190,16 @@ static int hci_le_add_resolve_list_sync(struct hci_dev *hdev,
+ /* Default privacy mode is always Network */
+ params->privacy_mode = HCI_NETWORK_PRIVACY;
+
++ rcu_read_lock();
++ p = hci_pend_le_action_lookup(&hdev->pend_le_conns,
++ ¶ms->addr, params->addr_type);
++ if (!p)
++ p = hci_pend_le_action_lookup(&hdev->pend_le_reports,
++ ¶ms->addr, params->addr_type);
++ if (p)
++ WRITE_ONCE(p->privacy_mode, HCI_NETWORK_PRIVACY);
++ rcu_read_unlock();
++
+ done:
+ if (hci_dev_test_flag(hdev, HCI_PRIVACY))
+ memcpy(cp.local_irk, hdev->irk, 16);
+@@ -2194,7 +2212,7 @@ static int hci_le_add_resolve_list_sync(struct hci_dev *hdev,
+
+ /* Set Device Privacy Mode. */
+ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
+- struct hci_conn_params *params)
++ struct conn_params *params)
+ {
+ struct hci_cp_le_set_privacy_mode cp;
+ struct smp_irk *irk;
+@@ -2219,6 +2237,8 @@ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
+ bacpy(&cp.bdaddr, &irk->bdaddr);
+ cp.mode = HCI_DEVICE_PRIVACY;
+
++ /* Note: params->privacy_mode is not updated since it is a copy */
++
+ return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_PRIVACY_MODE,
+ sizeof(cp), &cp, HCI_CMD_TIMEOUT);
+ }
+@@ -2228,7 +2248,7 @@ static int hci_le_set_privacy_mode_sync(struct hci_dev *hdev,
+ * properly set the privacy mode.
+ */
+ static int hci_le_add_accept_list_sync(struct hci_dev *hdev,
+- struct hci_conn_params *params,
++ struct conn_params *params,
+ u8 *num_entries)
+ {
+ struct hci_cp_le_add_to_accept_list cp;
+@@ -2426,6 +2446,52 @@ struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev,
+ return __hci_cmd_sync_sk(hdev, opcode, 0, NULL, 0, HCI_CMD_TIMEOUT, sk);
+ }
+
++static struct conn_params *conn_params_copy(struct list_head *list, size_t *n)
++{
++ struct hci_conn_params *params;
++ struct conn_params *p;
++ size_t i;
++
++ rcu_read_lock();
++
++ i = 0;
++ list_for_each_entry_rcu(params, list, action)
++ ++i;
++ *n = i;
++
++ rcu_read_unlock();
++
++ p = kvcalloc(*n, sizeof(struct conn_params), GFP_KERNEL);
++ if (!p)
++ return NULL;
++
++ rcu_read_lock();
++
++ i = 0;
++ list_for_each_entry_rcu(params, list, action) {
++ /* Racing adds are handled in next scan update */
++ if (i >= *n)
++ break;
++
++ /* No hdev->lock, but: addr, addr_type are immutable.
++ * privacy_mode is only written by us or in
++ * hci_cc_le_set_privacy_mode that we wait for.
++ * We should be idempotent so MGMT updating flags
++ * while we are processing is OK.
++ */
++ bacpy(&p[i].addr, &params->addr);
++ p[i].addr_type = params->addr_type;
++ p[i].flags = READ_ONCE(params->flags);
++ p[i].privacy_mode = READ_ONCE(params->privacy_mode);
++ ++i;
++ }
++
++ rcu_read_unlock();
++
++ *n = i;
++ return p;
++}
++
+ /* Device must not be scanning when updating the accept list.
+ *
+ * Update is done using the following sequence:
+@@ -2445,11 +2511,12 @@ struct sk_buff *hci_read_local_oob_data_sync(struct hci_dev *hdev,
+ */
+ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
+ {
+- struct hci_conn_params *params;
++ struct conn_params *params;
+ struct bdaddr_list *b, *t;
+ u8 num_entries = 0;
+ bool pend_conn, pend_report;
+ u8 filter_policy;
++ size_t i, n;
+ int err;
+
+ /* Pause advertising if resolving list can be used as controllers
+@@ -2483,6 +2550,7 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
+ if (hci_conn_hash_lookup_le(hdev, &b->bdaddr, b->bdaddr_type))
+ continue;
+
++ /* Pointers not dereferenced, no locks needed */
+ pend_conn = hci_pend_le_action_lookup(&hdev->pend_le_conns,
+ &b->bdaddr,
+ b->bdaddr_type);
+@@ -2511,23 +2579,50 @@ static u8 hci_update_accept_list_sync(struct hci_dev *hdev)
+ * available accept list entries in the controller, then
+ * just abort and return filer policy value to not use the
+ * accept list.
++ *
++ * The list and params may be mutated while we wait for events,
++ * so make a copy and iterate it.
+ */
+- list_for_each_entry(params, &hdev->pend_le_conns, action) {
+- err = hci_le_add_accept_list_sync(hdev, params, &num_entries);
+- if (err)
++
++ params = conn_params_copy(&hdev->pend_le_conns, &n);
++ if (!params) {
++ err = -ENOMEM;
++ goto done;
++ }
++
++ for (i = 0; i < n; ++i) {
++ err = hci_le_add_accept_list_sync(hdev, &params[i],
++ &num_entries);
++ if (err) {
++ kvfree(params);
+ goto done;
++ }
+ }
+
++ kvfree(params);
++
+ /* After adding all new pending connections, walk through
+ * the list of pending reports and also add these to the
+ * accept list if there is still space. Abort if space runs out.
+ */
+- list_for_each_entry(params, &hdev->pend_le_reports, action) {
+- err = hci_le_add_accept_list_sync(hdev, params, &num_entries);
+- if (err)
++
++ params = conn_params_copy(&hdev->pend_le_reports, &n);
++ if (!params) {
++ err = -ENOMEM;
++ goto done;
++ }
++
++ for (i = 0; i < n; ++i) {
++ err = hci_le_add_accept_list_sync(hdev, &params[i],
++ &num_entries);
++ if (err) {
++ kvfree(params);
+ goto done;
++ }
+ }
+
++ kvfree(params);
++
+ /* Use the allowlist unless the following conditions are all true:
+ * - We are not currently suspending
+ * - There are 1 or more ADV monitors registered and it's not offloaded
+@@ -4778,12 +4873,12 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev)
+ struct hci_conn_params *p;
+
+ list_for_each_entry(p, &hdev->le_conn_params, list) {
++ hci_pend_le_list_del_init(p);
+ if (p->conn) {
+ hci_conn_drop(p->conn);
+ hci_conn_put(p->conn);
+ p->conn = NULL;
+ }
+- list_del_init(&p->action);
+ }
+
+ BT_DBG("All LE pending actions cleared");
+diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
+index 815f2abe918ef..89c94f3e96bc3 100644
+--- a/net/bluetooth/mgmt.c
++++ b/net/bluetooth/mgmt.c
+@@ -1297,15 +1297,15 @@ static void restart_le_actions(struct hci_dev *hdev)
+ /* Needed for AUTO_OFF case where might not "really"
+ * have been powered off.
+ */
+- list_del_init(&p->action);
++ hci_pend_le_list_del_init(p);
+
+ switch (p->auto_connect) {
+ case HCI_AUTO_CONN_DIRECT:
+ case HCI_AUTO_CONN_ALWAYS:
+- list_add(&p->action, &hdev->pend_le_conns);
++ hci_pend_le_list_add(p, &hdev->pend_le_conns);
+ break;
+ case HCI_AUTO_CONN_REPORT:
+- list_add(&p->action, &hdev->pend_le_reports);
++ hci_pend_le_list_add(p, &hdev->pend_le_reports);
+ break;
+ default:
+ break;
+@@ -5161,7 +5161,7 @@ static int set_device_flags(struct sock *sk, struct hci_dev *hdev, void *data,
+ goto unlock;
+ }
+
+- params->flags = current_flags;
++ WRITE_ONCE(params->flags, current_flags);
+ status = MGMT_STATUS_SUCCESS;
+
+ /* Update passive scan if HCI_CONN_FLAG_DEVICE_PRIVACY
+@@ -7573,7 +7573,7 @@ static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr,
+ if (params->auto_connect == auto_connect)
+ return 0;
+
+- list_del_init(&params->action);
++ hci_pend_le_list_del_init(params);
+
+ switch (auto_connect) {
+ case HCI_AUTO_CONN_DISABLED:
+@@ -7582,18 +7582,18 @@ static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr,
+ * connect to device, keep connecting.
+ */
+ if (params->explicit_connect)
+- list_add(&params->action, &hdev->pend_le_conns);
++ hci_pend_le_list_add(params, &hdev->pend_le_conns);
+ break;
+ case HCI_AUTO_CONN_REPORT:
+ if (params->explicit_connect)
+- list_add(&params->action, &hdev->pend_le_conns);
++ hci_pend_le_list_add(params, &hdev->pend_le_conns);
+ else
+- list_add(&params->action, &hdev->pend_le_reports);
++ hci_pend_le_list_add(params, &hdev->pend_le_reports);
+ break;
+ case HCI_AUTO_CONN_DIRECT:
+ case HCI_AUTO_CONN_ALWAYS:
+ if (!is_connected(hdev, addr, addr_type))
+- list_add(&params->action, &hdev->pend_le_conns);
++ hci_pend_le_list_add(params, &hdev->pend_le_conns);
+ break;
+ }
+
+@@ -7816,9 +7816,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
+ goto unlock;
+ }
+
+- list_del(&params->action);
+- list_del(&params->list);
+- kfree(params);
++ hci_conn_params_free(params);
+
+ device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type);
+ } else {
+@@ -7849,9 +7847,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
+ p->auto_connect = HCI_AUTO_CONN_EXPLICIT;
+ continue;
+ }
+- list_del(&p->action);
+- list_del(&p->list);
+- kfree(p);
++ hci_conn_params_free(p);
+ }
+
+ bt_dev_dbg(hdev, "All LE connection parameters were removed");
+--
+2.39.2
+
--- /dev/null
+From ccf4979c64a589eed4428fcc3fc6a92a8627c659 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 10 May 2023 21:37:48 -0700
+Subject: bpf: Address KCSAN report on bpf_lru_list
+
+From: Martin KaFai Lau <martin.lau@kernel.org>
+
+[ Upstream commit ee9fd0ac3017c4313be91a220a9ac4c99dde7ad4 ]
+
+KCSAN reported a data-race when accessing node->ref.
+Although node->ref does not have to be accurate,
+take this chance to use a more common READ_ONCE() and WRITE_ONCE()
+pattern instead of data_race().
+
+There is an existing bpf_lru_node_is_ref() and bpf_lru_node_set_ref().
+This patch also adds bpf_lru_node_clear_ref() to do the
+WRITE_ONCE(node->ref, 0) also.
+
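+The accessor pattern boils down to the following userspace sketch (the
+struct and helper names are illustrative stand-ins, not the kernel code):
+
+```
+#include <stdbool.h>
+#include <stdio.h>
+
+/* Minimal stand-ins for the kernel's READ_ONCE()/WRITE_ONCE() macros. */
+#define READ_ONCE(x)		(*(const volatile typeof(x) *)&(x))
+#define WRITE_ONCE(x, val)	(*(volatile typeof(x) *)&(x) = (val))
+
+struct lru_node {		/* hypothetical stand-in for bpf_lru_node */
+	unsigned char ref;	/* approximate "recently used" hint */
+};
+
+static bool lru_node_is_ref(struct lru_node *node)
+{
+	return READ_ONCE(node->ref);
+}
+
+static void lru_node_clear_ref(struct lru_node *node)
+{
+	WRITE_ONCE(node->ref, 0);
+}
+
+static void lru_node_set_ref(struct lru_node *node)
+{
+	/* The value may race with other CPUs; accuracy is not required,
+	 * but marking the accesses tells KCSAN the race is intentional.
+	 */
+	if (!READ_ONCE(node->ref))
+		WRITE_ONCE(node->ref, 1);
+}
+
+int main(void)
+{
+	struct lru_node n = { .ref = 0 };
+
+	lru_node_set_ref(&n);
+	printf("ref set: %d\n", lru_node_is_ref(&n));
+	lru_node_clear_ref(&n);
+	printf("ref cleared: %d\n", lru_node_is_ref(&n));
+	return 0;
+}
+```
+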
+==================================================================
+BUG: KCSAN: data-race in __bpf_lru_list_rotate / __htab_lru_percpu_map_update_elem
+
+write to 0xffff888137038deb of 1 bytes by task 11240 on cpu 1:
+__bpf_lru_node_move kernel/bpf/bpf_lru_list.c:113 [inline]
+__bpf_lru_list_rotate_active kernel/bpf/bpf_lru_list.c:149 [inline]
+__bpf_lru_list_rotate+0x1bf/0x750 kernel/bpf/bpf_lru_list.c:240
+bpf_lru_list_pop_free_to_local kernel/bpf/bpf_lru_list.c:329 [inline]
+bpf_common_lru_pop_free kernel/bpf/bpf_lru_list.c:447 [inline]
+bpf_lru_pop_free+0x638/0xe20 kernel/bpf/bpf_lru_list.c:499
+prealloc_lru_pop kernel/bpf/hashtab.c:290 [inline]
+__htab_lru_percpu_map_update_elem+0xe7/0x820 kernel/bpf/hashtab.c:1316
+bpf_percpu_hash_update+0x5e/0x90 kernel/bpf/hashtab.c:2313
+bpf_map_update_value+0x2a9/0x370 kernel/bpf/syscall.c:200
+generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1687
+bpf_map_do_batch+0x2d9/0x3d0 kernel/bpf/syscall.c:4534
+__sys_bpf+0x338/0x810
+__do_sys_bpf kernel/bpf/syscall.c:5096 [inline]
+__se_sys_bpf kernel/bpf/syscall.c:5094 [inline]
+__x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5094
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+read to 0xffff888137038deb of 1 bytes by task 11241 on cpu 0:
+bpf_lru_node_set_ref kernel/bpf/bpf_lru_list.h:70 [inline]
+__htab_lru_percpu_map_update_elem+0x2f1/0x820 kernel/bpf/hashtab.c:1332
+bpf_percpu_hash_update+0x5e/0x90 kernel/bpf/hashtab.c:2313
+bpf_map_update_value+0x2a9/0x370 kernel/bpf/syscall.c:200
+generic_map_update_batch+0x3ae/0x4f0 kernel/bpf/syscall.c:1687
+bpf_map_do_batch+0x2d9/0x3d0 kernel/bpf/syscall.c:4534
+__sys_bpf+0x338/0x810
+__do_sys_bpf kernel/bpf/syscall.c:5096 [inline]
+__se_sys_bpf kernel/bpf/syscall.c:5094 [inline]
+__x64_sys_bpf+0x43/0x50 kernel/bpf/syscall.c:5094
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+value changed: 0x01 -> 0x00
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 11241 Comm: syz-executor.3 Not tainted 6.3.0-rc7-syzkaller-00136-g6a66fdd29ea1 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 03/30/2023
+==================================================================
+
+Reported-by: syzbot+ebe648a84e8784763f82@syzkaller.appspotmail.com
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Acked-by: Yonghong Song <yhs@fb.com>
+Link: https://lore.kernel.org/r/20230511043748.1384166-1-martin.lau@linux.dev
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/bpf_lru_list.c | 21 +++++++++++++--------
+ kernel/bpf/bpf_lru_list.h | 7 ++-----
+ 2 files changed, 15 insertions(+), 13 deletions(-)
+
+diff --git a/kernel/bpf/bpf_lru_list.c b/kernel/bpf/bpf_lru_list.c
+index d99e89f113c43..3dabdd137d102 100644
+--- a/kernel/bpf/bpf_lru_list.c
++++ b/kernel/bpf/bpf_lru_list.c
+@@ -41,7 +41,12 @@ static struct list_head *local_pending_list(struct bpf_lru_locallist *loc_l)
+ /* bpf_lru_node helpers */
+ static bool bpf_lru_node_is_ref(const struct bpf_lru_node *node)
+ {
+- return node->ref;
++ return READ_ONCE(node->ref);
++}
++
++static void bpf_lru_node_clear_ref(struct bpf_lru_node *node)
++{
++ WRITE_ONCE(node->ref, 0);
+ }
+
+ static void bpf_lru_list_count_inc(struct bpf_lru_list *l,
+@@ -89,7 +94,7 @@ static void __bpf_lru_node_move_in(struct bpf_lru_list *l,
+
+ bpf_lru_list_count_inc(l, tgt_type);
+ node->type = tgt_type;
+- node->ref = 0;
++ bpf_lru_node_clear_ref(node);
+ list_move(&node->list, &l->lists[tgt_type]);
+ }
+
+@@ -110,7 +115,7 @@ static void __bpf_lru_node_move(struct bpf_lru_list *l,
+ bpf_lru_list_count_inc(l, tgt_type);
+ node->type = tgt_type;
+ }
+- node->ref = 0;
++ bpf_lru_node_clear_ref(node);
+
+ /* If the moving node is the next_inactive_rotation candidate,
+ * move the next_inactive_rotation pointer also.
+@@ -353,7 +358,7 @@ static void __local_list_add_pending(struct bpf_lru *lru,
+ *(u32 *)((void *)node + lru->hash_offset) = hash;
+ node->cpu = cpu;
+ node->type = BPF_LRU_LOCAL_LIST_T_PENDING;
+- node->ref = 0;
++ bpf_lru_node_clear_ref(node);
+ list_add(&node->list, local_pending_list(loc_l));
+ }
+
+@@ -419,7 +424,7 @@ static struct bpf_lru_node *bpf_percpu_lru_pop_free(struct bpf_lru *lru,
+ if (!list_empty(free_list)) {
+ node = list_first_entry(free_list, struct bpf_lru_node, list);
+ *(u32 *)((void *)node + lru->hash_offset) = hash;
+- node->ref = 0;
++ bpf_lru_node_clear_ref(node);
+ __bpf_lru_node_move(l, node, BPF_LRU_LIST_T_INACTIVE);
+ }
+
+@@ -522,7 +527,7 @@ static void bpf_common_lru_push_free(struct bpf_lru *lru,
+ }
+
+ node->type = BPF_LRU_LOCAL_LIST_T_FREE;
+- node->ref = 0;
++ bpf_lru_node_clear_ref(node);
+ list_move(&node->list, local_free_list(loc_l));
+
+ raw_spin_unlock_irqrestore(&loc_l->lock, flags);
+@@ -568,7 +573,7 @@ static void bpf_common_lru_populate(struct bpf_lru *lru, void *buf,
+
+ node = (struct bpf_lru_node *)(buf + node_offset);
+ node->type = BPF_LRU_LIST_T_FREE;
+- node->ref = 0;
++ bpf_lru_node_clear_ref(node);
+ list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
+ buf += elem_size;
+ }
+@@ -594,7 +599,7 @@ static void bpf_percpu_lru_populate(struct bpf_lru *lru, void *buf,
+ node = (struct bpf_lru_node *)(buf + node_offset);
+ node->cpu = cpu;
+ node->type = BPF_LRU_LIST_T_FREE;
+- node->ref = 0;
++ bpf_lru_node_clear_ref(node);
+ list_add(&node->list, &l->lists[BPF_LRU_LIST_T_FREE]);
+ i++;
+ buf += elem_size;
+diff --git a/kernel/bpf/bpf_lru_list.h b/kernel/bpf/bpf_lru_list.h
+index 4ea227c9c1ade..8f3c8b2b4490e 100644
+--- a/kernel/bpf/bpf_lru_list.h
++++ b/kernel/bpf/bpf_lru_list.h
+@@ -64,11 +64,8 @@ struct bpf_lru {
+
+ static inline void bpf_lru_node_set_ref(struct bpf_lru_node *node)
+ {
+- /* ref is an approximation on access frequency. It does not
+- * have to be very accurate. Hence, no protection is used.
+- */
+- if (!node->ref)
+- node->ref = 1;
++ if (!READ_ONCE(node->ref))
++ WRITE_ONCE(node->ref, 1);
+ }
+
+ int bpf_lru_init(struct bpf_lru *lru, bool percpu, u32 hash_offset,
+--
+2.39.2
+
--- /dev/null
+From 0a9f7c72db338d808de8b35708d487940038ce8f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 09:49:31 -0700
+Subject: bpf, arm64: Fix BTI type used for freplace attached functions
+
+From: Alexander Duyck <alexanderduyck@fb.com>
+
+[ Upstream commit a3f25d614bc73b45e8f02adc6769876dfd16ca84 ]
+
+When running an freplace attached bpf program on an arm64 system we were
+seeing the following issue:
+ Unhandled 64-bit el1h sync exception on CPU47, ESR 0x0000000036000003 -- BTI
+
+After a bit of work to track it down I determined that what appeared to be
+happening is that the 'bti c' at the start of the program was somehow being
+reached after a 'br' instruction. Further digging pointed me toward the
+fact that the function was attached via freplace. This in turn led me to
+build_plt which I believe is invoking the long jump which is triggering
+this error.
+
+To resolve it we can replace the 'bti c' with 'bti jc' and add a comment
+explaining why this has to be modified as such.
+
+Fixes: b2ad54e1533e ("bpf, arm64: Implement bpf_arch_text_poke() for arm64")
+Signed-off-by: Alexander Duyck <alexanderduyck@fb.com>
+Acked-by: Xu Kuohai <xukuohai@huawei.com>
+Link: https://lore.kernel.org/r/168926677665.316237.9953845318337455525.stgit@ahduyck-xeon-server.home.arpa
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/arm64/net/bpf_jit_comp.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c
+index 8f16217c111c8..14134fd34ff79 100644
+--- a/arch/arm64/net/bpf_jit_comp.c
++++ b/arch/arm64/net/bpf_jit_comp.c
+@@ -322,7 +322,13 @@ static int build_prologue(struct jit_ctx *ctx, bool ebpf_from_cbpf)
+ *
+ */
+
+- emit_bti(A64_BTI_C, ctx);
++ /* bpf function may be invoked by 3 instruction types:
++ * 1. bl, attached via freplace to bpf prog via short jump
++ * 2. br, attached via freplace to bpf prog via long jump
++ * 3. blr, working as a function pointer, used by emit_call.
++ * So BTI_JC should used here to support both br and blr.
++ */
++ emit_bti(A64_BTI_JC, ctx);
+
+ emit(A64_MOV(1, A64_R(9), A64_LR), ctx);
+ emit(A64_NOP, ctx);
+--
+2.39.2
+
--- /dev/null
+From 6136de53109de1a3979843917ce4f9c78823e3e1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 21:45:28 +0530
+Subject: bpf: Fix subprog idx logic in check_max_stack_depth
+
+From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+
+[ Upstream commit ba7b3e7d5f9014be65879ede8fd599cb222901c9 ]
+
+The assignment to idx in check_max_stack_depth happens once we see a
+bpf_pseudo_call or bpf_pseudo_func. This is not an issue as the rest of
+the code performs a few checks and then pushes the frame to the frame
+stack, except in the case of async callbacks. If the async callback case
+causes the loop iteration to be skipped, the idx assignment will be
+incorrect on the next iteration of the loop. The value stored in the
+frame stack (as the subprogno of the current subprog) will be incorrect.
+
+This leads to incorrect checks and incorrect tail_call_reachable
+marking. Save the target subprog in a new variable and only assign to
+idx once we are done with the is_async_cb check which may skip pushing
+of frame to the frame stack and subsequent stack depth checks and tail
+call markings.
+
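+The failure mode is easier to see in a toy model of the loop (a rough
+userspace sketch with invented names, not the verifier code):
+
+```
+#include <stdio.h>
+
+struct item { int callee; int is_async; };	/* toy "instruction" */
+
+/* Walk the items of the current frame 'cur'.  Assigning to 'cur' before
+ * the async check (the buggy variant) leaves it pointing at the async
+ * callee for the rest of the loop; committing only after the check
+ * keeps it correct.
+ */
+static int walk(const struct item *items, int n, int cur, int buggy)
+{
+	for (int i = 0; i < n; i++) {
+		int next;
+
+		if (items[i].callee < 0)
+			continue;
+		next = items[i].callee;
+		if (buggy)
+			cur = next;		/* too early */
+		if (items[i].is_async)
+			continue;		/* frame is not entered */
+		cur = next;			/* correct place to commit */
+		/* ... descend into 'cur' here ... */
+	}
+	return cur;
+}
+
+int main(void)
+{
+	const struct item prog[] = {
+		{ .callee = 7, .is_async = 1 },	/* async callback, skipped */
+		{ .callee = -1, .is_async = 0 },
+	};
+
+	printf("buggy: %d, fixed: %d\n",
+	       walk(prog, 2, 0, 1), walk(prog, 2, 0, 0));
+	return 0;
+}
+```
+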
+Fixes: 7ddc80a476c2 ("bpf: Teach stack depth check about async callbacks.")
+Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+Link: https://lore.kernel.org/r/20230717161530.1238-2-memxor@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index 8c3ededef3172..fdba4086881b3 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -4336,7 +4336,7 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
+ continue_func:
+ subprog_end = subprog[idx + 1].start;
+ for (; i < subprog_end; i++) {
+- int next_insn;
++ int next_insn, sidx;
+
+ if (!bpf_pseudo_call(insn + i) && !bpf_pseudo_func(insn + i))
+ continue;
+@@ -4346,14 +4346,14 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
+
+ /* find the callee */
+ next_insn = i + insn[i].imm + 1;
+- idx = find_subprog(env, next_insn);
+- if (idx < 0) {
++ sidx = find_subprog(env, next_insn);
++ if (sidx < 0) {
+ WARN_ONCE(1, "verifier bug. No program starts at insn %d\n",
+ next_insn);
+ return -EFAULT;
+ }
+- if (subprog[idx].is_async_cb) {
+- if (subprog[idx].has_tail_call) {
++ if (subprog[sidx].is_async_cb) {
++ if (subprog[sidx].has_tail_call) {
+ verbose(env, "verifier bug. subprog has tail_call and async cb\n");
+ return -EFAULT;
+ }
+@@ -4362,6 +4362,7 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
+ continue;
+ }
+ i = next_insn;
++ idx = sidx;
+
+ if (subprog[idx].has_tail_call)
+ tail_call_reachable = true;
+--
+2.39.2
+
--- /dev/null
+From cb24f938e033cedcefaf283a9d5f44beb406005c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 2 May 2023 11:14:18 -0700
+Subject: bpf: Print a warning only if writing to unprivileged_bpf_disabled.
+
+From: Kui-Feng Lee <thinker.li@gmail.com>
+
+[ Upstream commit fedf99200ab086c42a572fca1d7266b06cdc3e3f ]
+
+Only print the warning message if you are writing to
+"/proc/sys/kernel/unprivileged_bpf_disabled".
+
+The kernel may print an annoying warning when you read
+"/proc/sys/kernel/unprivileged_bpf_disabled" saying
+
+ WARNING: Unprivileged eBPF is enabled with eIBRS on, data leaks possible
+ via Spectre v2 BHB attacks!
+
+However, this message is only meaningful when the feature is
+disabled or enabled.
+
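+The intent, reduced to a userspace sketch (names are illustrative; the
+real handler is bpf_unpriv_handler operating on a sysctl table):
+
+```
+#include <stdbool.h>
+#include <stdio.h>
+
+static int unpriv_bpf_disabled = 1;	/* stand-in for the sysctl value */
+
+static void unpriv_ebpf_notify(int new_value)
+{
+	/* stands in for the hook that may print the Spectre warning */
+	fprintf(stderr, "unprivileged_bpf_disabled is now %d\n", new_value);
+}
+
+/* Toy sysctl handler: 'write' distinguishes writes from reads of the
+ * /proc file.  Only a write can change the setting, so only a write
+ * should trigger the notification/warning.
+ */
+static int unpriv_handler(bool write, int new_value)
+{
+	if (write) {
+		unpriv_bpf_disabled = new_value;
+		unpriv_ebpf_notify(new_value);
+	}
+	return unpriv_bpf_disabled;
+}
+
+int main(void)
+{
+	printf("read: %d\n", unpriv_handler(false, 0));	/* no warning */
+	printf("write: %d\n", unpriv_handler(true, 0));	/* may warn */
+	return 0;
+}
+```
+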
+Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
+Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
+Acked-by: Yonghong Song <yhs@fb.com>
+Link: https://lore.kernel.org/bpf/20230502181418.308479-1-kuifeng@meta.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/syscall.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
+index 8633ec4f92df3..0c44a716f0a24 100644
+--- a/kernel/bpf/syscall.c
++++ b/kernel/bpf/syscall.c
+@@ -5289,7 +5289,8 @@ static int bpf_unpriv_handler(struct ctl_table *table, int write,
+ *(int *)table->data = unpriv_enable;
+ }
+
+- unpriv_ebpf_notify(unpriv_enable);
++ if (write)
++ unpriv_ebpf_notify(unpriv_enable);
+
+ return ret;
+ }
+--
+2.39.2
+
--- /dev/null
+From 765e8a472e267495e5ef26af7754684c76f6627f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 21:45:29 +0530
+Subject: bpf: Repeat check_max_stack_depth for async callbacks
+
+From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+
+[ Upstream commit b5e9ad522c4ccd32d322877515cff8d47ed731b9 ]
+
+While the check_max_stack_depth function explores call chains emanating
+from the main prog, which is typically enough to cover all possible call
+chains, it doesn't explore those rooted at async callbacks unless the
+async callback will have been directly called, since unlike non-async
+callbacks it skips their instruction exploration as they don't
+contribute to stack depth.
+
+It could be the case that the async callback leads to a callchain which
+exceeds the stack depth, but this is never reachable while only
+exploring the entry point from main subprog. Hence, repeat the check for
+the main subprog *and* all async callbacks marked by the symbolic
+execution pass of the verifier, as execution of the program may begin at
+any of them.
+
+Consider functions with following stack depths:
+main: 256
+async: 256
+foo: 256
+
+main:
+ rX = async
+ bpf_timer_set_callback(...)
+
+async:
+ foo()
+
+Here, async is not descended as it does not contribute to stack depth of
+main (since it is referenced using bpf_pseudo_func and not
+bpf_pseudo_call). However, when async is invoked asynchronously, it will
+end up breaching the MAX_BPF_STACK limit by calling foo.
+
+Hence, in addition to main, we also need to explore call chains
+beginning at all async callback subprogs in a program.
+
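+The shape of the fix, as a rough userspace sketch (all names and types
+are illustrative, not the verifier code): run the existing per-entry-point
+walk once for the main subprog and once for every async callback.
+
+```
+#include <stdbool.h>
+#include <stdio.h>
+
+struct subprog_info { bool is_async_cb; };
+
+/* stands in for the existing walk over call chains rooted at 'idx' */
+static int check_stack_depth_from(int idx)
+{
+	printf("walking call chains rooted at subprog %d\n", idx);
+	return 0;	/* 0 = within the stack limit */
+}
+
+static int check_all_entry_points(const struct subprog_info *si, int cnt)
+{
+	for (int i = 0; i < cnt; i++) {
+		/* the main subprog (i == 0) and every async callback can
+		 * be an entry point of execution
+		 */
+		if (i == 0 || si[i].is_async_cb) {
+			int ret = check_stack_depth_from(i);
+
+			if (ret < 0)
+				return ret;
+		}
+	}
+	return 0;
+}
+
+int main(void)
+{
+	const struct subprog_info progs[] = { { false }, { true }, { false } };
+
+	return check_all_entry_points(progs, 3);
+}
+```
+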
+Fixes: 7ddc80a476c2 ("bpf: Teach stack depth check about async callbacks.")
+Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
+Link: https://lore.kernel.org/r/20230717161530.1238-3-memxor@gmail.com
+Signed-off-by: Alexei Starovoitov <ast@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/bpf/verifier.c | 21 +++++++++++++++++++--
+ 1 file changed, 19 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
+index fdba4086881b3..f25ce959fae64 100644
+--- a/kernel/bpf/verifier.c
++++ b/kernel/bpf/verifier.c
+@@ -4288,16 +4288,17 @@ static int update_stack_depth(struct bpf_verifier_env *env,
+ * Since recursion is prevented by check_cfg() this algorithm
+ * only needs a local stack of MAX_CALL_FRAMES to remember callsites
+ */
+-static int check_max_stack_depth(struct bpf_verifier_env *env)
++static int check_max_stack_depth_subprog(struct bpf_verifier_env *env, int idx)
+ {
+- int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
+ struct bpf_subprog_info *subprog = env->subprog_info;
+ struct bpf_insn *insn = env->prog->insnsi;
++ int depth = 0, frame = 0, i, subprog_end;
+ bool tail_call_reachable = false;
+ int ret_insn[MAX_CALL_FRAMES];
+ int ret_prog[MAX_CALL_FRAMES];
+ int j;
+
++ i = subprog[idx].start;
+ process_func:
+ /* protect against potential stack overflow that might happen when
+ * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
+@@ -4398,6 +4399,22 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
+ goto continue_func;
+ }
+
++static int check_max_stack_depth(struct bpf_verifier_env *env)
++{
++ struct bpf_subprog_info *si = env->subprog_info;
++ int ret;
++
++ for (int i = 0; i < env->subprog_cnt; i++) {
++ if (!i || si[i].is_async_cb) {
++ ret = check_max_stack_depth_subprog(env, i);
++ if (ret < 0)
++ return ret;
++ }
++ continue;
++ }
++ return 0;
++}
++
+ #ifndef CONFIG_BPF_JIT_ALWAYS_ON
+ static int get_callee_stack_depth(struct bpf_verifier_env *env,
+ const struct bpf_insn *insn, int idx)
+--
+2.39.2
+
--- /dev/null
+From 76b79c254cf2d798a26a7e99c73226b2df0ff1bb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 May 2023 22:51:49 +0000
+Subject: bpf: tcp: Avoid taking fast sock lock in iterator
+
+From: Aditi Ghag <aditi.ghag@isovalent.com>
+
+[ Upstream commit 9378096e8a656fb5c4099b26b1370c56f056eab9 ]
+
+This is a preparatory commit to replace `lock_sock_fast` with
+`lock_sock`, and to allow BPF programs executed from the TCP sockets
+iterator to destroy TCP sockets using the bpf_sock_destroy kfunc
+(implemented in follow-up commits).
+
+Previously, the BPF TCP iterator was acquiring the sock lock with BH
+disabled. This led to scenarios where the sockets hash table bucket lock
+can be acquired with BH enabled in some paths versus disabled in others.
+In such a situation, the kernel issues a warning since it thinks that in
+the BH-enabled path the same bucket lock *might* be acquired again in
+softirq context (BH disabled), which would lead to a potential deadlock.
+Since bpf_sock_destroy also happens in process context, the potential
+deadlock warning is likely a false alarm.
+
+Here is a snippet of annotated stack trace that motivated this change:
+
+```
+
+Possible interrupt unsafe locking scenario:
+
+ CPU0 CPU1
+ ---- ----
+ lock(&h->lhash2[i].lock);
+ local_bh_disable();
+ lock(&h->lhash2[i].lock);
+kernel imagined possible scenario:
+ local_bh_disable(); /* Possible softirq */
+ lock(&h->lhash2[i].lock);
+*** Potential Deadlock ***
+
+process context:
+
+lock_acquire+0xcd/0x330
+_raw_spin_lock+0x33/0x40
+------> Acquire (bucket) lhash2.lock with BH enabled
+__inet_hash+0x4b/0x210
+inet_csk_listen_start+0xe6/0x100
+inet_listen+0x95/0x1d0
+__sys_listen+0x69/0xb0
+__x64_sys_listen+0x14/0x20
+do_syscall_64+0x3c/0x90
+entry_SYSCALL_64_after_hwframe+0x72/0xdc
+
+bpf_sock_destroy run from iterator:
+
+lock_acquire+0xcd/0x330
+_raw_spin_lock+0x33/0x40
+------> Acquire (bucket) lhash2.lock with BH disabled
+inet_unhash+0x9a/0x110
+tcp_set_state+0x6a/0x210
+tcp_abort+0x10d/0x200
+bpf_prog_6793c5ca50c43c0d_iter_tcp6_server+0xa4/0xa9
+bpf_iter_run_prog+0x1ff/0x340
+------> lock_sock_fast that acquires sock lock with BH disabled
+bpf_iter_tcp_seq_show+0xca/0x190
+bpf_seq_read+0x177/0x450
+
+```
+
+Also, Yonghong reported a deadlock for non-listening TCP sockets that
+this change resolves. Previously, `lock_sock_fast` held the sock spin
+lock with BH which was again being acquired in `tcp_abort`:
+
+```
+watchdog: BUG: soft lockup - CPU#0 stuck for 86s! [test_progs:2331]
+RIP: 0010:queued_spin_lock_slowpath+0xd8/0x500
+Call Trace:
+ <TASK>
+ _raw_spin_lock+0x84/0x90
+ tcp_abort+0x13c/0x1f0
+ bpf_prog_88539c5453a9dd47_iter_tcp6_client+0x82/0x89
+ bpf_iter_run_prog+0x1aa/0x2c0
+ ? preempt_count_sub+0x1c/0xd0
+ ? from_kuid_munged+0x1c8/0x210
+ bpf_iter_tcp_seq_show+0x14e/0x1b0
+ bpf_seq_read+0x36c/0x6a0
+
+bpf_iter_tcp_seq_show
+ lock_sock_fast
+ __lock_sock_fast
+ spin_lock_bh(&sk->sk_lock.slock);
+ /* * Fast path return with bottom halves disabled and * sock::sk_lock.slock held.* */
+
+ ...
+ tcp_abort
+ local_bh_disable();
+ spin_lock(&((sk)->sk_lock.slock)); // from bh_lock_sock(sk)
+
+```
+
+With the switch to `lock_sock`, it calls `spin_unlock_bh` before returning:
+
+```
+lock_sock
+ lock_sock_nested
+ spin_lock_bh(&sk->sk_lock.slock);
+ :
+ spin_unlock_bh(&sk->sk_lock.slock);
+```
+
+Acked-by: Yonghong Song <yhs@meta.com>
+Acked-by: Stanislav Fomichev <sdf@google.com>
+Signed-off-by: Aditi Ghag <aditi.ghag@isovalent.com>
+Link: https://lore.kernel.org/r/20230519225157.760788-2-aditi.ghag@isovalent.com
+Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_ipv4.c | 5 ++---
+ 1 file changed, 2 insertions(+), 3 deletions(-)
+
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index b37c1bcb15097..a7de5ba74e7f7 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -2911,7 +2911,6 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+ struct sock *sk = v;
+- bool slow;
+ uid_t uid;
+ int ret;
+
+@@ -2919,7 +2918,7 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
+ return 0;
+
+ if (sk_fullsock(sk))
+- slow = lock_sock_fast(sk);
++ lock_sock(sk);
+
+ if (unlikely(sk_unhashed(sk))) {
+ ret = SEQ_SKIP;
+@@ -2943,7 +2942,7 @@ static int bpf_iter_tcp_seq_show(struct seq_file *seq, void *v)
+
+ unlock:
+ if (sk_fullsock(sk))
+- unlock_sock_fast(sk, slow);
++ release_sock(sk);
+ return ret;
+
+ }
+--
+2.39.2
+
--- /dev/null
+From 5841124edbf8b166987956c008ec9eafe491d36b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 12 Jul 2023 08:44:49 -0700
+Subject: bridge: Add extack warning when enabling STP in netns.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 56a16035bb6effb37177867cea94c13a8382f745 ]
+
+When we create an L2 loop on a bridge in netns, we will see a packet storm
+even if STP is enabled.
+
+ # unshare -n
+ # ip link add br0 type bridge
+ # ip link add veth0 type veth peer name veth1
+ # ip link set veth0 master br0 up
+ # ip link set veth1 master br0 up
+ # ip link set br0 type bridge stp_state 1
+ # ip link set br0 up
+ # sleep 30
+ # ip -s link show br0
+ 2: br0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc noqueue state UP mode DEFAULT group default qlen 1000
+ link/ether b6:61:98:1c:1c:b5 brd ff:ff:ff:ff:ff:ff
+ RX: bytes packets errors dropped missed mcast
+ 956553768 12861249 0 0 0 12861249 <-. Keep
+ TX: bytes packets errors dropped carrier collsns | increasing
+ 1027834 11951 0 0 0 0 <-' rapidly
+
+This is because llc_rcv() drops all packets in non-root netns and BPDU
+is dropped.
+
+Let's add extack warning when enabling STP in netns.
+
+ # unshare -n
+ # ip link add br0 type bridge
+ # ip link set br0 type bridge stp_state 1
+ Warning: bridge: STP does not work in non-root netns.
+
+Note this commit will be reverted later when we namespacify the whole LLC
+infra.
+
+Fixes: e730c15519d0 ("[NET]: Make packet reception network namespace safe")
+Suggested-by: Harry Coin <hcoin@quietfountain.com>
+Link: https://lore.kernel.org/netdev/0f531295-e289-022d-5add-5ceffa0df9bc@quietfountain.com/
+Suggested-by: Ido Schimmel <idosch@idosch.org>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Acked-by: Nikolay Aleksandrov <razor@blackwall.org>
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/bridge/br_stp_if.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
+index 75204d36d7f90..b65962682771f 100644
+--- a/net/bridge/br_stp_if.c
++++ b/net/bridge/br_stp_if.c
+@@ -201,6 +201,9 @@ int br_stp_set_enabled(struct net_bridge *br, unsigned long val,
+ {
+ ASSERT_RTNL();
+
++ if (!net_eq(dev_net(br->dev), &init_net))
++ NL_SET_ERR_MSG_MOD(extack, "STP does not work in non-root netns");
++
+ if (br_mrp_enabled(br)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "STP can't be enabled if MRP is already enabled");
+--
+2.39.2
+
--- /dev/null
+From 34038040cc781e64ecfa341e776b1d3ca1839d8a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 27 Jun 2023 08:13:23 +0200
+Subject: btrfs: be a bit more careful when setting mirror_num_ret in
+ btrfs_map_block
+
+From: Christoph Hellwig <hch@lst.de>
+
+[ Upstream commit 4e7de35eb7d1a1d4f2dda15f39fbedd4798a0b8d ]
+
+The mirror_num_ret is allowed to be NULL, although it has to be set when
+smap is set. Unfortunately that is not an invariant that static type
+checkers can express well enough, so add a NULL check to make sure they
+are fine.
+
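+The invariant is the common optional-out-parameter pattern; a minimal
+sketch (invented names, not the btrfs code):
+
+```
+#include <stdio.h>
+
+/* 'mirror_ret' may be NULL when the caller does not care about the
+ * mirror number, so every store through it must be guarded.
+ */
+static int map_block(int stripe_index, int *mirror_ret)
+{
+	if (mirror_ret)
+		*mirror_ret = stripe_index + 1;
+	return 0;
+}
+
+int main(void)
+{
+	int mirror;
+
+	map_block(2, &mirror);		/* caller wants the value */
+	printf("mirror %d\n", mirror);
+	map_block(2, NULL);		/* caller does not; must not crash */
+	return 0;
+}
+```
+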
+Fixes: 03793cbbc80f ("btrfs: add fast path for single device io in __btrfs_map_block")
+Reported-by: Dan Carpenter <dan.carpenter@linaro.org>
+Reviewed-by: Qu Wenruo <wqu@suse.com>
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Signed-off-by: Christoph Hellwig <hch@lst.de>
+Reviewed-by: David Sterba <dsterba@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/volumes.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
+index 7433ae929fdcb..2e0832d70406c 100644
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -6595,11 +6595,13 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
+ if (patch_the_first_stripe_for_dev_replace) {
+ smap->dev = dev_replace->tgtdev;
+ smap->physical = physical_to_patch_in_first_stripe;
+- *mirror_num_ret = map->num_stripes + 1;
++ if (mirror_num_ret)
++ *mirror_num_ret = map->num_stripes + 1;
+ } else {
+ set_io_stripe(smap, map, stripe_index, stripe_offset,
+ stripe_nr);
+- *mirror_num_ret = mirror_num;
++ if (mirror_num_ret)
++ *mirror_num_ret = mirror_num;
+ }
+ *bioc_ret = NULL;
+ ret = 0;
+--
+2.39.2
+
--- /dev/null
+From 7a8eaa17077746c57f6fa160701348e82e480ae9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Jul 2023 08:56:33 +0000
+Subject: cifs: fix mid leak during reconnection after timeout threshold
+
+From: Shyam Prasad N <nspmangalore@gmail.com>
+
+[ Upstream commit 69cba9d3c1284e0838ae408830a02c4a063104bc ]
+
+When the number of responses with status of STATUS_IO_TIMEOUT
+exceeds a specified threshold (NUM_STATUS_IO_TIMEOUT), we reconnect
+the connection. But we do not return the mid, or the credits
+returned for the mid, or reduce the number of in-flight requests.
+
+This bug could cause the server->in_flight count to go bad,
+and also cause a leak in the mids.
+
+This change moves the check to a few lines below, after the
+response is decrypted, even if the response is read from the
+transform header. This way, the code for returning the mids
+can be reused.
+
+Also, the cifs_reconnect was reconnecting just the transport
+connection before. In case of multi-channel, this may not be
+what we want to do after several timeouts. Changed that to
+reconnect the session and the tree too.
+
+Also renamed NUM_STATUS_IO_TIMEOUT to a more appropriate name
+MAX_STATUS_IO_TIMEOUT.
+
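+A reduced model of the control-flow change (a userspace sketch of the
+demultiplex loop with invented helpers, not the cifs code):
+
+```
+#include <stdbool.h>
+#include <stdio.h>
+
+#define MAX_STATUS_IO_TIMEOUT 5
+
+static void handle_mid(int i)	{ printf("mid %d returned\n", i); }
+static void reconnect(void)	{ puts("reconnecting session and tree"); }
+
+int main(void)
+{
+	bool pending_reconnect = false;
+	int num_io_timeout = 0;
+
+	for (int i = 0; i < 8; i++) {
+		bool status_io_timeout = true;	/* pretend every reply timed out */
+
+		if (status_io_timeout &&
+		    ++num_io_timeout > MAX_STATUS_IO_TIMEOUT) {
+			/* previously: reconnect here and skip the rest of
+			 * the iteration, leaking the mid and its credits
+			 */
+			pending_reconnect = true;
+			num_io_timeout = 0;
+		}
+
+		handle_mid(i);	/* mid and credits are always returned */
+	}
+
+	if (pending_reconnect)	/* reconnect only after the mids are handled */
+		reconnect();
+	return 0;
+}
+```
+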
+Fixes: 8e670f77c4a5 ("Handle STATUS_IO_TIMEOUT gracefully")
+Signed-off-by: Shyam Prasad N <sprasad@microsoft.com>
+Signed-off-by: Steve French <stfrench@microsoft.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/smb/client/connect.c | 19 +++++++++++++++----
+ 1 file changed, 15 insertions(+), 4 deletions(-)
+
+diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c
+index 935fe198a4baf..cbe08948baf4a 100644
+--- a/fs/smb/client/connect.c
++++ b/fs/smb/client/connect.c
+@@ -59,7 +59,7 @@ extern bool disable_legacy_dialects;
+ #define TLINK_IDLE_EXPIRE (600 * HZ)
+
+ /* Drop the connection to not overload the server */
+-#define NUM_STATUS_IO_TIMEOUT 5
++#define MAX_STATUS_IO_TIMEOUT 5
+
+ struct mount_ctx {
+ struct cifs_sb_info *cifs_sb;
+@@ -1162,6 +1162,7 @@ cifs_demultiplex_thread(void *p)
+ struct mid_q_entry *mids[MAX_COMPOUND];
+ char *bufs[MAX_COMPOUND];
+ unsigned int noreclaim_flag, num_io_timeout = 0;
++ bool pending_reconnect = false;
+
+ noreclaim_flag = memalloc_noreclaim_save();
+ cifs_dbg(FYI, "Demultiplex PID: %d\n", task_pid_nr(current));
+@@ -1201,6 +1202,8 @@ cifs_demultiplex_thread(void *p)
+ cifs_dbg(FYI, "RFC1002 header 0x%x\n", pdu_length);
+ if (!is_smb_response(server, buf[0]))
+ continue;
++
++ pending_reconnect = false;
+ next_pdu:
+ server->pdu_size = pdu_length;
+
+@@ -1258,10 +1261,13 @@ cifs_demultiplex_thread(void *p)
+ if (server->ops->is_status_io_timeout &&
+ server->ops->is_status_io_timeout(buf)) {
+ num_io_timeout++;
+- if (num_io_timeout > NUM_STATUS_IO_TIMEOUT) {
+- cifs_reconnect(server, false);
++ if (num_io_timeout > MAX_STATUS_IO_TIMEOUT) {
++ cifs_server_dbg(VFS,
++ "Number of request timeouts exceeded %d. Reconnecting",
++ MAX_STATUS_IO_TIMEOUT);
++
++ pending_reconnect = true;
+ num_io_timeout = 0;
+- continue;
+ }
+ }
+
+@@ -1308,6 +1314,11 @@ cifs_demultiplex_thread(void *p)
+ buf = server->smallbuf;
+ goto next_pdu;
+ }
++
++ /* do this reconnect at the very end after processing all MIDs */
++ if (pending_reconnect)
++ cifs_reconnect(server, true);
++
+ } /* end while !EXITING */
+
+ /* buffer usually freed in free_mid - need to free it here on exit */
+--
+2.39.2
+
--- /dev/null
+From 4aca3a9686777cc7cbeeafbea29e9349e546bc92 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Jun 2023 11:54:47 +0200
+Subject: devlink: report devlink_port_type_warn source device
+
+From: Petr Oros <poros@redhat.com>
+
+[ Upstream commit a52305a81d6bb74b90b400dfa56455d37872fe4b ]
+
+devlink_port_type_warn is scheduled for a devlink port and warns
+when the port type is not set. But from this warning it is not easy
+to find out which device (driver) has not set the devlink port type.
+
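+The fix recovers the port from the queued work item so the warning can
+name the device; a userspace sketch of that container_of() idiom
+(simplified types, not the devlink code):
+
+```
+#include <stddef.h>
+#include <stdio.h>
+
+#define container_of(ptr, type, member) \
+	((type *)((char *)(ptr) - offsetof(type, member)))
+
+struct work_struct { int pending; };
+
+struct port {				/* simplified devlink_port stand-in */
+	const char *dev_name;
+	struct work_struct type_warn_work;
+};
+
+static void port_type_warn(struct work_struct *work)
+{
+	/* recover the enclosing port from the embedded work item */
+	struct port *port = container_of(work, struct port, type_warn_work);
+
+	fprintf(stderr, "%s: Type was not set for devlink port.\n",
+		port->dev_name);
+}
+
+int main(void)
+{
+	struct port p = { .dev_name = "0000:41:00.0" };
+
+	port_type_warn(&p.type_warn_work);
+	return 0;
+}
+```
+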
+[ 3709.975552] Type was not set for devlink port.
+[ 3709.975579] WARNING: CPU: 1 PID: 13092 at net/devlink/leftover.c:6775 devlink_port_type_warn+0x11/0x20
+[ 3709.993967] Modules linked in: openvswitch nf_conncount nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 nfnetlink bluetooth rpcsec_gss_krb5 auth_rpcgss nfsv4 dns_resolver nfs lockd grace fscache netfs vhost_net vhost vhost_iotlb tap tun bridge stp llc qrtr intel_rapl_msr intel_rapl_common i10nm_edac nfit libnvdimm x86_pkg_temp_thermal mlx5_ib intel_powerclamp coretemp dell_wmi ledtrig_audio sparse_keymap ipmi_ssif kvm_intel ib_uverbs rfkill ib_core video kvm iTCO_wdt acpi_ipmi intel_vsec irqbypass ipmi_si iTCO_vendor_support dcdbas ipmi_devintf mei_me ipmi_msghandler rapl mei intel_cstate isst_if_mmio isst_if_mbox_pci dell_smbios intel_uncore isst_if_common i2c_i801 dell_wmi_descriptor wmi_bmof i2c_smbus intel_pch_thermal pcspkr acpi_power_meter xfs libcrc32c sd_mod sg nvme_tcp mgag200 i2c_algo_bit nvme_fabrics drm_shmem_helper drm_kms_helper nvme syscopyarea ahci sysfillrect sysimgblt nvme_core fb_sys_fops crct10dif_pclmul libahci mlx5_core sfc crc32_pclmul nvme_common drm
+[ 3709.994030] crc32c_intel mtd t10_pi mlxfw libata tg3 mdio megaraid_sas psample ghash_clmulni_intel pci_hyperv_intf wmi dm_multipath sunrpc dm_mirror dm_region_hash dm_log dm_mod be2iscsi bnx2i cnic uio cxgb4i cxgb4 tls libcxgbi libcxgb qla4xxx iscsi_boot_sysfs iscsi_tcp libiscsi_tcp libiscsi scsi_transport_iscsi fuse
+[ 3710.108431] CPU: 1 PID: 13092 Comm: kworker/1:1 Kdump: loaded Not tainted 5.14.0-319.el9.x86_64 #1
+[ 3710.108435] Hardware name: Dell Inc. PowerEdge R750/0PJ80M, BIOS 1.8.2 09/14/2022
+[ 3710.108437] Workqueue: events devlink_port_type_warn
+[ 3710.108440] RIP: 0010:devlink_port_type_warn+0x11/0x20
+[ 3710.108443] Code: 84 76 fe ff ff 48 c7 03 20 0e 1a ad 31 c0 e9 96 fd ff ff 66 0f 1f 44 00 00 0f 1f 44 00 00 48 c7 c7 18 24 4e ad e8 ef 71 62 ff <0f> 0b c3 cc cc cc cc 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 f6 87
+[ 3710.108445] RSP: 0018:ff3b6d2e8b3c7e90 EFLAGS: 00010282
+[ 3710.108447] RAX: 0000000000000000 RBX: ff366d6580127080 RCX: 0000000000000027
+[ 3710.108448] RDX: 0000000000000027 RSI: 00000000ffff86de RDI: ff366d753f41f8c8
+[ 3710.108449] RBP: ff366d658ff5a0c0 R08: ff366d753f41f8c0 R09: ff3b6d2e8b3c7e18
+[ 3710.108450] R10: 0000000000000001 R11: 0000000000000023 R12: ff366d753f430600
+[ 3710.108451] R13: ff366d753f436900 R14: 0000000000000000 R15: ff366d753f436905
+[ 3710.108452] FS: 0000000000000000(0000) GS:ff366d753f400000(0000) knlGS:0000000000000000
+[ 3710.108453] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+[ 3710.108454] CR2: 00007f1c57bc74e0 CR3: 000000111d26a001 CR4: 0000000000773ee0
+[ 3710.108456] PKRU: 55555554
+[ 3710.108457] Call Trace:
+[ 3710.108458] <TASK>
+[ 3710.108459] process_one_work+0x1e2/0x3b0
+[ 3710.108466] ? rescuer_thread+0x390/0x390
+[ 3710.108468] worker_thread+0x50/0x3a0
+[ 3710.108471] ? rescuer_thread+0x390/0x390
+[ 3710.108473] kthread+0xdd/0x100
+[ 3710.108477] ? kthread_complete_and_exit+0x20/0x20
+[ 3710.108479] ret_from_fork+0x1f/0x30
+[ 3710.108485] </TASK>
+[ 3710.108486] ---[ end trace 1b4b23cd0c65d6a0 ]---
+
+After patch:
+[ 402.473064] ice 0000:41:00.0: Type was not set for devlink port.
+[ 402.473064] ice 0000:41:00.1: Type was not set for devlink port.
+
+Signed-off-by: Petr Oros <poros@redhat.com>
+Reviewed-by: Pavan Chebbi <pavan.chebbi@broadcom.com>
+Reviewed-by: Jakub Kicinski <kuba@kernel.org>
+Link: https://lore.kernel.org/r/20230615095447.8259-1-poros@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/devlink.c | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/net/core/devlink.c b/net/core/devlink.c
+index 2aa77d4b80d0a..5a4a4b34ac15c 100644
+--- a/net/core/devlink.c
++++ b/net/core/devlink.c
+@@ -9826,7 +9826,10 @@ EXPORT_SYMBOL_GPL(devlink_free);
+
+ static void devlink_port_type_warn(struct work_struct *work)
+ {
+- WARN(true, "Type was not set for devlink port.");
++ struct devlink_port *port = container_of(to_delayed_work(work),
++ struct devlink_port,
++ type_warn_dw);
++ dev_warn(port->devlink->dev, "Type was not set for devlink port.");
+ }
+
+ static bool devlink_port_type_should_warn(struct devlink_port *devlink_port)
+--
+2.39.2
+
--- /dev/null
+From 20d5e3268aeb5cd2827f61521d33a0203f680509 Mon Sep 17 00:00:00 2001
+From: hackyzh002 <hackyzh002@gmail.com>
+Date: Wed, 19 Apr 2023 20:20:58 +0800
+Subject: [PATCH AUTOSEL 4.19 01/11] drm/radeon: Fix integer overflow in
+ radeon_cs_parser_init
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit f828b681d0cd566f86351c0b913e6cb6ed8c7b9c ]
+
+The type of size is unsigned. If size is 0x40000000, there will be an
+integer overflow: size will be zero after size *= sizeof(uint32_t),
+which will cause uninitialized memory to be referenced later.
+
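+The overflow is easy to reproduce in isolation (a standalone sketch, not
+the radeon code):
+
+```
+#include <stdint.h>
+#include <stdio.h>
+
+int main(void)
+{
+	unsigned int size = 0x40000000;	/* userspace-controlled chunk count */
+	uint64_t size64 = size;
+
+	size *= sizeof(uint32_t);	/* 0x100000000 truncated to 0 */
+	size64 *= sizeof(uint32_t);	/* cannot wrap for 32-bit inputs */
+
+	printf("unsigned: %u, u64: %llu\n",
+	       size, (unsigned long long)size64);
+	return 0;
+}
+```
+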
+Reviewed-by: Christian König <christian.koenig@amd.com>
+Signed-off-by: hackyzh002 <hackyzh002@gmail.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/radeon/radeon_cs.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
+index 1ae31dbc61c64..5e61abb3dce5c 100644
+--- a/drivers/gpu/drm/radeon/radeon_cs.c
++++ b/drivers/gpu/drm/radeon/radeon_cs.c
+@@ -265,7 +265,8 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
+ {
+ struct drm_radeon_cs *cs = data;
+ uint64_t *chunk_array_ptr;
+- unsigned size, i;
++ u64 size;
++ unsigned i;
+ u32 ring = RADEON_CS_RING_GFX;
+ s32 priority = 0;
+
+--
+2.39.2
+
--- /dev/null
+From 9bbaa84ecaeca40ae4d2d1cd4ab363546113da7a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 00:34:05 +0200
+Subject: dsa: mv88e6xxx: Do a final check before timing out
+
+From: Linus Walleij <linus.walleij@linaro.org>
+
+[ Upstream commit 95ce158b6c93b28842b54b42ad1cb221b9844062 ]
+
+I get sporadic timeouts from the driver when using the
+MV88E6352. Reading the status again after the loop fixes the
+problem: the operation is successful but goes undetected.
+
+Some added prints show things like this:
+
+[ 58.356209] mv88e6085 mdio_mux-0.1:00: Timeout while waiting
+ for switch, addr 1b reg 0b, mask 8000, val 0000, data c000
+[ 58.367487] mv88e6085 mdio_mux-0.1:00: Timeout waiting for
+ ATU op 4000, fid 0001
+(...)
+[ 61.826293] mv88e6085 mdio_mux-0.1:00: Timeout while waiting
+ for switch, addr 1c reg 18, mask 8000, val 0000, data 9860
+[ 61.837560] mv88e6085 mdio_mux-0.1:00: Timeout waiting
+ for PHY command 1860 to complete
+
+The reason is probably not the commands: I think those are
+mostly fine with the 50+50ms timeout, but the problem
+appears when OpenWrt brings up several interfaces in
+parallel on a system with 7 populated ports: if one of
+them takes more than 50 ms and waits, one or more of the
+others can get stuck on the mutex for the switch and then
+this can easily multiply.
+
+As we sleep and wait, the function needs a final check
+after exiting the loop to see if we were successful.
+
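+The pattern is a sleeping poll that can oversleep past the deadline; a
+generic userspace sketch of the "check once more after the loop" fix
+(illustrative names only, not the driver code):
+
+```
+#include <stdbool.h>
+#include <stdio.h>
+
+static int polls_left = 1;
+
+/* stand-in for reading the busy bit; the hardware finishes eventually */
+static bool hw_ready(void)
+{
+	return polls_left-- <= 0;
+}
+
+static int wait_ready(int max_polls)
+{
+	for (int i = 0; i < max_polls; i++) {
+		if (hw_ready())
+			return 0;
+		/* sleep here; the hardware may finish while we sleep */
+	}
+
+	/* final check: the last sleep may have hidden a success */
+	if (hw_ready())
+		return 0;
+
+	fprintf(stderr, "timeout while waiting for switch\n");
+	return -1;
+}
+
+int main(void)
+{
+	return wait_ready(1) ? 1 : 0;
+}
+```
+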
+Suggested-by: Andrew Lunn <andrew@lunn.ch>
+Cc: Tobias Waldekranz <tobias@waldekranz.com>
+Fixes: 35da1dfd9484 ("net: dsa: mv88e6xxx: Improve performance of busy bit polling")
+Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
+Reviewed-by: Andrew Lunn <andrew@lunn.ch>
+Link: https://lore.kernel.org/r/20230712223405.861899-1-linus.walleij@linaro.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/mv88e6xxx/chip.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
+index 4db1652015d1d..b69bd44ada1f2 100644
+--- a/drivers/net/dsa/mv88e6xxx/chip.c
++++ b/drivers/net/dsa/mv88e6xxx/chip.c
+@@ -109,6 +109,13 @@ int mv88e6xxx_wait_mask(struct mv88e6xxx_chip *chip, int addr, int reg,
+ usleep_range(1000, 2000);
+ }
+
++ err = mv88e6xxx_read(chip, addr, reg, &data);
++ if (err)
++ return err;
++
++ if ((data & mask) == val)
++ return 0;
++
+ dev_err(chip->dev, "Timeout while waiting for switch\n");
+ return -ETIMEDOUT;
+ }
+--
+2.39.2
+
--- /dev/null
+From 3f351b5e8558e6d06eb00f3a0b3ce2ac4d1bd613 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sat, 15 Jul 2023 16:16:56 +0800
+Subject: fbdev: au1200fb: Fix missing IRQ check in au1200fb_drv_probe
+
+From: Zhang Shurong <zhang_shurong@foxmail.com>
+
+[ Upstream commit 4e88761f5f8c7869f15a2046b1a1116f4fab4ac8 ]
+
+This function misses checking the return value of platform_get_irq() and
+may pass a negative error code to request_irq(), which takes an unsigned
+IRQ number, causing it to fail with -EINVAL and overriding the original
+error code.
+
+Fix this by not calling request_irq() with invalid IRQ numbers.
+
+Fixes: 1630d85a8312 ("au1200fb: fix hardcoded IRQ")
+Signed-off-by: Zhang Shurong <zhang_shurong@foxmail.com>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/au1200fb.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/drivers/video/fbdev/au1200fb.c b/drivers/video/fbdev/au1200fb.c
+index b6b22fa4a8a01..fd3ff398d234a 100644
+--- a/drivers/video/fbdev/au1200fb.c
++++ b/drivers/video/fbdev/au1200fb.c
+@@ -1732,6 +1732,9 @@ static int au1200fb_drv_probe(struct platform_device *dev)
+
+ /* Now hook interrupt too */
+ irq = platform_get_irq(dev, 0);
++ if (irq < 0)
++ return irq;
++
+ ret = request_irq(irq, au1200fb_handle_irq,
+ IRQF_SHARED, "lcd", (void *)dev);
+ if (ret) {
+--
+2.39.2
+
--- /dev/null
+From 37392063869cec1e0f260e3d3edc86270b958c95 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 10 Jul 2023 21:19:58 +0800
+Subject: fbdev: imxfb: Removed unneeded release_mem_region
+
+From: Yangtao Li <frank.li@vivo.com>
+
+[ Upstream commit 45fcc058a75bf5d65cf4c32da44a252fbe873cd4 ]
+
+Remove the unnecessary release_mem_region() from the error path to
+prevent the mem region from being released twice, avoiding a resource
+leak or other unexpected issues.
+
+Fixes: b083c22d5114 ("video: fbdev: imxfb: Convert request_mem_region + ioremap to devm_ioremap_resource")
+Signed-off-by: Yangtao Li <frank.li@vivo.com>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/imxfb.c | 1 -
+ 1 file changed, 1 deletion(-)
+
+diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c
+index 61731921011d5..36ada87b49a49 100644
+--- a/drivers/video/fbdev/imxfb.c
++++ b/drivers/video/fbdev/imxfb.c
+@@ -1043,7 +1043,6 @@ static int imxfb_probe(struct platform_device *pdev)
+ failed_map:
+ failed_ioremap:
+ failed_getclock:
+- release_mem_region(res->start, resource_size(res));
+ failed_of_parse:
+ kfree(info->pseudo_palette);
+ failed_init:
+--
+2.39.2
+
--- /dev/null
+From c6e2909b7334117823ea14b1738ea3584813e756 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Jun 2023 15:24:37 +0200
+Subject: fbdev: imxfb: warn about invalid left/right margin
+
+From: Martin Kaiser <martin@kaiser.cx>
+
+[ Upstream commit 4e47382fbca916d7db95cbf9e2d7ca2e9d1ca3fe ]
+
+Warn about invalid var->left_margin or var->right_margin. Their values
+are read from the device tree.
+
+We store var->left_margin-3 and var->right_margin-1 in register
+fields. These fields should be >= 0.
+
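+Since the register fields hold left_margin - 3 and right_margin - 1, the
+margins must be at least 3 and 1; a tiny sketch of the added validation
+(illustrative, not the driver code):
+
+```
+#include <stdio.h>
+
+static void check_margins(unsigned int left, unsigned int right)
+{
+	/* the hardware field stores left - 3, which must be >= 0 */
+	if (left < 3 || left > 255)
+		fprintf(stderr, "invalid left_margin %u\n", left);
+	/* the hardware field stores right - 1, which must be >= 0 */
+	if (right < 1 || right > 255)
+		fprintf(stderr, "invalid right_margin %u\n", right);
+}
+
+int main(void)
+{
+	check_margins(2, 0);	/* both warned about */
+	check_margins(3, 1);	/* both accepted */
+	return 0;
+}
+```
+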
+Fixes: 7e8549bcee00 ("imxfb: Fix margin settings")
+Signed-off-by: Martin Kaiser <martin@kaiser.cx>
+Signed-off-by: Helge Deller <deller@gmx.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/video/fbdev/imxfb.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/video/fbdev/imxfb.c b/drivers/video/fbdev/imxfb.c
+index 51fde1b2a7938..61731921011d5 100644
+--- a/drivers/video/fbdev/imxfb.c
++++ b/drivers/video/fbdev/imxfb.c
+@@ -613,10 +613,10 @@ static int imxfb_activate_var(struct fb_var_screeninfo *var, struct fb_info *inf
+ if (var->hsync_len < 1 || var->hsync_len > 64)
+ printk(KERN_ERR "%s: invalid hsync_len %d\n",
+ info->fix.id, var->hsync_len);
+- if (var->left_margin > 255)
++ if (var->left_margin < 3 || var->left_margin > 255)
+ printk(KERN_ERR "%s: invalid left_margin %d\n",
+ info->fix.id, var->left_margin);
+- if (var->right_margin > 255)
++ if (var->right_margin < 1 || var->right_margin > 255)
+ printk(KERN_ERR "%s: invalid right_margin %d\n",
+ info->fix.id, var->right_margin);
+ if (var->yres < 1 || var->yres > ymax_mask)
+--
+2.39.2
+
--- /dev/null
+From ffb509c36e5b36da98c9fb1f8f539f0cbf606665 Mon Sep 17 00:00:00 2001
+From: Immad Mir <mirimmad17@gmail.com>
+Date: Fri, 23 Jun 2023 19:17:08 +0530
+Subject: [PATCH AUTOSEL 4.19 11/11] FS: JFS: Check for read-only mounted
+ filesystem in txBegin
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit 95e2b352c03b0a86c5717ba1d24ea20969abcacc ]
+
+ This patch adds a check for a read-only mounted filesystem
+ in txBegin before starting a transaction, potentially saving
+ us from a NULL pointer deref.
+
+Signed-off-by: Immad Mir <mirimmad17@gmail.com>
+Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/jfs/jfs_txnmgr.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
+index 78789c5ed36b0..e10db01f253b8 100644
+--- a/fs/jfs/jfs_txnmgr.c
++++ b/fs/jfs/jfs_txnmgr.c
+@@ -367,6 +367,11 @@ tid_t txBegin(struct super_block *sb, int flag)
+ jfs_info("txBegin: flag = 0x%x", flag);
+ log = JFS_SBI(sb)->log;
+
++ if (!log) {
++ jfs_error(sb, "read-only filesystem\n");
++ return 0;
++ }
++
+ TXN_LOCK();
+
+ INCREMENT(TxStat.txBegin);
+--
+2.39.2
+
--- /dev/null
+From ced92b3b30ff868a14d5763842e5299bdad70edb Mon Sep 17 00:00:00 2001
+From: Immad Mir <mirimmad17@gmail.com>
+Date: Fri, 23 Jun 2023 19:14:01 +0530
+Subject: [PATCH AUTOSEL 4.19 10/11] FS: JFS: Fix null-ptr-deref Read in
+ txBegin
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit 47cfdc338d674d38f4b2f22b7612cc6a2763ba27 ]
+
+ Syzkaller reported an issue where txBegin may be called
+ on a superblock in a read-only mounted filesystem, which leads
+ to a NULL pointer deref. This could be solved by checking if
+ the filesystem is read-only before calling txBegin, and returning
+ with an appropriate error code.
+
+Reported-By: syzbot+f1faa20eec55e0c8644c@syzkaller.appspotmail.com
+Link: https://syzkaller.appspot.com/bug?id=be7e52c50c5182cc09a09ea6fc456446b2039de3
+
+Signed-off-by: Immad Mir <mirimmad17@gmail.com>
+Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/jfs/namei.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c
+index 14528c0ffe635..c2c439acbb780 100644
+--- a/fs/jfs/namei.c
++++ b/fs/jfs/namei.c
+@@ -811,6 +811,11 @@ static int jfs_link(struct dentry *old_dentry,
+ if (rc)
+ goto out;
+
++ if (isReadOnly(ip)) {
++ jfs_error(ip->i_sb, "read-only filesystem\n");
++ return -EROFS;
++ }
++
+ tid = txBegin(ip->i_sb, 0);
+
+ mutex_lock_nested(&JFS_IP(dir)->commit_mutex, COMMIT_MUTEX_PARENT);
+--
+2.39.2
+
--- /dev/null
+From 35a29fcb694a5f3ee27d66f57f19795b367fd883 Mon Sep 17 00:00:00 2001
+From: Yogesh <yogi.kernel@gmail.com>
+Date: Thu, 22 Jun 2023 00:07:03 +0530
+Subject: [PATCH AUTOSEL 4.19 08/11] fs: jfs: Fix UBSAN:
+ array-index-out-of-bounds in dbAllocDmapLev
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit 4e302336d5ca1767a06beee7596a72d3bdc8d983 ]
+
+Syzkaller reported the following issue:
+
+UBSAN: array-index-out-of-bounds in fs/jfs/jfs_dmap.c:1965:6
+index -84 is out of range for type 's8[341]' (aka 'signed char[341]')
+CPU: 1 PID: 4995 Comm: syz-executor146 Not tainted 6.4.0-rc6-syzkaller-00037-gb6dad5178cea #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 05/27/2023
+Call Trace:
+ <TASK>
+ __dump_stack lib/dump_stack.c:88 [inline]
+ dump_stack_lvl+0x1e7/0x2d0 lib/dump_stack.c:106
+ ubsan_epilogue lib/ubsan.c:217 [inline]
+ __ubsan_handle_out_of_bounds+0x11c/0x150 lib/ubsan.c:348
+ dbAllocDmapLev+0x3e5/0x430 fs/jfs/jfs_dmap.c:1965
+ dbAllocCtl+0x113/0x920 fs/jfs/jfs_dmap.c:1809
+ dbAllocAG+0x28f/0x10b0 fs/jfs/jfs_dmap.c:1350
+ dbAlloc+0x658/0xca0 fs/jfs/jfs_dmap.c:874
+ dtSplitUp fs/jfs/jfs_dtree.c:974 [inline]
+ dtInsert+0xda7/0x6b00 fs/jfs/jfs_dtree.c:863
+ jfs_create+0x7b6/0xbb0 fs/jfs/namei.c:137
+ lookup_open fs/namei.c:3492 [inline]
+ open_last_lookups fs/namei.c:3560 [inline]
+ path_openat+0x13df/0x3170 fs/namei.c:3788
+ do_filp_open+0x234/0x490 fs/namei.c:3818
+ do_sys_openat2+0x13f/0x500 fs/open.c:1356
+ do_sys_open fs/open.c:1372 [inline]
+ __do_sys_openat fs/open.c:1388 [inline]
+ __se_sys_openat fs/open.c:1383 [inline]
+ __x64_sys_openat+0x247/0x290 fs/open.c:1383
+ do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+ do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+RIP: 0033:0x7f1f4e33f7e9
+Code: 28 00 00 00 75 05 48 83 c4 28 c3 e8 51 14 00 00 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48
+RSP: 002b:00007ffc21129578 EFLAGS: 00000246 ORIG_RAX: 0000000000000101
+RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007f1f4e33f7e9
+RDX: 000000000000275a RSI: 0000000020000040 RDI: 00000000ffffff9c
+RBP: 00007f1f4e2ff080 R08: 0000000000000000 R09: 0000000000000000
+R10: 0000000000000000 R11: 0000000000000246 R12: 00007f1f4e2ff110
+R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000000
+ </TASK>
+
+The bug occurs when the dbAllocDmapLev() function attempts to access
+dp->tree.stree[leafidx + LEAFIND] while the leafidx value is negative.
+
+To rectify this, the patch introduces a safeguard within the
+dbAllocDmapLev() function. A check has been added to verify if leafidx is
+negative. If it is, the function immediately returns an I/O error, preventing
+any further execution that could potentially cause harm.
+
+Tested via syzbot.
+
+Reported-by: syzbot+853a6f4dfa3cf37d3aea@syzkaller.appspotmail.com
+Link: https://syzkaller.appspot.com/bug?extid=ae2f5a27a07ae44b0f17
+Signed-off-by: Yogesh <yogi.kernel@gmail.com>
+Signed-off-by: Dave Kleikamp <dave.kleikamp@oracle.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/jfs/jfs_dmap.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
+index 3ad0a33e0443b..6fdf18caf241d 100644
+--- a/fs/jfs/jfs_dmap.c
++++ b/fs/jfs/jfs_dmap.c
+@@ -2034,6 +2034,9 @@ dbAllocDmapLev(struct bmap * bmp,
+ if (dbFindLeaf((dmtree_t *) & dp->tree, l2nb, &leafidx))
+ return -ENOSPC;
+
++ if (leafidx < 0)
++ return -EIO;
++
+ /* determine the block number within the file system corresponding
+ * to the leaf at which free space was found.
+ */
+--
+2.39.2
+
--- /dev/null
+From dc3ca84683c4bb50761998adaf575f383748ba73 Mon Sep 17 00:00:00 2001
+From: Marco Morandini <marco.morandini@polimi.it>
+Date: Tue, 30 May 2023 15:40:08 +0200
+Subject: [PATCH AUTOSEL 4.19 05/11] HID: add quirk for 03f0:464a HP Elite
+ Presenter Mouse
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit 0db117359e47750d8bd310d19f13e1c4ef7fc26a ]
+
+The HP Elite Presenter Mouse HID Report Descriptor shows
+two mice (Report IDs 0x1 and 0x2), one keypad (Report ID 0x5),
+and two Consumer Controls (Report IDs 0x6 and 0x3).
+Prior to this commit it registered one mouse, one keypad
+and one Consumer Control, and it was usable only as a
+digital laser pointer (one of the two mice). This patch defines
+the 464a USB device ID and enables the HID_QUIRK_MULTI_INPUT
+quirk for it, allowing the device to be used both as a mouse
+and a digital laser pointer.
+
+Signed-off-by: Marco Morandini <marco.morandini@polimi.it>
+Signed-off-by: Jiri Kosina <jkosina@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/hid/hid-ids.h | 1 +
+ drivers/hid/hid-quirks.c | 1 +
+ 2 files changed, 2 insertions(+)
+
+diff --git a/drivers/hid/hid-ids.h b/drivers/hid/hid-ids.h
+index c0ba8d6f4978f..a9d6f8acf70b5 100644
+--- a/drivers/hid/hid-ids.h
++++ b/drivers/hid/hid-ids.h
+@@ -571,6 +571,7 @@
+ #define USB_DEVICE_ID_UGCI_FIGHTING 0x0030
+
+ #define USB_VENDOR_ID_HP 0x03f0
++#define USB_PRODUCT_ID_HP_ELITE_PRESENTER_MOUSE_464A 0x464a
+ #define USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0A4A 0x0a4a
+ #define USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A 0x0b4a
+ #define USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE 0x134a
+diff --git a/drivers/hid/hid-quirks.c b/drivers/hid/hid-quirks.c
+index 8de294aa3184a..a2ab338166e61 100644
+--- a/drivers/hid/hid-quirks.c
++++ b/drivers/hid/hid-quirks.c
+@@ -98,6 +98,7 @@ static const struct hid_device_id hid_quirks[] = {
+ { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD_A096), HID_QUIRK_NO_INIT_REPORTS },
+ { HID_USB_DEVICE(USB_VENDOR_ID_HOLTEK_ALT, USB_DEVICE_ID_HOLTEK_ALT_KEYBOARD_A293), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0A4A), HID_QUIRK_ALWAYS_POLL },
++ { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_ELITE_PRESENTER_MOUSE_464A), HID_QUIRK_MULTI_INPUT },
+ { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_LOGITECH_OEM_USB_OPTICAL_MOUSE_0B4A), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE), HID_QUIRK_ALWAYS_POLL },
+ { HID_USB_DEVICE(USB_VENDOR_ID_HP, USB_PRODUCT_ID_HP_PIXART_OEM_USB_OPTICAL_MOUSE_094A), HID_QUIRK_ALWAYS_POLL },
+--
+2.39.2
+
--- /dev/null
+From d67f7140ec52c786fa3e1e17d5a41330d5965e52 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 5 Jun 2023 10:52:25 -0400
+Subject: iavf: fix a deadlock caused by rtnl and driver's lock circular
+ dependencies
+
+From: Ahmed Zaki <ahmed.zaki@intel.com>
+
+[ Upstream commit d1639a17319ba78a018280cd2df6577a7e5d9fab ]
+
+A driver's lock (crit_lock) is used to serialize all the driver's tasks.
+Lockdep, however, shows a circular dependency between rtnl and
+crit_lock. This happens when an ndo that already holds the rtnl requests
+the driver to reset, since the reset task (in some paths) tries to grab
+rtnl to either change the real number of queues or update netdev features.
+
+ [566.241851] ======================================================
+ [566.241893] WARNING: possible circular locking dependency detected
+ [566.241936] 6.2.14-100.fc36.x86_64+debug #1 Tainted: G OE
+ [566.241984] ------------------------------------------------------
+ [566.242025] repro.sh/2604 is trying to acquire lock:
+ [566.242061] ffff9280fc5ceee8 (&adapter->crit_lock){+.+.}-{3:3}, at: iavf_close+0x3c/0x240 [iavf]
+ [566.242167]
+ but task is already holding lock:
+ [566.242209] ffffffff9976d350 (rtnl_mutex){+.+.}-{3:3}, at: iavf_remove+0x6b5/0x730 [iavf]
+ [566.242300]
+ which lock already depends on the new lock.
+
+ [566.242353]
+ the existing dependency chain (in reverse order) is:
+ [566.242401]
+ -> #1 (rtnl_mutex){+.+.}-{3:3}:
+ [566.242451] __mutex_lock+0xc1/0xbb0
+ [566.242489] iavf_init_interrupt_scheme+0x179/0x440 [iavf]
+ [566.242560] iavf_watchdog_task+0x80b/0x1400 [iavf]
+ [566.242627] process_one_work+0x2b3/0x560
+ [566.242663] worker_thread+0x4f/0x3a0
+ [566.242696] kthread+0xf2/0x120
+ [566.242730] ret_from_fork+0x29/0x50
+ [566.242763]
+ -> #0 (&adapter->crit_lock){+.+.}-{3:3}:
+ [566.242815] __lock_acquire+0x15ff/0x22b0
+ [566.242869] lock_acquire+0xd2/0x2c0
+ [566.242901] __mutex_lock+0xc1/0xbb0
+ [566.242934] iavf_close+0x3c/0x240 [iavf]
+ [566.242997] __dev_close_many+0xac/0x120
+ [566.243036] dev_close_many+0x8b/0x140
+ [566.243071] unregister_netdevice_many_notify+0x165/0x7c0
+ [566.243116] unregister_netdevice_queue+0xd3/0x110
+ [566.243157] iavf_remove+0x6c1/0x730 [iavf]
+ [566.243217] pci_device_remove+0x33/0xa0
+ [566.243257] device_release_driver_internal+0x1bc/0x240
+ [566.243299] pci_stop_bus_device+0x6c/0x90
+ [566.243338] pci_stop_and_remove_bus_device+0xe/0x20
+ [566.243380] pci_iov_remove_virtfn+0xd1/0x130
+ [566.243417] sriov_disable+0x34/0xe0
+ [566.243448] ice_free_vfs+0x2da/0x330 [ice]
+ [566.244383] ice_sriov_configure+0x88/0xad0 [ice]
+ [566.245353] sriov_numvfs_store+0xde/0x1d0
+ [566.246156] kernfs_fop_write_iter+0x15e/0x210
+ [566.246921] vfs_write+0x288/0x530
+ [566.247671] ksys_write+0x74/0xf0
+ [566.248408] do_syscall_64+0x58/0x80
+ [566.249145] entry_SYSCALL_64_after_hwframe+0x72/0xdc
+ [566.249886]
+ other info that might help us debug this:
+
+ [566.252014] Possible unsafe locking scenario:
+
+ [566.253432] CPU0 CPU1
+ [566.254118] ---- ----
+ [566.254800] lock(rtnl_mutex);
+ [566.255514] lock(&adapter->crit_lock);
+ [566.256233] lock(rtnl_mutex);
+ [566.256897] lock(&adapter->crit_lock);
+ [566.257388]
+ *** DEADLOCK ***
+
+The deadlock can be triggered by a script that is continuously resetting
+the VF adapter while doing other operations requiring RTNL, e.g:
+
+ while :; do
+ ip link set $VF up
+ ethtool --set-channels $VF combined 2
+ ip link set $VF down
+ ip link set $VF up
+ ethtool --set-channels $VF combined 4
+ ip link set $VF down
+ done
+
+Any operation that triggers a reset can substitute "ethtool --set-channels".
+
+As a fix, add a new task "finish_config" that does all the work which
+needs the rtnl lock. With the exception of iavf_remove(), all work that
+requires rtnl should be done from this task.
+
+As for iavf_remove(), at the point where we need to call
+unregister_netdevice() (and grab rtnl_lock), we make sure the finish_config
+task is not running (cancel_work_sync()) to safely grab rtnl. Subsequent
+finish_config work cannot restart after that since the task is guarded
+by the __IAVF_IN_REMOVE_TASK bit in iavf_schedule_finish_config().
+
+Fixes: 5ac49f3c2702 ("iavf: use mutexes for locking of critical sections")
+Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
+Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf.h | 2 +
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 114 +++++++++++++-----
+ .../net/ethernet/intel/iavf/iavf_virtchnl.c | 1 +
+ 3 files changed, 85 insertions(+), 32 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
+index 2fe44e865d0a2..305675042fe55 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf.h
++++ b/drivers/net/ethernet/intel/iavf/iavf.h
+@@ -255,6 +255,7 @@ struct iavf_adapter {
+ struct workqueue_struct *wq;
+ struct work_struct reset_task;
+ struct work_struct adminq_task;
++ struct work_struct finish_config;
+ struct delayed_work client_task;
+ wait_queue_head_t down_waitqueue;
+ wait_queue_head_t reset_waitqueue;
+@@ -521,6 +522,7 @@ int iavf_process_config(struct iavf_adapter *adapter);
+ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter);
+ void iavf_schedule_reset(struct iavf_adapter *adapter);
+ void iavf_schedule_request_stats(struct iavf_adapter *adapter);
++void iavf_schedule_finish_config(struct iavf_adapter *adapter);
+ void iavf_reset(struct iavf_adapter *adapter);
+ void iavf_set_ethtool_ops(struct net_device *netdev);
+ void iavf_update_stats(struct iavf_adapter *adapter);
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index c2739071149de..0e201d690f0dd 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -1702,10 +1702,10 @@ static int iavf_set_interrupt_capability(struct iavf_adapter *adapter)
+ adapter->msix_entries[vector].entry = vector;
+
+ err = iavf_acquire_msix_vectors(adapter, v_budget);
++ if (!err)
++ iavf_schedule_finish_config(adapter);
+
+ out:
+- netif_set_real_num_rx_queues(adapter->netdev, pairs);
+- netif_set_real_num_tx_queues(adapter->netdev, pairs);
+ return err;
+ }
+
+@@ -1925,9 +1925,7 @@ static int iavf_init_interrupt_scheme(struct iavf_adapter *adapter)
+ goto err_alloc_queues;
+ }
+
+- rtnl_lock();
+ err = iavf_set_interrupt_capability(adapter);
+- rtnl_unlock();
+ if (err) {
+ dev_err(&adapter->pdev->dev,
+ "Unable to setup interrupt capabilities\n");
+@@ -2013,6 +2011,78 @@ static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter, bool runni
+ return err;
+ }
+
++/**
++ * iavf_finish_config - do all netdev work that needs RTNL
++ * @work: our work_struct
++ *
++ * Do work that needs both RTNL and crit_lock.
++ **/
++static void iavf_finish_config(struct work_struct *work)
++{
++ struct iavf_adapter *adapter;
++ int pairs, err;
++
++ adapter = container_of(work, struct iavf_adapter, finish_config);
++
++ /* Always take RTNL first to prevent circular lock dependency */
++ rtnl_lock();
++ mutex_lock(&adapter->crit_lock);
++
++ if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) &&
++ adapter->netdev_registered &&
++ !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) {
++ netdev_update_features(adapter->netdev);
++ adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES;
++ }
++
++ switch (adapter->state) {
++ case __IAVF_DOWN:
++ if (!adapter->netdev_registered) {
++ err = register_netdevice(adapter->netdev);
++ if (err) {
++ dev_err(&adapter->pdev->dev, "Unable to register netdev (%d)\n",
++ err);
++
++ /* go back and try again.*/
++ iavf_free_rss(adapter);
++ iavf_free_misc_irq(adapter);
++ iavf_reset_interrupt_capability(adapter);
++ iavf_change_state(adapter,
++ __IAVF_INIT_CONFIG_ADAPTER);
++ goto out;
++ }
++ adapter->netdev_registered = true;
++ }
++
++ /* Set the real number of queues when reset occurs while
++ * state == __IAVF_DOWN
++ */
++ fallthrough;
++ case __IAVF_RUNNING:
++ pairs = adapter->num_active_queues;
++ netif_set_real_num_rx_queues(adapter->netdev, pairs);
++ netif_set_real_num_tx_queues(adapter->netdev, pairs);
++ break;
++
++ default:
++ break;
++ }
++
++out:
++ mutex_unlock(&adapter->crit_lock);
++ rtnl_unlock();
++}
++
++/**
++ * iavf_schedule_finish_config - Set the flags and schedule a reset event
++ * @adapter: board private structure
++ **/
++void iavf_schedule_finish_config(struct iavf_adapter *adapter)
++{
++ if (!test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
++ queue_work(adapter->wq, &adapter->finish_config);
++}
++
+ /**
+ * iavf_process_aq_command - process aq_required flags
+ * and sends aq command
+@@ -2650,22 +2720,8 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter)
+
+ netif_carrier_off(netdev);
+ adapter->link_up = false;
+-
+- /* set the semaphore to prevent any callbacks after device registration
+- * up to time when state of driver will be set to __IAVF_DOWN
+- */
+- rtnl_lock();
+- if (!adapter->netdev_registered) {
+- err = register_netdevice(netdev);
+- if (err) {
+- rtnl_unlock();
+- goto err_register;
+- }
+- }
+-
+- adapter->netdev_registered = true;
+-
+ netif_tx_stop_all_queues(netdev);
++
+ if (CLIENT_ALLOWED(adapter)) {
+ err = iavf_lan_add_device(adapter);
+ if (err)
+@@ -2678,7 +2734,6 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter)
+
+ iavf_change_state(adapter, __IAVF_DOWN);
+ set_bit(__IAVF_VSI_DOWN, adapter->vsi.state);
+- rtnl_unlock();
+
+ iavf_misc_irq_enable(adapter);
+ wake_up(&adapter->down_waitqueue);
+@@ -2698,10 +2753,11 @@ static void iavf_init_config_adapter(struct iavf_adapter *adapter)
+ /* request initial VLAN offload settings */
+ iavf_set_vlan_offload_features(adapter, 0, netdev->features);
+
++ iavf_schedule_finish_config(adapter);
+ return;
++
+ err_mem:
+ iavf_free_rss(adapter);
+-err_register:
+ iavf_free_misc_irq(adapter);
+ err_sw_init:
+ iavf_reset_interrupt_capability(adapter);
+@@ -2728,15 +2784,6 @@ static void iavf_watchdog_task(struct work_struct *work)
+ goto restart_watchdog;
+ }
+
+- if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) &&
+- adapter->netdev_registered &&
+- !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) &&
+- rtnl_trylock()) {
+- netdev_update_features(adapter->netdev);
+- rtnl_unlock();
+- adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES;
+- }
+-
+ if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
+ iavf_change_state(adapter, __IAVF_COMM_FAILED);
+
+@@ -4980,6 +5027,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+
+ INIT_WORK(&adapter->reset_task, iavf_reset_task);
+ INIT_WORK(&adapter->adminq_task, iavf_adminq_task);
++ INIT_WORK(&adapter->finish_config, iavf_finish_config);
+ INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task);
+ INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+@@ -5123,13 +5171,15 @@ static void iavf_remove(struct pci_dev *pdev)
+ usleep_range(500, 1000);
+ }
+ cancel_delayed_work_sync(&adapter->watchdog_task);
++ cancel_work_sync(&adapter->finish_config);
+
++ rtnl_lock();
+ if (adapter->netdev_registered) {
+- rtnl_lock();
+ unregister_netdevice(netdev);
+ adapter->netdev_registered = false;
+- rtnl_unlock();
+ }
++ rtnl_unlock();
++
+ if (CLIENT_ALLOWED(adapter)) {
+ err = iavf_lan_del_device(adapter);
+ if (err)
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+index eec7ac3b7f6ee..35419673b6987 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+@@ -2237,6 +2237,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+
+ iavf_process_config(adapter);
+ adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES;
++ iavf_schedule_finish_config(adapter);
+
+ iavf_set_queue_vlan_tag_loc(adapter);
+
+--
+2.39.2
+
--- /dev/null
+From cc55115bcb0aa7ee5bb38c780a6de7795ff2f2b5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 May 2023 19:11:48 +0800
+Subject: iavf: Fix out-of-bounds when setting channels on remove
+
+From: Ding Hui <dinghui@sangfor.com.cn>
+
+[ Upstream commit 7c4bced3caa749ce468b0c5de711c98476b23a52 ]
+
+If we set a larger channel count while iavf_remove() is in progress and
+waiting for the reset to complete times out, iavf_set_channels() returns
+an error but has already changed num_active_queues directly. That leads
+to out-of-bounds accesses like in the logs below, because
+num_active_queues is then greater than the number of tx/rx_rings[]
+actually allocated.
+
+Reproducer:
+
+ [root@host ~]# cat repro.sh
+ #!/bin/bash
+
+ pf_dbsf="0000:41:00.0"
+ vf0_dbsf="0000:41:02.0"
+ g_pids=()
+
+ function do_set_numvf()
+ {
+ echo 2 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs
+ sleep $((RANDOM%3+1))
+ echo 0 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs
+ sleep $((RANDOM%3+1))
+ }
+
+ function do_set_channel()
+ {
+ local nic=$(ls -1 --indicator-style=none /sys/bus/pci/devices/${vf0_dbsf}/net/)
+ [ -z "$nic" ] && { sleep $((RANDOM%3)) ; return 1; }
+ ifconfig $nic 192.168.18.5 netmask 255.255.255.0
+ ifconfig $nic up
+ ethtool -L $nic combined 1
+ ethtool -L $nic combined 4
+ sleep $((RANDOM%3))
+ }
+
+ function on_exit()
+ {
+ local pid
+ for pid in "${g_pids[@]}"; do
+ kill -0 "$pid" &>/dev/null && kill "$pid" &>/dev/null
+ done
+ g_pids=()
+ }
+
+ trap "on_exit; exit" EXIT
+
+ while :; do do_set_numvf ; done &
+ g_pids+=($!)
+ while :; do do_set_channel ; done &
+ g_pids+=($!)
+
+ wait
+
+Result:
+
+[ 3506.152887] iavf 0000:41:02.0: Removing device
+[ 3510.400799] ==================================================================
+[ 3510.400820] BUG: KASAN: slab-out-of-bounds in iavf_free_all_tx_resources+0x156/0x160 [iavf]
+[ 3510.400823] Read of size 8 at addr ffff88b6f9311008 by task repro.sh/55536
+[ 3510.400823]
+[ 3510.400830] CPU: 101 PID: 55536 Comm: repro.sh Kdump: loaded Tainted: G O --------- -t - 4.18.0 #1
+[ 3510.400832] Hardware name: Powerleader PR2008AL/H12DSi-N6, BIOS 2.0 04/09/2021
+[ 3510.400835] Call Trace:
+[ 3510.400851] dump_stack+0x71/0xab
+[ 3510.400860] print_address_description+0x6b/0x290
+[ 3510.400865] ? iavf_free_all_tx_resources+0x156/0x160 [iavf]
+[ 3510.400868] kasan_report+0x14a/0x2b0
+[ 3510.400873] iavf_free_all_tx_resources+0x156/0x160 [iavf]
+[ 3510.400880] iavf_remove+0x2b6/0xc70 [iavf]
+[ 3510.400884] ? iavf_free_all_rx_resources+0x160/0x160 [iavf]
+[ 3510.400891] ? wait_woken+0x1d0/0x1d0
+[ 3510.400895] ? notifier_call_chain+0xc1/0x130
+[ 3510.400903] pci_device_remove+0xa8/0x1f0
+[ 3510.400910] device_release_driver_internal+0x1c6/0x460
+[ 3510.400916] pci_stop_bus_device+0x101/0x150
+[ 3510.400919] pci_stop_and_remove_bus_device+0xe/0x20
+[ 3510.400924] pci_iov_remove_virtfn+0x187/0x420
+[ 3510.400927] ? pci_iov_add_virtfn+0xe10/0xe10
+[ 3510.400929] ? pci_get_subsys+0x90/0x90
+[ 3510.400932] sriov_disable+0xed/0x3e0
+[ 3510.400936] ? bus_find_device+0x12d/0x1a0
+[ 3510.400953] i40e_free_vfs+0x754/0x1210 [i40e]
+[ 3510.400966] ? i40e_reset_all_vfs+0x880/0x880 [i40e]
+[ 3510.400968] ? pci_get_device+0x7c/0x90
+[ 3510.400970] ? pci_get_subsys+0x90/0x90
+[ 3510.400982] ? pci_vfs_assigned.part.7+0x144/0x210
+[ 3510.400987] ? __mutex_lock_slowpath+0x10/0x10
+[ 3510.400996] i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e]
+[ 3510.401001] sriov_numvfs_store+0x214/0x290
+[ 3510.401005] ? sriov_totalvfs_show+0x30/0x30
+[ 3510.401007] ? __mutex_lock_slowpath+0x10/0x10
+[ 3510.401011] ? __check_object_size+0x15a/0x350
+[ 3510.401018] kernfs_fop_write+0x280/0x3f0
+[ 3510.401022] vfs_write+0x145/0x440
+[ 3510.401025] ksys_write+0xab/0x160
+[ 3510.401028] ? __ia32_sys_read+0xb0/0xb0
+[ 3510.401031] ? fput_many+0x1a/0x120
+[ 3510.401032] ? filp_close+0xf0/0x130
+[ 3510.401038] do_syscall_64+0xa0/0x370
+[ 3510.401041] ? page_fault+0x8/0x30
+[ 3510.401043] entry_SYSCALL_64_after_hwframe+0x65/0xca
+[ 3510.401073] RIP: 0033:0x7f3a9bb842c0
+[ 3510.401079] Code: 73 01 c3 48 8b 0d d8 cb 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 89 24 2d 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 fe dd 01 00 48 89 04 24
+[ 3510.401080] RSP: 002b:00007ffc05f1fe18 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
+[ 3510.401083] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f3a9bb842c0
+[ 3510.401085] RDX: 0000000000000002 RSI: 0000000002327408 RDI: 0000000000000001
+[ 3510.401086] RBP: 0000000002327408 R08: 00007f3a9be53780 R09: 00007f3a9c8a4700
+[ 3510.401086] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000002
+[ 3510.401087] R13: 0000000000000001 R14: 00007f3a9be52620 R15: 0000000000000001
+[ 3510.401090]
+[ 3510.401093] Allocated by task 76795:
+[ 3510.401098] kasan_kmalloc+0xa6/0xd0
+[ 3510.401099] __kmalloc+0xfb/0x200
+[ 3510.401104] iavf_init_interrupt_scheme+0x26f/0x1310 [iavf]
+[ 3510.401108] iavf_watchdog_task+0x1d58/0x4050 [iavf]
+[ 3510.401114] process_one_work+0x56a/0x11f0
+[ 3510.401115] worker_thread+0x8f/0xf40
+[ 3510.401117] kthread+0x2a0/0x390
+[ 3510.401119] ret_from_fork+0x1f/0x40
+[ 3510.401122] 0xffffffffffffffff
+[ 3510.401123]
+
+In timeout handling, we should keep the original num_active_queues
+and reset num_req_queues to 0.
+
+Fixes: 4e5e6b5d9d13 ("iavf: Fix return of set the new channel count")
+Signed-off-by: Ding Hui <dinghui@sangfor.com.cn>
+Cc: Donglin Peng <pengdonglin@sangfor.com.cn>
+Cc: Huang Cun <huangcun@sangfor.com.cn>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf_ethtool.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+index 83cfc54a47062..4746ee517c75a 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+@@ -1863,7 +1863,7 @@ static int iavf_set_channels(struct net_device *netdev,
+ }
+ if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) {
+ adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+- adapter->num_active_queues = num_req;
++ adapter->num_req_queues = 0;
+ return -EOPNOTSUPP;
+ }
+
+--
+2.39.2
+
--- /dev/null
+From 045d5f68bcd8b2284e19c86bfd77bc8ae236d467 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 5 Jun 2023 10:52:26 -0400
+Subject: iavf: fix reset task race with iavf_remove()
+
+From: Ahmed Zaki <ahmed.zaki@intel.com>
+
+[ Upstream commit c34743daca0eb1dc855831a5210f0800a850088e ]
+
+The reset task is currently scheduled from the watchdog or adminq tasks.
+First, all direct calls to schedule the reset task are replaced with
+iavf_schedule_reset(), which is modified to accept a flag indicating the
+type of reset.
+
+To prevent the reset task from starting once iavf_remove() starts, we need
+to check the __IAVF_IN_REMOVE_TASK bit before we schedule it. This is now
+easily added to iavf_schedule_reset().
+
+Finally, remove the check for IAVF_FLAG_RESET_NEEDED in the watchdog task.
+It is redundant since all callers that set the flag immediately schedule
+the reset task.
+
+Fixes: 3ccd54ef44eb ("iavf: Fix init state closure on remove")
+Fixes: 14756b2ae265 ("iavf: Fix __IAVF_RESETTING state usage")
+Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
+Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf.h | 2 +-
+ .../net/ethernet/intel/iavf/iavf_ethtool.c | 8 ++---
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 32 +++++++------------
+ .../net/ethernet/intel/iavf/iavf_virtchnl.c | 3 +-
+ 4 files changed, 16 insertions(+), 29 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
+index 305675042fe55..543931c06bb17 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf.h
++++ b/drivers/net/ethernet/intel/iavf/iavf.h
+@@ -520,7 +520,7 @@ int iavf_up(struct iavf_adapter *adapter);
+ void iavf_down(struct iavf_adapter *adapter);
+ int iavf_process_config(struct iavf_adapter *adapter);
+ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter);
+-void iavf_schedule_reset(struct iavf_adapter *adapter);
++void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags);
+ void iavf_schedule_request_stats(struct iavf_adapter *adapter);
+ void iavf_schedule_finish_config(struct iavf_adapter *adapter);
+ void iavf_reset(struct iavf_adapter *adapter);
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+index 73219c5069290..fd6d6f6263f66 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+@@ -532,8 +532,7 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags)
+ /* issue a reset to force legacy-rx change to take effect */
+ if (changed_flags & IAVF_FLAG_LEGACY_RX) {
+ if (netif_running(netdev)) {
+- adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+- queue_work(adapter->wq, &adapter->reset_task);
++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+ ret = iavf_wait_for_reset(adapter);
+ if (ret)
+ netdev_warn(netdev, "Changing private flags timeout or interrupted waiting for reset");
+@@ -676,8 +675,7 @@ static int iavf_set_ringparam(struct net_device *netdev,
+ }
+
+ if (netif_running(netdev)) {
+- adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+- queue_work(adapter->wq, &adapter->reset_task);
++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+ ret = iavf_wait_for_reset(adapter);
+ if (ret)
+ netdev_warn(netdev, "Changing ring parameters timeout or interrupted waiting for reset");
+@@ -1860,7 +1858,7 @@ static int iavf_set_channels(struct net_device *netdev,
+
+ adapter->num_req_queues = num_req;
+ adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
+- iavf_schedule_reset(adapter);
++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+
+ ret = iavf_wait_for_reset(adapter);
+ if (ret)
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index 0e201d690f0dd..c1f91c55e1ca7 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -309,12 +309,14 @@ static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs)
+ /**
+ * iavf_schedule_reset - Set the flags and schedule a reset event
+ * @adapter: board private structure
++ * @flags: IAVF_FLAG_RESET_PENDING or IAVF_FLAG_RESET_NEEDED
+ **/
+-void iavf_schedule_reset(struct iavf_adapter *adapter)
++void iavf_schedule_reset(struct iavf_adapter *adapter, u64 flags)
+ {
+- if (!(adapter->flags &
+- (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) {
+- adapter->flags |= IAVF_FLAG_RESET_NEEDED;
++ if (!test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) &&
++ !(adapter->flags &
++ (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) {
++ adapter->flags |= flags;
+ queue_work(adapter->wq, &adapter->reset_task);
+ }
+ }
+@@ -342,7 +344,7 @@ static void iavf_tx_timeout(struct net_device *netdev, unsigned int txqueue)
+ struct iavf_adapter *adapter = netdev_priv(netdev);
+
+ adapter->tx_timeout_count++;
+- iavf_schedule_reset(adapter);
++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+ }
+
+ /**
+@@ -2490,7 +2492,7 @@ int iavf_parse_vf_resource_msg(struct iavf_adapter *adapter)
+ adapter->vsi_res->num_queue_pairs);
+ adapter->flags |= IAVF_FLAG_REINIT_MSIX_NEEDED;
+ adapter->num_req_queues = adapter->vsi_res->num_queue_pairs;
+- iavf_schedule_reset(adapter);
++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+
+ return -EAGAIN;
+ }
+@@ -2787,14 +2789,6 @@ static void iavf_watchdog_task(struct work_struct *work)
+ if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
+ iavf_change_state(adapter, __IAVF_COMM_FAILED);
+
+- if (adapter->flags & IAVF_FLAG_RESET_NEEDED) {
+- adapter->aq_required = 0;
+- adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+- mutex_unlock(&adapter->crit_lock);
+- queue_work(adapter->wq, &adapter->reset_task);
+- return;
+- }
+-
+ switch (adapter->state) {
+ case __IAVF_STARTUP:
+ iavf_startup(adapter);
+@@ -2922,11 +2916,10 @@ static void iavf_watchdog_task(struct work_struct *work)
+ /* check for hw reset */
+ reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK;
+ if (!reg_val) {
+- adapter->flags |= IAVF_FLAG_RESET_PENDING;
+ adapter->aq_required = 0;
+ adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+ dev_err(&adapter->pdev->dev, "Hardware reset detected\n");
+- queue_work(adapter->wq, &adapter->reset_task);
++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING);
+ mutex_unlock(&adapter->crit_lock);
+ queue_delayed_work(adapter->wq,
+ &adapter->watchdog_task, HZ * 2);
+@@ -3324,9 +3317,7 @@ static void iavf_adminq_task(struct work_struct *work)
+ } while (pending);
+ mutex_unlock(&adapter->crit_lock);
+
+- if ((adapter->flags &
+- (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) ||
+- adapter->state == __IAVF_RESETTING)
++ if (iavf_is_reset_in_progress(adapter))
+ goto freedom;
+
+ /* check for error indications */
+@@ -4423,8 +4414,7 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu)
+ }
+
+ if (netif_running(netdev)) {
+- adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+- queue_work(adapter->wq, &adapter->reset_task);
++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_NEEDED);
+ ret = iavf_wait_for_reset(adapter);
+ if (ret < 0)
+ netdev_warn(netdev, "MTU change interrupted waiting for reset");
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+index 35419673b6987..2fc8e60ef6afb 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+@@ -1961,9 +1961,8 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+ case VIRTCHNL_EVENT_RESET_IMPENDING:
+ dev_info(&adapter->pdev->dev, "Reset indication received from the PF\n");
+ if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) {
+- adapter->flags |= IAVF_FLAG_RESET_PENDING;
+ dev_info(&adapter->pdev->dev, "Scheduling reset task\n");
+- queue_work(adapter->wq, &adapter->reset_task);
++ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING);
+ }
+ break;
+ default:
+--
+2.39.2
+
--- /dev/null
+From 65df986e4dd0e7534d9caca118a4603cfb45336b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 9 May 2023 19:11:47 +0800
+Subject: iavf: Fix use-after-free in free_netdev
+
+From: Ding Hui <dinghui@sangfor.com.cn>
+
+[ Upstream commit 5f4fa1672d98fe99d2297b03add35346f1685d6b ]
+
+We do netif_napi_add() for all allocated q_vectors[], but potentially
+do netif_napi_del() for only part of them, then kfree q_vectors and
+leave invalid pointers on dev->napi_list.
+
+Reproducer:
+
+ [root@host ~]# cat repro.sh
+ #!/bin/bash
+
+ pf_dbsf="0000:41:00.0"
+ vf0_dbsf="0000:41:02.0"
+ g_pids=()
+
+ function do_set_numvf()
+ {
+ echo 2 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs
+ sleep $((RANDOM%3+1))
+ echo 0 >/sys/bus/pci/devices/${pf_dbsf}/sriov_numvfs
+ sleep $((RANDOM%3+1))
+ }
+
+ function do_set_channel()
+ {
+ local nic=$(ls -1 --indicator-style=none /sys/bus/pci/devices/${vf0_dbsf}/net/)
+ [ -z "$nic" ] && { sleep $((RANDOM%3)) ; return 1; }
+ ifconfig $nic 192.168.18.5 netmask 255.255.255.0
+ ifconfig $nic up
+ ethtool -L $nic combined 1
+ ethtool -L $nic combined 4
+ sleep $((RANDOM%3))
+ }
+
+ function on_exit()
+ {
+ local pid
+ for pid in "${g_pids[@]}"; do
+ kill -0 "$pid" &>/dev/null && kill "$pid" &>/dev/null
+ done
+ g_pids=()
+ }
+
+ trap "on_exit; exit" EXIT
+
+ while :; do do_set_numvf ; done &
+ g_pids+=($!)
+ while :; do do_set_channel ; done &
+ g_pids+=($!)
+
+ wait
+
+Result:
+
+[ 4093.900222] ==================================================================
+[ 4093.900230] BUG: KASAN: use-after-free in free_netdev+0x308/0x390
+[ 4093.900232] Read of size 8 at addr ffff88b4dc145640 by task repro.sh/6699
+[ 4093.900233]
+[ 4093.900236] CPU: 10 PID: 6699 Comm: repro.sh Kdump: loaded Tainted: G O --------- -t - 4.18.0 #1
+[ 4093.900238] Hardware name: Powerleader PR2008AL/H12DSi-N6, BIOS 2.0 04/09/2021
+[ 4093.900239] Call Trace:
+[ 4093.900244] dump_stack+0x71/0xab
+[ 4093.900249] print_address_description+0x6b/0x290
+[ 4093.900251] ? free_netdev+0x308/0x390
+[ 4093.900252] kasan_report+0x14a/0x2b0
+[ 4093.900254] free_netdev+0x308/0x390
+[ 4093.900261] iavf_remove+0x825/0xd20 [iavf]
+[ 4093.900265] pci_device_remove+0xa8/0x1f0
+[ 4093.900268] device_release_driver_internal+0x1c6/0x460
+[ 4093.900271] pci_stop_bus_device+0x101/0x150
+[ 4093.900273] pci_stop_and_remove_bus_device+0xe/0x20
+[ 4093.900275] pci_iov_remove_virtfn+0x187/0x420
+[ 4093.900277] ? pci_iov_add_virtfn+0xe10/0xe10
+[ 4093.900278] ? pci_get_subsys+0x90/0x90
+[ 4093.900280] sriov_disable+0xed/0x3e0
+[ 4093.900282] ? bus_find_device+0x12d/0x1a0
+[ 4093.900290] i40e_free_vfs+0x754/0x1210 [i40e]
+[ 4093.900298] ? i40e_reset_all_vfs+0x880/0x880 [i40e]
+[ 4093.900299] ? pci_get_device+0x7c/0x90
+[ 4093.900300] ? pci_get_subsys+0x90/0x90
+[ 4093.900306] ? pci_vfs_assigned.part.7+0x144/0x210
+[ 4093.900309] ? __mutex_lock_slowpath+0x10/0x10
+[ 4093.900315] i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e]
+[ 4093.900318] sriov_numvfs_store+0x214/0x290
+[ 4093.900320] ? sriov_totalvfs_show+0x30/0x30
+[ 4093.900321] ? __mutex_lock_slowpath+0x10/0x10
+[ 4093.900323] ? __check_object_size+0x15a/0x350
+[ 4093.900326] kernfs_fop_write+0x280/0x3f0
+[ 4093.900329] vfs_write+0x145/0x440
+[ 4093.900330] ksys_write+0xab/0x160
+[ 4093.900332] ? __ia32_sys_read+0xb0/0xb0
+[ 4093.900334] ? fput_many+0x1a/0x120
+[ 4093.900335] ? filp_close+0xf0/0x130
+[ 4093.900338] do_syscall_64+0xa0/0x370
+[ 4093.900339] ? page_fault+0x8/0x30
+[ 4093.900341] entry_SYSCALL_64_after_hwframe+0x65/0xca
+[ 4093.900357] RIP: 0033:0x7f16ad4d22c0
+[ 4093.900359] Code: 73 01 c3 48 8b 0d d8 cb 2c 00 f7 d8 64 89 01 48 83 c8 ff c3 66 0f 1f 44 00 00 83 3d 89 24 2d 00 00 75 10 b8 01 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 31 c3 48 83 ec 08 e8 fe dd 01 00 48 89 04 24
+[ 4093.900360] RSP: 002b:00007ffd6491b7f8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
+[ 4093.900362] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f16ad4d22c0
+[ 4093.900363] RDX: 0000000000000002 RSI: 0000000001a41408 RDI: 0000000000000001
+[ 4093.900364] RBP: 0000000001a41408 R08: 00007f16ad7a1780 R09: 00007f16ae1f2700
+[ 4093.900364] R10: 0000000000000001 R11: 0000000000000246 R12: 0000000000000002
+[ 4093.900365] R13: 0000000000000001 R14: 00007f16ad7a0620 R15: 0000000000000001
+[ 4093.900367]
+[ 4093.900368] Allocated by task 820:
+[ 4093.900371] kasan_kmalloc+0xa6/0xd0
+[ 4093.900373] __kmalloc+0xfb/0x200
+[ 4093.900376] iavf_init_interrupt_scheme+0x63b/0x1320 [iavf]
+[ 4093.900380] iavf_watchdog_task+0x3d51/0x52c0 [iavf]
+[ 4093.900382] process_one_work+0x56a/0x11f0
+[ 4093.900383] worker_thread+0x8f/0xf40
+[ 4093.900384] kthread+0x2a0/0x390
+[ 4093.900385] ret_from_fork+0x1f/0x40
+[ 4093.900387] 0xffffffffffffffff
+[ 4093.900387]
+[ 4093.900388] Freed by task 6699:
+[ 4093.900390] __kasan_slab_free+0x137/0x190
+[ 4093.900391] kfree+0x8b/0x1b0
+[ 4093.900394] iavf_free_q_vectors+0x11d/0x1a0 [iavf]
+[ 4093.900397] iavf_remove+0x35a/0xd20 [iavf]
+[ 4093.900399] pci_device_remove+0xa8/0x1f0
+[ 4093.900400] device_release_driver_internal+0x1c6/0x460
+[ 4093.900401] pci_stop_bus_device+0x101/0x150
+[ 4093.900402] pci_stop_and_remove_bus_device+0xe/0x20
+[ 4093.900403] pci_iov_remove_virtfn+0x187/0x420
+[ 4093.900404] sriov_disable+0xed/0x3e0
+[ 4093.900409] i40e_free_vfs+0x754/0x1210 [i40e]
+[ 4093.900415] i40e_pci_sriov_configure+0x1fa/0x2e0 [i40e]
+[ 4093.900416] sriov_numvfs_store+0x214/0x290
+[ 4093.900417] kernfs_fop_write+0x280/0x3f0
+[ 4093.900418] vfs_write+0x145/0x440
+[ 4093.900419] ksys_write+0xab/0x160
+[ 4093.900420] do_syscall_64+0xa0/0x370
+[ 4093.900421] entry_SYSCALL_64_after_hwframe+0x65/0xca
+[ 4093.900422] 0xffffffffffffffff
+[ 4093.900422]
+[ 4093.900424] The buggy address belongs to the object at ffff88b4dc144200
+ which belongs to the cache kmalloc-8k of size 8192
+[ 4093.900425] The buggy address is located 5184 bytes inside of
+ 8192-byte region [ffff88b4dc144200, ffff88b4dc146200)
+[ 4093.900425] The buggy address belongs to the page:
+[ 4093.900427] page:ffffea00d3705000 refcount:1 mapcount:0 mapping:ffff88bf04415c80 index:0x0 compound_mapcount: 0
+[ 4093.900430] flags: 0x10000000008100(slab|head)
+[ 4093.900433] raw: 0010000000008100 dead000000000100 dead000000000200 ffff88bf04415c80
+[ 4093.900434] raw: 0000000000000000 0000000000030003 00000001ffffffff 0000000000000000
+[ 4093.900434] page dumped because: kasan: bad access detected
+[ 4093.900435]
+[ 4093.900435] Memory state around the buggy address:
+[ 4093.900436] ffff88b4dc145500: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 4093.900437] ffff88b4dc145580: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 4093.900438] >ffff88b4dc145600: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 4093.900438] ^
+[ 4093.900439] ffff88b4dc145680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 4093.900440] ffff88b4dc145700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+[ 4093.900440] ==================================================================
+
+Although patch #2 (of 2) avoids the issue triggered by this repro.sh,
+there are still other potential risks: if num_active_queues is
+unexpectedly changed to less than the number of allocated q_vectors[],
+the mismatched netif_napi_add()/netif_napi_del() calls can also cause a
+use-after-free.
+
+Since we actually call netif_napi_add() unconditionally for all
+allocated q_vectors in iavf_alloc_q_vectors(), fix this by making
+netif_napi_del() match netif_napi_add().
+
+Fixes: 5eae00c57f5e ("i40evf: main driver core")
+Signed-off-by: Ding Hui <dinghui@sangfor.com.cn>
+Cc: Donglin Peng <pengdonglin@sangfor.com.cn>
+Cc: Huang Cun <huangcun@sangfor.com.cn>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Reviewed-by: Madhu Chittim <madhu.chittim@intel.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index 965d02d7ff80f..81676c3af4b36 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -1840,19 +1840,16 @@ static int iavf_alloc_q_vectors(struct iavf_adapter *adapter)
+ static void iavf_free_q_vectors(struct iavf_adapter *adapter)
+ {
+ int q_idx, num_q_vectors;
+- int napi_vectors;
+
+ if (!adapter->q_vectors)
+ return;
+
+ num_q_vectors = adapter->num_msix_vectors - NONQ_VECS;
+- napi_vectors = adapter->num_active_queues;
+
+ for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
+ struct iavf_q_vector *q_vector = &adapter->q_vectors[q_idx];
+
+- if (q_idx < napi_vectors)
+- netif_napi_del(&q_vector->napi);
++ netif_napi_del(&q_vector->napi);
+ }
+ kfree(adapter->q_vectors);
+ adapter->q_vectors = NULL;
+--
+2.39.2
+
--- /dev/null
+From 97d8a9e529256a00151bc682e79efba868de17a6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Jun 2023 08:54:05 -0700
+Subject: iavf: make functions static where possible
+
+From: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+
+[ Upstream commit a4aadf0f5905661cd25c366b96cc1c840f05b756 ]
+
+Make all possible functions static.
+
+Move iavf_force_wb() up to avoid forward declaration.
+
+Suggested-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
+Signed-off-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Stable-dep-of: c2ed2403f12c ("iavf: Wait for reset in callbacks which trigger it")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf.h | 10 -----
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 14 +++----
+ drivers/net/ethernet/intel/iavf/iavf_txrx.c | 43 ++++++++++-----------
+ drivers/net/ethernet/intel/iavf/iavf_txrx.h | 4 --
+ 4 files changed, 28 insertions(+), 43 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
+index 6625625f91e47..a716ed6bb787d 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf.h
++++ b/drivers/net/ethernet/intel/iavf/iavf.h
+@@ -523,9 +523,6 @@ void iavf_schedule_request_stats(struct iavf_adapter *adapter);
+ void iavf_reset(struct iavf_adapter *adapter);
+ void iavf_set_ethtool_ops(struct net_device *netdev);
+ void iavf_update_stats(struct iavf_adapter *adapter);
+-void iavf_reset_interrupt_capability(struct iavf_adapter *adapter);
+-int iavf_init_interrupt_scheme(struct iavf_adapter *adapter);
+-void iavf_irq_enable_queues(struct iavf_adapter *adapter);
+ void iavf_free_all_tx_resources(struct iavf_adapter *adapter);
+ void iavf_free_all_rx_resources(struct iavf_adapter *adapter);
+
+@@ -579,17 +576,10 @@ void iavf_enable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid);
+ void iavf_disable_vlan_stripping_v2(struct iavf_adapter *adapter, u16 tpid);
+ void iavf_enable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid);
+ void iavf_disable_vlan_insertion_v2(struct iavf_adapter *adapter, u16 tpid);
+-int iavf_replace_primary_mac(struct iavf_adapter *adapter,
+- const u8 *new_mac);
+-void
+-iavf_set_vlan_offload_features(struct iavf_adapter *adapter,
+- netdev_features_t prev_features,
+- netdev_features_t features);
+ void iavf_add_fdir_filter(struct iavf_adapter *adapter);
+ void iavf_del_fdir_filter(struct iavf_adapter *adapter);
+ void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter);
+ void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter);
+ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
+ const u8 *macaddr);
+-int iavf_lock_timeout(struct mutex *lock, unsigned int msecs);
+ #endif /* _IAVF_H_ */
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index 68e951fe5e210..d5b1dcfe0ccdd 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -253,7 +253,7 @@ enum iavf_status iavf_free_virt_mem_d(struct iavf_hw *hw,
+ *
+ * Returns 0 on success, negative on failure
+ **/
+-int iavf_lock_timeout(struct mutex *lock, unsigned int msecs)
++static int iavf_lock_timeout(struct mutex *lock, unsigned int msecs)
+ {
+ unsigned int wait, delay = 10;
+
+@@ -362,7 +362,7 @@ static void iavf_irq_disable(struct iavf_adapter *adapter)
+ * iavf_irq_enable_queues - Enable interrupt for all queues
+ * @adapter: board private structure
+ **/
+-void iavf_irq_enable_queues(struct iavf_adapter *adapter)
++static void iavf_irq_enable_queues(struct iavf_adapter *adapter)
+ {
+ struct iavf_hw *hw = &adapter->hw;
+ int i;
+@@ -1003,8 +1003,8 @@ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
+ *
+ * Do not call this with mac_vlan_list_lock!
+ **/
+-int iavf_replace_primary_mac(struct iavf_adapter *adapter,
+- const u8 *new_mac)
++static int iavf_replace_primary_mac(struct iavf_adapter *adapter,
++ const u8 *new_mac)
+ {
+ struct iavf_hw *hw = &adapter->hw;
+ struct iavf_mac_filter *f;
+@@ -1860,7 +1860,7 @@ static void iavf_free_q_vectors(struct iavf_adapter *adapter)
+ * @adapter: board private structure
+ *
+ **/
+-void iavf_reset_interrupt_capability(struct iavf_adapter *adapter)
++static void iavf_reset_interrupt_capability(struct iavf_adapter *adapter)
+ {
+ if (!adapter->msix_entries)
+ return;
+@@ -1875,7 +1875,7 @@ void iavf_reset_interrupt_capability(struct iavf_adapter *adapter)
+ * @adapter: board private structure to initialize
+ *
+ **/
+-int iavf_init_interrupt_scheme(struct iavf_adapter *adapter)
++static int iavf_init_interrupt_scheme(struct iavf_adapter *adapter)
+ {
+ int err;
+
+@@ -2174,7 +2174,7 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter)
+ * the watchdog if any changes are requested to expedite the request via
+ * virtchnl.
+ **/
+-void
++static void
+ iavf_set_vlan_offload_features(struct iavf_adapter *adapter,
+ netdev_features_t prev_features,
+ netdev_features_t features)
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.c b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+index e989feda133c1..8c5f6096b0022 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.c
+@@ -54,7 +54,7 @@ static void iavf_unmap_and_free_tx_resource(struct iavf_ring *ring,
+ * iavf_clean_tx_ring - Free any empty Tx buffers
+ * @tx_ring: ring to be cleaned
+ **/
+-void iavf_clean_tx_ring(struct iavf_ring *tx_ring)
++static void iavf_clean_tx_ring(struct iavf_ring *tx_ring)
+ {
+ unsigned long bi_size;
+ u16 i;
+@@ -110,7 +110,7 @@ void iavf_free_tx_resources(struct iavf_ring *tx_ring)
+ * Since there is no access to the ring head register
+ * in XL710, we need to use our local copies
+ **/
+-u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw)
++static u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw)
+ {
+ u32 head, tail;
+
+@@ -127,6 +127,24 @@ u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw)
+ return 0;
+ }
+
++/**
++ * iavf_force_wb - Issue SW Interrupt so HW does a wb
++ * @vsi: the VSI we care about
++ * @q_vector: the vector on which to force writeback
++ **/
++static void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector)
++{
++ u32 val = IAVF_VFINT_DYN_CTLN1_INTENA_MASK |
++ IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */
++ IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_MASK |
++ IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK
++ /* allow 00 to be written to the index */;
++
++ wr32(&vsi->back->hw,
++ IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx),
++ val);
++}
++
+ /**
+ * iavf_detect_recover_hung - Function to detect and recover hung_queues
+ * @vsi: pointer to vsi struct with tx queues
+@@ -352,25 +370,6 @@ static void iavf_enable_wb_on_itr(struct iavf_vsi *vsi,
+ q_vector->arm_wb_state = true;
+ }
+
+-/**
+- * iavf_force_wb - Issue SW Interrupt so HW does a wb
+- * @vsi: the VSI we care about
+- * @q_vector: the vector on which to force writeback
+- *
+- **/
+-void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector)
+-{
+- u32 val = IAVF_VFINT_DYN_CTLN1_INTENA_MASK |
+- IAVF_VFINT_DYN_CTLN1_ITR_INDX_MASK | /* set noitr */
+- IAVF_VFINT_DYN_CTLN1_SWINT_TRIG_MASK |
+- IAVF_VFINT_DYN_CTLN1_SW_ITR_INDX_ENA_MASK
+- /* allow 00 to be written to the index */;
+-
+- wr32(&vsi->back->hw,
+- IAVF_VFINT_DYN_CTLN1(q_vector->reg_idx),
+- val);
+-}
+-
+ static inline bool iavf_container_is_rx(struct iavf_q_vector *q_vector,
+ struct iavf_ring_container *rc)
+ {
+@@ -687,7 +686,7 @@ int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring)
+ * iavf_clean_rx_ring - Free Rx buffers
+ * @rx_ring: ring to be cleaned
+ **/
+-void iavf_clean_rx_ring(struct iavf_ring *rx_ring)
++static void iavf_clean_rx_ring(struct iavf_ring *rx_ring)
+ {
+ unsigned long bi_size;
+ u16 i;
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_txrx.h b/drivers/net/ethernet/intel/iavf/iavf_txrx.h
+index 2624bf6d009e3..7e6ee32d19b69 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_txrx.h
++++ b/drivers/net/ethernet/intel/iavf/iavf_txrx.h
+@@ -442,15 +442,11 @@ static inline unsigned int iavf_rx_pg_order(struct iavf_ring *ring)
+
+ bool iavf_alloc_rx_buffers(struct iavf_ring *rxr, u16 cleaned_count);
+ netdev_tx_t iavf_xmit_frame(struct sk_buff *skb, struct net_device *netdev);
+-void iavf_clean_tx_ring(struct iavf_ring *tx_ring);
+-void iavf_clean_rx_ring(struct iavf_ring *rx_ring);
+ int iavf_setup_tx_descriptors(struct iavf_ring *tx_ring);
+ int iavf_setup_rx_descriptors(struct iavf_ring *rx_ring);
+ void iavf_free_tx_resources(struct iavf_ring *tx_ring);
+ void iavf_free_rx_resources(struct iavf_ring *rx_ring);
+ int iavf_napi_poll(struct napi_struct *napi, int budget);
+-void iavf_force_wb(struct iavf_vsi *vsi, struct iavf_q_vector *q_vector);
+-u32 iavf_get_tx_pending(struct iavf_ring *ring, bool in_sw);
+ void iavf_detect_recover_hung(struct iavf_vsi *vsi);
+ int __iavf_maybe_stop_tx(struct iavf_ring *tx_ring, int size);
+ bool __iavf_chk_linearize(struct sk_buff *skb);
+--
+2.39.2
+
--- /dev/null
+From 5491562d5578b2fc118790482f43fbde751e023f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 3 Jan 2023 17:42:27 +0100
+Subject: iavf: Move netdev_update_features() into watchdog task
+
+From: Marcin Szycik <marcin.szycik@linux.intel.com>
+
+[ Upstream commit 7598f4b40bd60e4a4280de645eb2893eea80b59d ]
+
+Remove netdev_update_features() from iavf_adminq_task(), as it can cause
+deadlocks due to needing rtnl_lock. Instead use the
+IAVF_FLAG_SETUP_NETDEV_FEATURES flag to indicate that netdev features need
+to be updated in the watchdog task. iavf_set_vlan_offload_features()
+and iavf_set_queue_vlan_tag_loc() can be called directly from
+iavf_virtchnl_completion().
+
+Suggested-by: Phani Burra <phani.r.burra@intel.com>
+Signed-off-by: Marcin Szycik <marcin.szycik@linux.intel.com>
+Reviewed-by: Alexander Lobakin <alexandr.lobakin@intel.com>
+Tested-by: Marek Szlosek <marek.szlosek@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Stable-dep-of: c2ed2403f12c ("iavf: Wait for reset in callbacks which trigger it")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 27 +++++++------------
+ .../net/ethernet/intel/iavf/iavf_virtchnl.c | 8 ++++++
+ 2 files changed, 17 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index 104de9a071449..68e951fe5e210 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -2689,6 +2689,15 @@ static void iavf_watchdog_task(struct work_struct *work)
+ goto restart_watchdog;
+ }
+
++ if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) &&
++ adapter->netdev_registered &&
++ !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) &&
++ rtnl_trylock()) {
++ netdev_update_features(adapter->netdev);
++ rtnl_unlock();
++ adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES;
++ }
++
+ if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED)
+ iavf_change_state(adapter, __IAVF_COMM_FAILED);
+
+@@ -3228,24 +3237,6 @@ static void iavf_adminq_task(struct work_struct *work)
+ } while (pending);
+ mutex_unlock(&adapter->crit_lock);
+
+- if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES)) {
+- if (adapter->netdev_registered ||
+- !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) {
+- struct net_device *netdev = adapter->netdev;
+-
+- rtnl_lock();
+- netdev_update_features(netdev);
+- rtnl_unlock();
+- /* Request VLAN offload settings */
+- if (VLAN_V2_ALLOWED(adapter))
+- iavf_set_vlan_offload_features
+- (adapter, 0, netdev->features);
+-
+- iavf_set_queue_vlan_tag_loc(adapter);
+- }
+-
+- adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES;
+- }
+ if ((adapter->flags &
+ (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) ||
+ adapter->state == __IAVF_RESETTING)
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+index 00dccdd290dce..07d37402a0df5 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+@@ -2237,6 +2237,14 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+
+ iavf_process_config(adapter);
+ adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES;
++
++ /* Request VLAN offload settings */
++ if (VLAN_V2_ALLOWED(adapter))
++ iavf_set_vlan_offload_features(adapter, 0,
++ netdev->features);
++
++ iavf_set_queue_vlan_tag_loc(adapter);
++
+ was_mac_changed = !ether_addr_equal(netdev->dev_addr,
+ adapter->hw.mac.addr);
+
+--
+2.39.2
+
--- /dev/null
+From c45878593282d7f12a92cae3b219aeb3889e32f7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Apr 2023 12:09:39 -0600
+Subject: iavf: send VLAN offloading caps once after VFR
+
+From: Ahmed Zaki <ahmed.zaki@intel.com>
+
+[ Upstream commit 7dcbdf29282fbcdb646dc785e8a57ed2c2fec8ba ]
+
+When the user disables rxvlan offloading and then changes the number of
+channels, all VLAN ports are unable to receive traffic.
+
+Changing the number of channels triggers a VFR reset. During re-init, when
+VIRTCHNL_OP_GET_OFFLOAD_VLAN_V2_CAPS is received, we do:
+1 - set the IAVF_FLAG_SETUP_NETDEV_FEATURES flag
+2 - call
+ iavf_set_vlan_offload_features(adapter, 0, netdev->features);
+
+The second step sends to the PF the __default__ features, in this case
+aq_required |= IAVF_FLAG_AQ_ENABLE_CTAG_VLAN_STRIPPING
+
+While the first step forces the watchdog task to call
+netdev_update_features() -> iavf_set_features() ->
+iavf_set_vlan_offload_features(adapter, netdev->features, features).
+Since the user disabled the "rxvlan", this sets:
+aq_required |= IAVF_FLAG_AQ_DISABLE_CTAG_VLAN_STRIPPING
+
+When we start processing the AQ commands, both flags are enabled. Since we
+process DISABLE_XTAG first then ENABLE_XTAG, this results in the PF
+enabling the rxvlan offload. This breaks all communications on the VLAN
+net devices.
+
+Fix by removing the call to iavf_set_vlan_offload_features() (second
+step). Calling netdev_update_features() from watchdog task is enough for
+both init and reset paths.
+
+Fixes: 7598f4b40bd6 ("iavf: Move netdev_update_features() into watchdog task")
+Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Reviewed-by: Leon Romanovsky <leonro@nvidia.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Stable-dep-of: c2ed2403f12c ("iavf: Wait for reset in callbacks which trigger it")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf_virtchnl.c | 5 -----
+ 1 file changed, 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+index 07d37402a0df5..7b34111fd4eb1 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+@@ -2238,11 +2238,6 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+ iavf_process_config(adapter);
+ adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES;
+
+- /* Request VLAN offload settings */
+- if (VLAN_V2_ALLOWED(adapter))
+- iavf_set_vlan_offload_features(adapter, 0,
+- netdev->features);
+-
+ iavf_set_queue_vlan_tag_loc(adapter);
+
+ was_mac_changed = !ether_addr_equal(netdev->dev_addr,
+--
+2.39.2
+
--- /dev/null
+From 7af6ff049c18a0c4e3e4a80b523c331617b48a6f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 19 May 2023 15:46:02 -0600
+Subject: iavf: use internal state to free traffic IRQs
+
+From: Ahmed Zaki <ahmed.zaki@intel.com>
+
+[ Upstream commit a77ed5c5b768e9649be240a2d864e5cd9c6a2015 ]
+
+If the system tries to close the netdev while iavf_reset_task() is
+running, __LINK_STATE_START will be cleared and netif_running() will
+return false in iavf_reinit_interrupt_scheme(). This will result in
+iavf_free_traffic_irqs() not being called and a leak as follows:
+
+ [7632.489326] remove_proc_entry: removing non-empty directory 'irq/999', leaking at least 'iavf-enp24s0f0v0-TxRx-0'
+ [7632.490214] WARNING: CPU: 0 PID: 10 at fs/proc/generic.c:718 remove_proc_entry+0x19b/0x1b0
+
+is shown when pci_disable_msix() is later called. Fix by using the
+internal adapter state. The traffic IRQs will always exist if
+state == __IAVF_RUNNING.
+
+Fixes: 5b36e8d04b44 ("i40evf: Enable VF to request an alternate queue allocation")
+Signed-off-by: Ahmed Zaki <ahmed.zaki@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index 81676c3af4b36..104de9a071449 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -1941,15 +1941,16 @@ static void iavf_free_rss(struct iavf_adapter *adapter)
+ /**
+ * iavf_reinit_interrupt_scheme - Reallocate queues and vectors
+ * @adapter: board private structure
++ * @running: true if adapter->state == __IAVF_RUNNING
+ *
+ * Returns 0 on success, negative on failure
+ **/
+-static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter)
++static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter, bool running)
+ {
+ struct net_device *netdev = adapter->netdev;
+ int err;
+
+- if (netif_running(netdev))
++ if (running)
+ iavf_free_traffic_irqs(adapter);
+ iavf_free_misc_irq(adapter);
+ iavf_reset_interrupt_capability(adapter);
+@@ -3056,7 +3057,7 @@ static void iavf_reset_task(struct work_struct *work)
+
+ if ((adapter->flags & IAVF_FLAG_REINIT_MSIX_NEEDED) ||
+ (adapter->flags & IAVF_FLAG_REINIT_ITR_NEEDED)) {
+- err = iavf_reinit_interrupt_scheme(adapter);
++ err = iavf_reinit_interrupt_scheme(adapter, running);
+ if (err)
+ goto reset_err;
+ }
+--
+2.39.2
+
--- /dev/null
+From 666e6a1e4dfcf28dffd3be1e4128f2dde21ee8cb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 5 Jun 2023 10:52:22 -0400
+Subject: iavf: Wait for reset in callbacks which trigger it
+
+From: Marcin Szycik <marcin.szycik@linux.intel.com>
+
+[ Upstream commit c2ed2403f12c74a74a0091ed5d830e72c58406e8 ]
+
+Adding the interface to a bond failed when done right after changing
+the MTU on the interface. The failure occurred because the bonding
+driver could not open the interface while it was still in the
+__RESETTING state caused by the MTU change.
+
+Add a new reset_waitqueue to indicate that the reset has finished.
+
+Add waiting for reset to finish in callbacks which trigger hw reset:
+iavf_set_priv_flags(), iavf_change_mtu() and iavf_set_ringparam().
+We use a 5000ms timeout period because on Hyper-V based systems,
+this operation takes around 3000-4000ms. In normal circumstances,
+it doesn't take more than 500ms to complete.
+
+Add a function iavf_wait_for_reset() so the wait-for-reset code can be
+reused, and use it also in iavf_set_channels(), which already waits for
+the reset. We don't add error handling in iavf_set_channels() as this
+could leave the device in an incorrect state if the reset was scheduled
+but timed out, or if the waiting function was interrupted by a signal.
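+
+A minimal sketch of what such a wait helper could look like (the actual
+implementation lives in the iavf_main.c hunks not quoted here; the wait
+condition below is an illustrative assumption built from the existing
+reset flags and adapter state, and it assumes the reset task does
+wake_up(&adapter->reset_waitqueue) once the reset completes):
+
+	/* Sketch only: block until the scheduled reset has finished,
+	 * bounded by the 5000ms timeout described above.
+	 */
+	int iavf_wait_for_reset(struct iavf_adapter *adapter)
+	{
+		int ret = wait_event_interruptible_timeout(adapter->reset_waitqueue,
+				!(adapter->flags & (IAVF_FLAG_RESET_PENDING |
+						    IAVF_FLAG_RESET_NEEDED)) &&
+				adapter->state != __IAVF_RESETTING,
+				msecs_to_jiffies(5000));
+
+		if (ret < 0)	/* interrupted by a signal */
+			return ret;
+		if (!ret)	/* timed out */
+			return -EBUSY;
+		return 0;
+	}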
+
+Fixes: 4e5e6b5d9d13 ("iavf: Fix return of set the new channel count")
+Signed-off-by: Marcin Szycik <marcin.szycik@linux.intel.com>
+Co-developed-by: Dawid Wesierski <dawidx.wesierski@intel.com>
+Signed-off-by: Dawid Wesierski <dawidx.wesierski@intel.com>
+Signed-off-by: Sylwester Dziedziuch <sylwesterx.dziedziuch@intel.com>
+Signed-off-by: Kamil Maziarz <kamil.maziarz@intel.com>
+Signed-off-by: Mateusz Palczewski <mateusz.palczewski@intel.com>
+Tested-by: Rafal Romanowski <rafal.romanowski@intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf.h | 2 +
+ .../net/ethernet/intel/iavf/iavf_ethtool.c | 31 ++++++-----
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 51 ++++++++++++++++++-
+ .../net/ethernet/intel/iavf/iavf_virtchnl.c | 1 +
+ 4 files changed, 68 insertions(+), 17 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h
+index a716ed6bb787d..2fe44e865d0a2 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf.h
++++ b/drivers/net/ethernet/intel/iavf/iavf.h
+@@ -257,6 +257,7 @@ struct iavf_adapter {
+ struct work_struct adminq_task;
+ struct delayed_work client_task;
+ wait_queue_head_t down_waitqueue;
++ wait_queue_head_t reset_waitqueue;
+ wait_queue_head_t vc_waitqueue;
+ struct iavf_q_vector *q_vectors;
+ struct list_head vlan_filter_list;
+@@ -582,4 +583,5 @@ void iavf_add_adv_rss_cfg(struct iavf_adapter *adapter);
+ void iavf_del_adv_rss_cfg(struct iavf_adapter *adapter);
+ struct iavf_mac_filter *iavf_add_filter(struct iavf_adapter *adapter,
+ const u8 *macaddr);
++int iavf_wait_for_reset(struct iavf_adapter *adapter);
+ #endif /* _IAVF_H_ */
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+index 4746ee517c75a..73219c5069290 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c
+@@ -484,6 +484,7 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags)
+ {
+ struct iavf_adapter *adapter = netdev_priv(netdev);
+ u32 orig_flags, new_flags, changed_flags;
++ int ret = 0;
+ u32 i;
+
+ orig_flags = READ_ONCE(adapter->flags);
+@@ -533,10 +534,13 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags)
+ if (netif_running(netdev)) {
+ adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+ queue_work(adapter->wq, &adapter->reset_task);
++ ret = iavf_wait_for_reset(adapter);
++ if (ret)
++ netdev_warn(netdev, "Changing private flags timeout or interrupted waiting for reset");
+ }
+ }
+
+- return 0;
++ return ret;
+ }
+
+ /**
+@@ -627,6 +631,7 @@ static int iavf_set_ringparam(struct net_device *netdev,
+ {
+ struct iavf_adapter *adapter = netdev_priv(netdev);
+ u32 new_rx_count, new_tx_count;
++ int ret = 0;
+
+ if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending))
+ return -EINVAL;
+@@ -673,9 +678,12 @@ static int iavf_set_ringparam(struct net_device *netdev,
+ if (netif_running(netdev)) {
+ adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+ queue_work(adapter->wq, &adapter->reset_task);
++ ret = iavf_wait_for_reset(adapter);
++ if (ret)
++ netdev_warn(netdev, "Changing ring parameters timeout or interrupted waiting for reset");
+ }
+
+- return 0;
++ return ret;
+ }
+
+ /**
+@@ -1830,7 +1838,7 @@ static int iavf_set_channels(struct net_device *netdev,
+ {
+ struct iavf_adapter *adapter = netdev_priv(netdev);
+ u32 num_req = ch->combined_count;
+- int i;
++ int ret = 0;
+
+ if ((adapter->vf_res->vf_cap_flags & VIRTCHNL_VF_OFFLOAD_ADQ) &&
+ adapter->num_tc) {
+@@ -1854,20 +1862,11 @@ static int iavf_set_channels(struct net_device *netdev,
+ adapter->flags |= IAVF_FLAG_REINIT_ITR_NEEDED;
+ iavf_schedule_reset(adapter);
+
+- /* wait for the reset is done */
+- for (i = 0; i < IAVF_RESET_WAIT_COMPLETE_COUNT; i++) {
+- msleep(IAVF_RESET_WAIT_MS);
+- if (adapter->flags & IAVF_FLAG_RESET_PENDING)
+- continue;
+- break;
+- }
+- if (i == IAVF_RESET_WAIT_COMPLETE_COUNT) {
+- adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+- adapter->num_req_queues = 0;
+- return -EOPNOTSUPP;
+- }
++ ret = iavf_wait_for_reset(adapter);
++ if (ret)
++ netdev_warn(netdev, "Changing channel count timeout or interrupted waiting for reset");
+
+- return 0;
++ return ret;
+ }
+
+ /**
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index d5b1dcfe0ccdd..c2739071149de 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -166,6 +166,45 @@ static struct iavf_adapter *iavf_pdev_to_adapter(struct pci_dev *pdev)
+ return netdev_priv(pci_get_drvdata(pdev));
+ }
+
++/**
++ * iavf_is_reset_in_progress - Check if a reset is in progress
++ * @adapter: board private structure
++ */
++static bool iavf_is_reset_in_progress(struct iavf_adapter *adapter)
++{
++ if (adapter->state == __IAVF_RESETTING ||
++ adapter->flags & (IAVF_FLAG_RESET_PENDING |
++ IAVF_FLAG_RESET_NEEDED))
++ return true;
++
++ return false;
++}
++
++/**
++ * iavf_wait_for_reset - Wait for reset to finish.
++ * @adapter: board private structure
++ *
++ * Returns 0 if reset finished successfully, negative on timeout or interrupt.
++ */
++int iavf_wait_for_reset(struct iavf_adapter *adapter)
++{
++ int ret = wait_event_interruptible_timeout(adapter->reset_waitqueue,
++ !iavf_is_reset_in_progress(adapter),
++ msecs_to_jiffies(5000));
++
++ /* If ret < 0 then it means wait was interrupted.
++ * If ret == 0 then it means we got a timeout while waiting
++ * for reset to finish.
++ * If ret > 0 it means reset has finished.
++ */
++ if (ret > 0)
++ return 0;
++ else if (ret < 0)
++ return -EINTR;
++ else
++ return -EBUSY;
++}
++
+ /**
+ * iavf_allocate_dma_mem_d - OS specific memory alloc for shared code
+ * @hw: pointer to the HW structure
+@@ -3161,6 +3200,7 @@ static void iavf_reset_task(struct work_struct *work)
+
+ adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED;
+
++ wake_up(&adapter->reset_waitqueue);
+ mutex_unlock(&adapter->client_lock);
+ mutex_unlock(&adapter->crit_lock);
+
+@@ -4325,6 +4365,7 @@ static int iavf_close(struct net_device *netdev)
+ static int iavf_change_mtu(struct net_device *netdev, int new_mtu)
+ {
+ struct iavf_adapter *adapter = netdev_priv(netdev);
++ int ret = 0;
+
+ netdev_dbg(netdev, "changing MTU from %d to %d\n",
+ netdev->mtu, new_mtu);
+@@ -4337,9 +4378,14 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu)
+ if (netif_running(netdev)) {
+ adapter->flags |= IAVF_FLAG_RESET_NEEDED;
+ queue_work(adapter->wq, &adapter->reset_task);
++ ret = iavf_wait_for_reset(adapter);
++ if (ret < 0)
++ netdev_warn(netdev, "MTU change interrupted waiting for reset");
++ else if (ret)
++ netdev_warn(netdev, "MTU change timed out waiting for reset");
+ }
+
+- return 0;
++ return ret;
+ }
+
+ #define NETIF_VLAN_OFFLOAD_FEATURES (NETIF_F_HW_VLAN_CTAG_RX | \
+@@ -4942,6 +4988,9 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+ /* Setup the wait queue for indicating transition to down status */
+ init_waitqueue_head(&adapter->down_waitqueue);
+
++ /* Setup the wait queue for indicating transition to running state */
++ init_waitqueue_head(&adapter->reset_waitqueue);
++
+ /* Setup the wait queue for indicating virtchannel events */
+ init_waitqueue_head(&adapter->vc_waitqueue);
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+index 7b34111fd4eb1..eec7ac3b7f6ee 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c
+@@ -2285,6 +2285,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter,
+ case VIRTCHNL_OP_ENABLE_QUEUES:
+ /* enable transmits */
+ iavf_irq_enable(adapter, true);
++ wake_up(&adapter->reset_waitqueue);
+ adapter->flags &= ~IAVF_FLAG_QUEUES_DISABLED;
+ break;
+ case VIRTCHNL_OP_DISABLE_QUEUES:
+--
+2.39.2
+
--- /dev/null
+From 1fce30757b3c297f96e47f71e0c036d447f63664 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Jun 2023 10:47:32 -0700
+Subject: igb: Fix igb_down hung on surprise removal
+
+From: Ying Hsu <yinghsu@chromium.org>
+
+[ Upstream commit 004d25060c78fc31f66da0fa439c544dda1ac9d5 ]
+
+In a setup where a Thunderbolt hub connects to Ethernet and a display
+through USB Type-C, users may experience a hung task timeout when they
+remove the cable between the PC and the Thunderbolt hub.
+This is because the igb_down function is called multiple times when
+the Thunderbolt hub is unplugged. For example, the igb_io_error_detected
+triggers the first call, and the igb_remove triggers the second call.
+The second call to igb_down will block at napi_synchronize.
+Here's the call trace:
+ __schedule+0x3b0/0xddb
+ ? __mod_timer+0x164/0x5d3
+ schedule+0x44/0xa8
+ schedule_timeout+0xb2/0x2a4
+ ? run_local_timers+0x4e/0x4e
+ msleep+0x31/0x38
+ igb_down+0x12c/0x22a [igb 6615058754948bfde0bf01429257eb59f13030d4]
+ __igb_close+0x6f/0x9c [igb 6615058754948bfde0bf01429257eb59f13030d4]
+ igb_close+0x23/0x2b [igb 6615058754948bfde0bf01429257eb59f13030d4]
+ __dev_close_many+0x95/0xec
+ dev_close_many+0x6e/0x103
+ unregister_netdevice_many+0x105/0x5b1
+ unregister_netdevice_queue+0xc2/0x10d
+ unregister_netdev+0x1c/0x23
+ igb_remove+0xa7/0x11c [igb 6615058754948bfde0bf01429257eb59f13030d4]
+ pci_device_remove+0x3f/0x9c
+ device_release_driver_internal+0xfe/0x1b4
+ pci_stop_bus_device+0x5b/0x7f
+ pci_stop_bus_device+0x30/0x7f
+ pci_stop_bus_device+0x30/0x7f
+ pci_stop_and_remove_bus_device+0x12/0x19
+ pciehp_unconfigure_device+0x76/0xe9
+ pciehp_disable_slot+0x6e/0x131
+ pciehp_handle_presence_or_link_change+0x7a/0x3f7
+ pciehp_ist+0xbe/0x194
+ irq_thread_fn+0x22/0x4d
+ ? irq_thread+0x1fd/0x1fd
+ irq_thread+0x17b/0x1fd
+ ? irq_forced_thread_fn+0x5f/0x5f
+ kthread+0x142/0x153
+ ? __irq_get_irqchip_state+0x46/0x46
+ ? kthread_associate_blkcg+0x71/0x71
+ ret_from_fork+0x1f/0x30
+
+In this case, igb_io_error_detected detaches the network interface
+and requests a PCIe slot reset; however, the PCIe reset callback is
+never invoked, so the Ethernet connection breaks down.
+As the PCIe error in this case is non-fatal, requesting a slot
+reset can be avoided.
+This patch fixes the hung task issue and preserves the Ethernet
+connection by ignoring non-fatal PCIe errors.
+
+Signed-off-by: Ying Hsu <yinghsu@chromium.org>
+Tested-by: Pucha Himasekhar Reddy <himasekharx.reddy.pucha@intel.com> (A Contingent worker at Intel)
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Link: https://lore.kernel.org/r/20230620174732.4145155-1-anthony.l.nguyen@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igb/igb_main.c | 5 +++++
+ 1 file changed, 5 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
+index 18ffbc892f86c..3e0444354632d 100644
+--- a/drivers/net/ethernet/intel/igb/igb_main.c
++++ b/drivers/net/ethernet/intel/igb/igb_main.c
+@@ -9585,6 +9585,11 @@ static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
+ struct net_device *netdev = pci_get_drvdata(pdev);
+ struct igb_adapter *adapter = netdev_priv(netdev);
+
++ if (state == pci_channel_io_normal) {
++ dev_warn(&pdev->dev, "Non-correctable non-fatal error reported.\n");
++ return PCI_ERS_RESULT_CAN_RECOVER;
++ }
++
+ netif_device_detach(netdev);
+
+ if (state == pci_channel_io_perm_failure)
+--
+2.39.2
+
--- /dev/null
+From c01002df2d8dadbc072d6f4a641153969ae81dc1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 12 Apr 2023 09:36:11 +0200
+Subject: igc: Avoid transmit queue timeout for XDP
+
+From: Kurt Kanzenbach <kurt@linutronix.de>
+
+[ Upstream commit 95b681485563c64585de78662ee52d06b7fa47d9 ]
+
+High XDP load triggers the netdev watchdog:
+
+|NETDEV WATCHDOG: enp3s0 (igc): transmit queue 2 timed out
+
+The reason is that the Tx queue transmission start (txq->trans_start) is not
+updated in the XDP code path. Therefore, update it in all XDP transmission
+functions.
+
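+The pattern applied to each XDP transmit path is roughly:
+
+  __netif_tx_lock(nq, cpu);
+  /* queue is shared with the slow path; refresh the timestamp so the
+   * netdev watchdog does not see a stale trans_start under pure XDP load
+   */
+  txq_trans_cond_update(nq);
+  /* ... queue descriptors ... */
+  __netif_tx_unlock(nq);
+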
+Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Stable-dep-of: 78adb4bcf99e ("igc: Prevent garbled TX queue with XDP ZEROCOPY")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_main.c | 8 ++++++++
+ 1 file changed, 8 insertions(+)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index 273941f90f066..ade4bde47c65a 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -2402,6 +2402,8 @@ static int igc_xdp_xmit_back(struct igc_adapter *adapter, struct xdp_buff *xdp)
+ nq = txring_txq(ring);
+
+ __netif_tx_lock(nq, cpu);
++ /* Avoid transmit queue timeout since we share it with the slow path */
++ txq_trans_cond_update(nq);
+ res = igc_xdp_init_tx_descriptor(ring, xdpf);
+ __netif_tx_unlock(nq);
+ return res;
+@@ -2804,6 +2806,9 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring)
+
+ __netif_tx_lock(nq, cpu);
+
++ /* Avoid transmit queue timeout since we share it with the slow path */
++ txq_trans_cond_update(nq);
++
+ budget = igc_desc_unused(ring);
+
+ while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
+@@ -6297,6 +6302,9 @@ static int igc_xdp_xmit(struct net_device *dev, int num_frames,
+
+ __netif_tx_lock(nq, cpu);
+
++ /* Avoid transmit queue timeout since we share it with the slow path */
++ txq_trans_cond_update(nq);
++
+ drops = 0;
+ for (i = 0; i < num_frames; i++) {
+ int err;
+--
+2.39.2
+
--- /dev/null
+From d6a3517285a333ba4076b9e7721da2053a4d7dd2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 10:54:44 -0700
+Subject: igc: Prevent garbled TX queue with XDP ZEROCOPY
+
+From: Florian Kauer <florian.kauer@linutronix.de>
+
+[ Upstream commit 78adb4bcf99effbb960c5f9091e2e062509d1030 ]
+
+In normal operation, each populated queue item has
+next_to_watch pointing to the last TX desc of the packet,
+while each cleaned item has it set to 0. In particular,
+next_to_use that points to the next (necessarily clean)
+item to use has next_to_watch set to 0.
+
+When the TX queue is used both by an application using
+AF_XDP with ZEROCOPY and by a second non-XDP application
+generating high traffic, the queue pointers can get into
+an invalid state where next_to_use points to an item
+whose next_to_watch is NOT set to 0.
+
+However, the implementation assumes in several places
+that this is never the case, so if it does occur,
+bad things happen. In particular, within the loop inside
+of igc_clean_tx_irq(), next_to_clean can overtake next_to_use.
+Finally, this prevents any further transmission via
+this queue and it never gets unblocked or signaled.
+Secondly, if the queue is in this garbled state,
+the inner loop of igc_clean_tx_ring() will never terminate,
+completely hogging a CPU core.
+
+The reason is that igc_xdp_xmit_zc() reads next_to_use
+before acquiring the lock and writes it back
+(potentially unmodified) later. If it got modified
+before locking, the outdated next_to_use is written
+pointing to an item that was already used elsewhere
+(and thus next_to_watch got written).
+
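+In other words, the problematic ordering and its fix look roughly like:
+
+  /* before (racy): next_to_use sampled outside the lock */
+  ntu = ring->next_to_use;
+  __netif_tx_lock(nq, cpu);
+  /* ... fill descriptors starting at the stale ntu ... */
+  ring->next_to_use = ntu;
+  __netif_tx_unlock(nq);
+
+  /* after: sample next_to_use only once the lock is held */
+  __netif_tx_lock(nq, cpu);
+  ntu = ring->next_to_use;
+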
+Fixes: 9acf59a752d4 ("igc: Enable TX via AF_XDP zero-copy")
+Signed-off-by: Florian Kauer <florian.kauer@linutronix.de>
+Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de>
+Tested-by: Kurt Kanzenbach <kurt@linutronix.de>
+Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Tested-by: Naama Meir <naamax.meir@linux.intel.com>
+Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
+Link: https://lore.kernel.org/r/20230717175444.3217831-1-anthony.l.nguyen@intel.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/igc/igc_main.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
+index ade4bde47c65a..2e091a4a065e7 100644
+--- a/drivers/net/ethernet/intel/igc/igc_main.c
++++ b/drivers/net/ethernet/intel/igc/igc_main.c
+@@ -2797,9 +2797,8 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring)
+ struct netdev_queue *nq = txring_txq(ring);
+ union igc_adv_tx_desc *tx_desc = NULL;
+ int cpu = smp_processor_id();
+- u16 ntu = ring->next_to_use;
+ struct xdp_desc xdp_desc;
+- u16 budget;
++ u16 budget, ntu;
+
+ if (!netif_carrier_ok(ring->netdev))
+ return;
+@@ -2809,6 +2808,7 @@ static void igc_xdp_xmit_zc(struct igc_ring *ring)
+ /* Avoid transmit queue timeout since we share it with the slow path */
+ txq_trans_cond_update(nq);
+
++ ntu = ring->next_to_use;
+ budget = igc_desc_unused(ring);
+
+ while (xsk_tx_peek_desc(pool, &xdp_desc) && budget--) {
+--
+2.39.2
+
--- /dev/null
+From 84ac2024e94e7308d618a49933dee91acc662e7c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Nov 2022 16:49:15 +0800
+Subject: kallsyms: Correctly sequence symbols when CONFIG_LTO_CLANG=y
+
+From: Zhen Lei <thunder.leizhen@huawei.com>
+
+[ Upstream commit 010a0aad39fccceba4a07d30d163158a39c704f3 ]
+
+LLVM appends various suffixes for local functions and variables, suffixes
+observed:
+ - foo.llvm.[0-9a-f]+
+ - foo.[0-9a-f]+
+
+Therefore, when CONFIG_LTO_CLANG=y, kallsyms_lookup_name() needs to
+truncate the suffix of the symbol name before comparing the local function
+or variable name.
+
+Old implementation code:
+- if (strcmp(namebuf, name) == 0)
+- return kallsyms_sym_address(i);
+- if (cleanup_symbol_name(namebuf) && strcmp(namebuf, name) == 0)
+- return kallsyms_sym_address(i);
+
+The preceding lookup walks the symbols by address from low to high. That
+is, for symbols with the same name after the suffix is removed, the one
+with the smallest address is returned first. Therefore, when sorting in
+the tool, if the raw names are the same, they should be sorted by address
+in ascending order.
+
+ASCII[.] = 2e
+ASCII[0-9] = 30,39
+ASCII[A-Z] = 41,5a
+ASCII[_] = 5f
+ASCII[a-z] = 61,7a
+
+Given the preceding ASCII code values, the sort strictly produces the
+following order.
+ ---------------------------------
+| main-key | sub-key |
+|---------------------------------|
+| | addr_lowest |
+| <name> | ... |
+| <name>.<suffix> | ... |
+| | addr_highest |
+|---------------------------------|
+| <name>?<others> | | //? is [_A-Za-z0-9]
+ ---------------------------------
+
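+For illustration, with --lto-clang and hypothetical addresses, symbols
+would be emitted in this order (equal names after stripping the suffix
+fall back to ascending address; different names sort purely by name):
+
+  foo           (addr 0x1000)   <- ties with foo.llvm.1, lower address first
+  foo.llvm.1    (addr 0x2000)   <- compares as "foo" after cleanup
+  foo1          (addr 0x0800)   <- "foo1" > "foo", address irrelevant
+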
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
+Stable-dep-of: 8cc32a9bbf29 ("kallsyms: strip LTO-only suffixes from promoted global functions")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ scripts/kallsyms.c | 36 ++++++++++++++++++++++++++++++++++--
+ scripts/link-vmlinux.sh | 4 ++++
+ 2 files changed, 38 insertions(+), 2 deletions(-)
+
+diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
+index dcb744a067e5e..67ef9aa14a770 100644
+--- a/scripts/kallsyms.c
++++ b/scripts/kallsyms.c
+@@ -78,6 +78,7 @@ static unsigned int table_size, table_cnt;
+ static int all_symbols;
+ static int absolute_percpu;
+ static int base_relative;
++static int lto_clang;
+
+ static int token_profit[0x10000];
+
+@@ -89,7 +90,7 @@ static unsigned char best_table_len[256];
+ static void usage(void)
+ {
+ fprintf(stderr, "Usage: kallsyms [--all-symbols] [--absolute-percpu] "
+- "[--base-relative] in.map > out.S\n");
++ "[--base-relative] [--lto-clang] in.map > out.S\n");
+ exit(1);
+ }
+
+@@ -411,6 +412,34 @@ static int symbol_absolute(const struct sym_entry *s)
+ return s->percpu_absolute;
+ }
+
++static char * s_name(char *buf)
++{
++ /* Skip the symbol type */
++ return buf + 1;
++}
++
++static void cleanup_symbol_name(char *s)
++{
++ char *p;
++
++ if (!lto_clang)
++ return;
++
++ /*
++ * ASCII[.] = 2e
++ * ASCII[0-9] = 30,39
++ * ASCII[A-Z] = 41,5a
++ * ASCII[_] = 5f
++ * ASCII[a-z] = 61,7a
++ *
++ * As above, replacing '.' with '\0' does not affect the main sorting,
++ * but it helps us with subsorting.
++ */
++ p = strchr(s, '.');
++ if (p)
++ *p = '\0';
++}
++
+ static int compare_names(const void *a, const void *b)
+ {
+ int ret;
+@@ -421,7 +450,9 @@ static int compare_names(const void *a, const void *b)
+
+ expand_symbol(sa->sym, sa->len, sa_namebuf);
+ expand_symbol(sb->sym, sb->len, sb_namebuf);
+- ret = strcmp(&sa_namebuf[1], &sb_namebuf[1]);
++ cleanup_symbol_name(s_name(sa_namebuf));
++ cleanup_symbol_name(s_name(sb_namebuf));
++ ret = strcmp(s_name(sa_namebuf), s_name(sb_namebuf));
+ if (!ret) {
+ if (sa->addr > sb->addr)
+ return 1;
+@@ -855,6 +886,7 @@ int main(int argc, char **argv)
+ {"all-symbols", no_argument, &all_symbols, 1},
+ {"absolute-percpu", no_argument, &absolute_percpu, 1},
+ {"base-relative", no_argument, &base_relative, 1},
++ {"lto-clang", no_argument, <o_clang, 1},
+ {},
+ };
+
+diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh
+index 918470d768e9c..32e573943cf03 100755
+--- a/scripts/link-vmlinux.sh
++++ b/scripts/link-vmlinux.sh
+@@ -156,6 +156,10 @@ kallsyms()
+ kallsymopt="${kallsymopt} --base-relative"
+ fi
+
++ if is_enabled CONFIG_LTO_CLANG; then
++ kallsymopt="${kallsymopt} --lto-clang"
++ fi
++
+ info KSYMS ${2}
+ scripts/kallsyms ${kallsymopt} ${1} > ${2}
+ }
+--
+2.39.2
+
--- /dev/null
+From 0abbf42237e70e5ca1bdbcd75de6eed8c1bd4077 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Nov 2022 16:49:14 +0800
+Subject: kallsyms: Improve the performance of kallsyms_lookup_name()
+
+From: Zhen Lei <thunder.leizhen@huawei.com>
+
+[ Upstream commit 60443c88f3a89fd303a9e8c0e84895910675c316 ]
+
+Currently, to search for a symbol, we need to expand the symbols in
+'kallsyms_names' one by one, and then use the expanded string for
+comparison. It's O(n).
+
+If we sort names in ascending order like addresses, we can also use
+binary search. It's O(log(n)).
+
+In order not to change the implementation of "/proc/kallsyms", the table
+kallsyms_names[] is still stored in a one-to-one correspondence with the
+address in ascending order.
+
+Add array kallsyms_seqs_of_names[], it's indexed by the sequence number
+of the sorted names, and the corresponding content is the sequence number
+of the sorted addresses. For example:
+Assume that the index of NameX in array kallsyms_seqs_of_names[] is 'i'
+and that kallsyms_seqs_of_names[i] is 'k'; then the corresponding address
+of NameX is kallsyms_addresses[k], and its offset in kallsyms_names[] is
+get_symbol_offset(k).
+
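+A minimal sketch of the indirection on the kernel side (using helpers
+that already exist in kernel/kallsyms.c):
+
+  unsigned int k = kallsyms_seqs_of_names[i];   /* name order -> addr order */
+  unsigned long addr = kallsyms_sym_address(k); /* address of NameX */
+  unsigned int off = get_symbol_offset(k);      /* offset in kallsyms_names[] */
+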
+Note that the memory usage will increase by (4 * kallsyms_num_syms)
+bytes; the next two patches will reduce it by (1 * kallsyms_num_syms)
+bytes and properly handle the CONFIG_LTO_CLANG=y case.
+
+Performance test results: (x86)
+Before:
+min=234, max=10364402, avg=5206926
+min=267, max=11168517, avg=5207587
+After:
+min=1016, max=90894, avg=7272
+min=1014, max=93470, avg=7293
+
+The average lookup performance of kallsyms_lookup_name() improved 715x.
+
+Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com>
+Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
+Stable-dep-of: 8cc32a9bbf29 ("kallsyms: strip LTO-only suffixes from promoted global functions")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/kallsyms.c | 86 +++++++++++++++++++++++++++++++++-----
+ kernel/kallsyms_internal.h | 1 +
+ scripts/kallsyms.c | 37 ++++++++++++++++
+ 3 files changed, 113 insertions(+), 11 deletions(-)
+
+diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
+index 60c20f301a6ba..ba351dfa109b6 100644
+--- a/kernel/kallsyms.c
++++ b/kernel/kallsyms.c
+@@ -187,26 +187,90 @@ static bool cleanup_symbol_name(char *s)
+ return false;
+ }
+
++static int compare_symbol_name(const char *name, char *namebuf)
++{
++ int ret;
++
++ ret = strcmp(name, namebuf);
++ if (!ret)
++ return ret;
++
++ if (cleanup_symbol_name(namebuf) && !strcmp(name, namebuf))
++ return 0;
++
++ return ret;
++}
++
++static int kallsyms_lookup_names(const char *name,
++ unsigned int *start,
++ unsigned int *end)
++{
++ int ret;
++ int low, mid, high;
++ unsigned int seq, off;
++ char namebuf[KSYM_NAME_LEN];
++
++ low = 0;
++ high = kallsyms_num_syms - 1;
++
++ while (low <= high) {
++ mid = low + (high - low) / 2;
++ seq = kallsyms_seqs_of_names[mid];
++ off = get_symbol_offset(seq);
++ kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf));
++ ret = compare_symbol_name(name, namebuf);
++ if (ret > 0)
++ low = mid + 1;
++ else if (ret < 0)
++ high = mid - 1;
++ else
++ break;
++ }
++
++ if (low > high)
++ return -ESRCH;
++
++ low = mid;
++ while (low) {
++ seq = kallsyms_seqs_of_names[low - 1];
++ off = get_symbol_offset(seq);
++ kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf));
++ if (compare_symbol_name(name, namebuf))
++ break;
++ low--;
++ }
++ *start = low;
++
++ if (end) {
++ high = mid;
++ while (high < kallsyms_num_syms - 1) {
++ seq = kallsyms_seqs_of_names[high + 1];
++ off = get_symbol_offset(seq);
++ kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf));
++ if (compare_symbol_name(name, namebuf))
++ break;
++ high++;
++ }
++ *end = high;
++ }
++
++ return 0;
++}
++
+ /* Lookup the address for this symbol. Returns 0 if not found. */
+ unsigned long kallsyms_lookup_name(const char *name)
+ {
+- char namebuf[KSYM_NAME_LEN];
+- unsigned long i;
+- unsigned int off;
++ int ret;
++ unsigned int i;
+
+ /* Skip the search for empty string. */
+ if (!*name)
+ return 0;
+
+- for (i = 0, off = 0; i < kallsyms_num_syms; i++) {
+- off = kallsyms_expand_symbol(off, namebuf, ARRAY_SIZE(namebuf));
+-
+- if (strcmp(namebuf, name) == 0)
+- return kallsyms_sym_address(i);
++ ret = kallsyms_lookup_names(name, &i, NULL);
++ if (!ret)
++ return kallsyms_sym_address(kallsyms_seqs_of_names[i]);
+
+- if (cleanup_symbol_name(namebuf) && strcmp(namebuf, name) == 0)
+- return kallsyms_sym_address(i);
+- }
+ return module_kallsyms_lookup_name(name);
+ }
+
+diff --git a/kernel/kallsyms_internal.h b/kernel/kallsyms_internal.h
+index 2d0c6f2f0243a..a04b7a5cb1e3e 100644
+--- a/kernel/kallsyms_internal.h
++++ b/kernel/kallsyms_internal.h
+@@ -26,5 +26,6 @@ extern const char kallsyms_token_table[] __weak;
+ extern const u16 kallsyms_token_index[] __weak;
+
+ extern const unsigned int kallsyms_markers[] __weak;
++extern const unsigned int kallsyms_seqs_of_names[] __weak;
+
+ #endif // LINUX_KALLSYMS_INTERNAL_H_
+diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
+index 03fa07ad45d95..dcb744a067e5e 100644
+--- a/scripts/kallsyms.c
++++ b/scripts/kallsyms.c
+@@ -49,6 +49,7 @@ _Static_assert(
+ struct sym_entry {
+ unsigned long long addr;
+ unsigned int len;
++ unsigned int seq;
+ unsigned int start_pos;
+ unsigned int percpu_absolute;
+ unsigned char sym[];
+@@ -410,6 +411,35 @@ static int symbol_absolute(const struct sym_entry *s)
+ return s->percpu_absolute;
+ }
+
++static int compare_names(const void *a, const void *b)
++{
++ int ret;
++ char sa_namebuf[KSYM_NAME_LEN];
++ char sb_namebuf[KSYM_NAME_LEN];
++ const struct sym_entry *sa = *(const struct sym_entry **)a;
++ const struct sym_entry *sb = *(const struct sym_entry **)b;
++
++ expand_symbol(sa->sym, sa->len, sa_namebuf);
++ expand_symbol(sb->sym, sb->len, sb_namebuf);
++ ret = strcmp(&sa_namebuf[1], &sb_namebuf[1]);
++ if (!ret) {
++ if (sa->addr > sb->addr)
++ return 1;
++ else if (sa->addr < sb->addr)
++ return -1;
++
++ /* keep old order */
++ return (int)(sa->seq - sb->seq);
++ }
++
++ return ret;
++}
++
++static void sort_symbols_by_name(void)
++{
++ qsort(table, table_cnt, sizeof(table[0]), compare_names);
++}
++
+ static void write_src(void)
+ {
+ unsigned int i, k, off;
+@@ -495,6 +525,7 @@ static void write_src(void)
+ for (i = 0; i < table_cnt; i++) {
+ if ((i & 0xFF) == 0)
+ markers[i >> 8] = off;
++ table[i]->seq = i;
+
+ /* There cannot be any symbol of length zero. */
+ if (table[i]->len == 0) {
+@@ -535,6 +566,12 @@ static void write_src(void)
+
+ free(markers);
+
++ sort_symbols_by_name();
++ output_label("kallsyms_seqs_of_names");
++ for (i = 0; i < table_cnt; i++)
++ printf("\t.long\t%u\n", table[i]->seq);
++ printf("\n");
++
+ output_label("kallsyms_token_table");
+ off = 0;
+ for (i = 0; i < 256; i++) {
+--
+2.39.2
+
--- /dev/null
+From 8ed9d429c7185d4b3fe9ef6360e3f9e6f63265c1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 28 Jun 2023 11:19:26 -0700
+Subject: kallsyms: strip LTO-only suffixes from promoted global functions
+
+From: Yonghong Song <yhs@fb.com>
+
+[ Upstream commit 8cc32a9bbf2934d90762d9de0187adcb5ad46a11 ]
+
+Commit 6eb4bd92c1ce ("kallsyms: strip LTO suffixes from static functions")
+stripped all function/variable suffixes starting with '.' regardless
+of whether those suffixes are generated in LTO mode or not. In fact,
+as far as I know, in LTO mode, when a static function/variable is
+promoted to the global scope, a '.llvm.<...>' suffix is added.
+
+The existing mechanism breaks live patching for an LTO kernel even if
+no <symbol>.llvm.<...> symbols are involved. For example, for the following
+kernel symbols:
+ $ grep bpf_verifier_vlog /proc/kallsyms
+ ffffffff81549f60 t bpf_verifier_vlog
+ ffffffff8268b430 d bpf_verifier_vlog._entry
+ ffffffff8282a958 d bpf_verifier_vlog._entry_ptr
+ ffffffff82e12a1f d bpf_verifier_vlog.__already_done
+'bpf_verifier_vlog' is a static function. '_entry', '_entry_ptr' and
+'__already_done' are static variables used inside 'bpf_verifier_vlog',
+so llvm promotes them to file-level static with prefix 'bpf_verifier_vlog.'.
+Note that the func-level to file-level static function promotion also
+happens without LTO.
+
+Given the symbol name 'bpf_verifier_vlog' on an LTO kernel, the current
+mechanism will return 4 symbols to the live patching subsystem, which it
+cannot handle. On a non-LTO kernel, only one symbol is returned.
+
+In [1], we have a lengthy discussion, the suggestion is to separate two
+cases:
+ (1). new symbols with suffix which are generated regardless of whether
+ LTO is enabled or not, and
+ (2). new symbols with suffix generated only when LTO is enabled.
+
+The cleanup_symbol_name() should only remove suffixes for case (2).
+Case (1) should not be changed so it can work uniformly with or without LTO.
+
+This patch removes the LTO-only suffix '.llvm.<...>', so live patching and
+tracing should work the same way as on a non-LTO kernel.
+cleanup_symbol_name() in scripts/kallsyms.c is also changed to use the same
+filtering pattern, so both the kernel and the kallsyms tool have the same
+expectation about the order of symbols.
+
+ [1] https://lore.kernel.org/live-patching/20230615170048.2382735-1-song@kernel.org/T/#u
+
+Fixes: 6eb4bd92c1ce ("kallsyms: strip LTO suffixes from static functions")
+Reported-by: Song Liu <song@kernel.org>
+Signed-off-by: Yonghong Song <yhs@fb.com>
+Reviewed-by: Zhen Lei <thunder.leizhen@huawei.com>
+Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
+Acked-by: Song Liu <song@kernel.org>
+Link: https://lore.kernel.org/r/20230628181926.4102448-1-yhs@fb.com
+Signed-off-by: Kees Cook <keescook@chromium.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/kallsyms.c | 5 ++---
+ scripts/kallsyms.c | 6 +++---
+ 2 files changed, 5 insertions(+), 6 deletions(-)
+
+diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c
+index ba351dfa109b6..676328a7c8c75 100644
+--- a/kernel/kallsyms.c
++++ b/kernel/kallsyms.c
+@@ -174,11 +174,10 @@ static bool cleanup_symbol_name(char *s)
+ * LLVM appends various suffixes for local functions and variables that
+ * must be promoted to global scope as part of LTO. This can break
+ * hooking of static functions with kprobes. '.' is not a valid
+- * character in an identifier in C. Suffixes observed:
++ * character in an identifier in C. Suffixes only in LLVM LTO observed:
+ * - foo.llvm.[0-9a-f]+
+- * - foo.[0-9a-f]+
+ */
+- res = strchr(s, '.');
++ res = strstr(s, ".llvm.");
+ if (res) {
+ *res = '\0';
+ return true;
+diff --git a/scripts/kallsyms.c b/scripts/kallsyms.c
+index 67ef9aa14a770..51edc73e2ebf8 100644
+--- a/scripts/kallsyms.c
++++ b/scripts/kallsyms.c
+@@ -432,10 +432,10 @@ static void cleanup_symbol_name(char *s)
+ * ASCII[_] = 5f
+ * ASCII[a-z] = 61,7a
+ *
+- * As above, replacing '.' with '\0' does not affect the main sorting,
+- * but it helps us with subsorting.
++ * As above, replacing the first '.' in ".llvm." with '\0' does not
++ * affect the main sorting, but it helps us with subsorting.
+ */
+- p = strchr(s, '.');
++ p = strstr(s, ".llvm.");
+ if (p)
+ *p = '\0';
+ }
+--
+2.39.2
+
--- /dev/null
+From e9fa3eef2ea63154cf4655e320d9deee9b91fb21 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Jul 2023 10:41:51 -0700
+Subject: llc: Don't drop packet from non-root netns.
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 6631463b6e6673916d2481f692938f393148aa82 ]
+
+Now these upper layer protocol handlers can be called from llc_rcv()
+as sap->rcv_func(), which is registered by llc_sap_open().
+
+ * function which is passed to register_8022_client()
+ -> no in-kernel user calls register_8022_client().
+
+ * snap_rcv()
+ `- proto->rcvfunc() : registered by register_snap_client()
+ -> aarp_rcv() and atalk_rcv() drop packets from non-root netns
+
+ * stp_pdu_rcv()
+ `- garp_protos[]->rcv() : registered by stp_proto_register()
+ -> garp_pdu_rcv() and br_stp_rcv() are netns-aware
+
+So, we can safely remove the netns restriction in llc_rcv().
+
+Fixes: e730c15519d0 ("[NET]: Make packet reception network namespace safe")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/llc/llc_input.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c
+index c309b72a58779..7cac441862e21 100644
+--- a/net/llc/llc_input.c
++++ b/net/llc/llc_input.c
+@@ -163,9 +163,6 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev,
+ void (*sta_handler)(struct sk_buff *skb);
+ void (*sap_handler)(struct llc_sap *sap, struct sk_buff *skb);
+
+- if (!net_eq(dev_net(dev), &init_net))
+- goto drop;
+-
+ /*
+ * When the interface is in promisc. mode, drop all the crap that it
+ * receives, do not try to analyse it.
+--
+2.39.2
+
--- /dev/null
+From ef01382e1c734299b56bde7f6a5678e14939f8a4 Mon Sep 17 00:00:00 2001
+From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
+Date: Thu, 22 Jun 2023 17:43:57 -0600
+Subject: [PATCH AUTOSEL 4.19 09/11] MIPS: dec: prom: Address -Warray-bounds
+ warning
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit 7b191b9b55df2a844bd32d1d380f47a7df1c2896 ]
+
+Zero-length arrays are deprecated, and we are replacing them with flexible
+array members instead. So, replace zero-length array with flexible-array
+member in struct memmap.
+
+Address the following warning found after building (with GCC-13) mips64
+with decstation_64_defconfig:
+In function 'rex_setup_memory_region',
+ inlined from 'prom_meminit' at arch/mips/dec/prom/memory.c:91:3:
+arch/mips/dec/prom/memory.c:72:31: error: array subscript i is outside array bounds of 'unsigned char[0]' [-Werror=array-bounds=]
+ 72 | if (bm->bitmap[i] == 0xff)
+ | ~~~~~~~~~~^~~
+In file included from arch/mips/dec/prom/memory.c:16:
+./arch/mips/include/asm/dec/prom.h: In function 'prom_meminit':
+./arch/mips/include/asm/dec/prom.h:73:23: note: while referencing 'bitmap'
+ 73 | unsigned char bitmap[0];
+
+This helps with the ongoing efforts to globally enable -Warray-bounds.
+
+This results in no differences in binary output.
+
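+For reference, the flexible-array form of the structure is simply:
+
+  typedef struct {
+          int pagesize;
+          unsigned char bitmap[];  /* flexible array member, no fixed bound */
+  } memmap;
+
+sizeof(memmap) is unchanged (the trailing array contributes no storage
+either way), but the compiler no longer treats bm->bitmap[i] as an
+out-of-bounds access into a zero-length array.
+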
+Link: https://github.com/KSPP/linux/issues/79
+Link: https://github.com/KSPP/linux/issues/323
+Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
+Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/mips/include/asm/dec/prom.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/mips/include/asm/dec/prom.h b/arch/mips/include/asm/dec/prom.h
+index 09538ff5e9245..6f0405ba27d6d 100644
+--- a/arch/mips/include/asm/dec/prom.h
++++ b/arch/mips/include/asm/dec/prom.h
+@@ -74,7 +74,7 @@ static inline bool prom_is_rex(u32 magic)
+ */
+ typedef struct {
+ int pagesize;
+- unsigned char bitmap[0];
++ unsigned char bitmap[];
+ } memmap;
+
+
+--
+2.39.2
+
--- /dev/null
+From 2ad98a4006851a288ac932c2345ea6a91933390c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 17:46:22 -0700
+Subject: net: dsa: microchip: correct KSZ8795 static MAC table access
+
+From: Tristram Ha <Tristram.Ha@microchip.com>
+
+[ Upstream commit 4bdf79d686b49ac49373b36466acfb93972c7d7c ]
+
+The KSZ8795 driver code was modified for use on KSZ8863/73, which have
+different register definitions. Some of the new KSZ8795 register
+information is wrong compared to the previous code.
+
+KSZ8795 also behaves differently in that the STATIC_MAC_TABLE_USE_FID
+and STATIC_MAC_TABLE_FID bits are off by 1 when reading the MAC table
+compared to writing it. To compensate, special code was added to shift
+the register value by 1 before applying those bits. This is wrong when
+the code is running on KSZ8863, so this special code is only executed
+when KSZ8795 is detected.
+
+Fixes: 4b20a07e103f ("net: dsa: microchip: ksz8795: add support for ksz88xx chips")
+Signed-off-by: Tristram Ha <Tristram.Ha@microchip.com>
+Reviewed-by: Horatiu Vultur <horatiu.vultur@microchip.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/microchip/ksz8795.c | 8 +++++++-
+ drivers/net/dsa/microchip/ksz_common.c | 8 ++++----
+ drivers/net/dsa/microchip/ksz_common.h | 7 +++++++
+ 3 files changed, 18 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
+index 6639fae56da7f..c63e082dc57dc 100644
+--- a/drivers/net/dsa/microchip/ksz8795.c
++++ b/drivers/net/dsa/microchip/ksz8795.c
+@@ -437,7 +437,13 @@ static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
+ (data_hi & masks[STATIC_MAC_TABLE_FWD_PORTS]) >>
+ shifts[STATIC_MAC_FWD_PORTS];
+ alu->is_override = (data_hi & masks[STATIC_MAC_TABLE_OVERRIDE]) ? 1 : 0;
+- data_hi >>= 1;
++
++ /* KSZ8795 family switches have STATIC_MAC_TABLE_USE_FID and
++ * STATIC_MAC_TABLE_FID definitions off by 1 when doing read on the
++ * static MAC table compared to doing write.
++ */
++ if (ksz_is_ksz87xx(dev))
++ data_hi >>= 1;
+ alu->is_static = true;
+ alu->is_use_fid = (data_hi & masks[STATIC_MAC_TABLE_USE_FID]) ? 1 : 0;
+ alu->fid = (data_hi & masks[STATIC_MAC_TABLE_FID]) >>
+diff --git a/drivers/net/dsa/microchip/ksz_common.c b/drivers/net/dsa/microchip/ksz_common.c
+index 3d59298eaa5cf..8c492d56d2c36 100644
+--- a/drivers/net/dsa/microchip/ksz_common.c
++++ b/drivers/net/dsa/microchip/ksz_common.c
+@@ -286,13 +286,13 @@ static const u32 ksz8795_masks[] = {
+ [STATIC_MAC_TABLE_VALID] = BIT(21),
+ [STATIC_MAC_TABLE_USE_FID] = BIT(23),
+ [STATIC_MAC_TABLE_FID] = GENMASK(30, 24),
+- [STATIC_MAC_TABLE_OVERRIDE] = BIT(26),
+- [STATIC_MAC_TABLE_FWD_PORTS] = GENMASK(24, 20),
++ [STATIC_MAC_TABLE_OVERRIDE] = BIT(22),
++ [STATIC_MAC_TABLE_FWD_PORTS] = GENMASK(20, 16),
+ [DYNAMIC_MAC_TABLE_ENTRIES_H] = GENMASK(6, 0),
+- [DYNAMIC_MAC_TABLE_MAC_EMPTY] = BIT(8),
++ [DYNAMIC_MAC_TABLE_MAC_EMPTY] = BIT(7),
+ [DYNAMIC_MAC_TABLE_NOT_READY] = BIT(7),
+ [DYNAMIC_MAC_TABLE_ENTRIES] = GENMASK(31, 29),
+- [DYNAMIC_MAC_TABLE_FID] = GENMASK(26, 20),
++ [DYNAMIC_MAC_TABLE_FID] = GENMASK(22, 16),
+ [DYNAMIC_MAC_TABLE_SRC_PORT] = GENMASK(26, 24),
+ [DYNAMIC_MAC_TABLE_TIMESTAMP] = GENMASK(28, 27),
+ [P_MII_TX_FLOW_CTRL] = BIT(5),
+diff --git a/drivers/net/dsa/microchip/ksz_common.h b/drivers/net/dsa/microchip/ksz_common.h
+index 9cfa179575ce8..d1b2db8e65331 100644
+--- a/drivers/net/dsa/microchip/ksz_common.h
++++ b/drivers/net/dsa/microchip/ksz_common.h
+@@ -512,6 +512,13 @@ static inline void ksz_regmap_unlock(void *__mtx)
+ mutex_unlock(mtx);
+ }
+
++static inline bool ksz_is_ksz87xx(struct ksz_device *dev)
++{
++ return dev->chip_id == KSZ8795_CHIP_ID ||
++ dev->chip_id == KSZ8794_CHIP_ID ||
++ dev->chip_id == KSZ8765_CHIP_ID;
++}
++
+ static inline bool ksz_is_ksz88x3(struct ksz_device *dev)
+ {
+ return dev->chip_id == KSZ8830_CHIP_ID;
+--
+2.39.2
+
--- /dev/null
+From 25ba53cf4a6b0cb809c74f265b2e1cd0d00ea850 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Apr 2023 12:18:38 +0200
+Subject: net: dsa: microchip: ksz8: Make ksz8_r_sta_mac_table() static
+
+From: Oleksij Rempel <o.rempel@pengutronix.de>
+
+[ Upstream commit b5751cdd7dbe618a03951bdd4c982a71ba448b1b ]
+
+As ksz8_r_sta_mac_table() is only used within ksz8795.c, there is no need
+to export it. Make the function static for better encapsulation.
+
+Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
+Acked-by: Arun Ramadoss <arun.ramadoss@microchip.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: 4bdf79d686b4 ("net: dsa: microchip: correct KSZ8795 static MAC table access")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/microchip/ksz8.h | 2 --
+ drivers/net/dsa/microchip/ksz8795.c | 4 ++--
+ 2 files changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/dsa/microchip/ksz8.h b/drivers/net/dsa/microchip/ksz8.h
+index 8582b4b67d989..28137c4bf2928 100644
+--- a/drivers/net/dsa/microchip/ksz8.h
++++ b/drivers/net/dsa/microchip/ksz8.h
+@@ -21,8 +21,6 @@ int ksz8_r_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 *val);
+ int ksz8_w_phy(struct ksz_device *dev, u16 phy, u16 reg, u16 val);
+ int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr, u8 *mac_addr,
+ u8 *fid, u8 *src_port, u8 *timestamp, u16 *entries);
+-int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
+- struct alu_struct *alu);
+ void ksz8_w_sta_mac_table(struct ksz_device *dev, u16 addr,
+ struct alu_struct *alu);
+ void ksz8_r_mib_cnt(struct ksz_device *dev, int port, u16 addr, u64 *cnt);
+diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
+index 38fd9b8e0287a..a2f67be66b97d 100644
+--- a/drivers/net/dsa/microchip/ksz8795.c
++++ b/drivers/net/dsa/microchip/ksz8795.c
+@@ -406,8 +406,8 @@ int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr, u8 *mac_addr,
+ return rc;
+ }
+
+-int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
+- struct alu_struct *alu)
++static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
++ struct alu_struct *alu)
+ {
+ u32 data_hi, data_lo;
+ const u8 *shifts;
+--
+2.39.2
+
--- /dev/null
+From 07866a478229526bd65ea5676f89ffc143c3e040 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Apr 2023 12:18:36 +0200
+Subject: net: dsa: microchip: ksz8: Separate static MAC table operations for
+ code reuse
+
+From: Oleksij Rempel <o.rempel@pengutronix.de>
+
+[ Upstream commit f6636ff69ec4f2c94a5ee1d032b21cfe1e0a5678 ]
+
+Move static MAC table operations to separate functions in order to reuse
+the code for add/del_fdb. This is needed to address kernel warnings
+caused by the lack of fdb add function support in the current driver.
+
+Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: 4bdf79d686b4 ("net: dsa: microchip: correct KSZ8795 static MAC table access")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/microchip/ksz8795.c | 34 +++++++++++++++++++----------
+ 1 file changed, 23 insertions(+), 11 deletions(-)
+
+diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
+index 22250ae222b5b..38fd9b8e0287a 100644
+--- a/drivers/net/dsa/microchip/ksz8795.c
++++ b/drivers/net/dsa/microchip/ksz8795.c
+@@ -926,8 +926,8 @@ int ksz8_fdb_dump(struct ksz_device *dev, int port,
+ return ret;
+ }
+
+-int ksz8_mdb_add(struct ksz_device *dev, int port,
+- const struct switchdev_obj_port_mdb *mdb, struct dsa_db db)
++static int ksz8_add_sta_mac(struct ksz_device *dev, int port,
++ const unsigned char *addr, u16 vid)
+ {
+ struct alu_struct alu;
+ int index;
+@@ -937,8 +937,8 @@ int ksz8_mdb_add(struct ksz_device *dev, int port,
+ for (index = 0; index < dev->info->num_statics; index++) {
+ if (!ksz8_r_sta_mac_table(dev, index, &alu)) {
+ /* Found one already in static MAC table. */
+- if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) &&
+- alu.fid == mdb->vid)
++ if (!memcmp(alu.mac, addr, ETH_ALEN) &&
++ alu.fid == vid)
+ break;
+ /* Remember the first empty entry. */
+ } else if (!empty) {
+@@ -954,23 +954,23 @@ int ksz8_mdb_add(struct ksz_device *dev, int port,
+ if (index == dev->info->num_statics) {
+ index = empty - 1;
+ memset(&alu, 0, sizeof(alu));
+- memcpy(alu.mac, mdb->addr, ETH_ALEN);
++ memcpy(alu.mac, addr, ETH_ALEN);
+ alu.is_static = true;
+ }
+ alu.port_forward |= BIT(port);
+- if (mdb->vid) {
++ if (vid) {
+ alu.is_use_fid = true;
+
+ /* Need a way to map VID to FID. */
+- alu.fid = mdb->vid;
++ alu.fid = vid;
+ }
+ ksz8_w_sta_mac_table(dev, index, &alu);
+
+ return 0;
+ }
+
+-int ksz8_mdb_del(struct ksz_device *dev, int port,
+- const struct switchdev_obj_port_mdb *mdb, struct dsa_db db)
++static int ksz8_del_sta_mac(struct ksz_device *dev, int port,
++ const unsigned char *addr, u16 vid)
+ {
+ struct alu_struct alu;
+ int index;
+@@ -978,8 +978,8 @@ int ksz8_mdb_del(struct ksz_device *dev, int port,
+ for (index = 0; index < dev->info->num_statics; index++) {
+ if (!ksz8_r_sta_mac_table(dev, index, &alu)) {
+ /* Found one already in static MAC table. */
+- if (!memcmp(alu.mac, mdb->addr, ETH_ALEN) &&
+- alu.fid == mdb->vid)
++ if (!memcmp(alu.mac, addr, ETH_ALEN) &&
++ alu.fid == vid)
+ break;
+ }
+ }
+@@ -998,6 +998,18 @@ int ksz8_mdb_del(struct ksz_device *dev, int port,
+ return 0;
+ }
+
++int ksz8_mdb_add(struct ksz_device *dev, int port,
++ const struct switchdev_obj_port_mdb *mdb, struct dsa_db db)
++{
++ return ksz8_add_sta_mac(dev, port, mdb->addr, mdb->vid);
++}
++
++int ksz8_mdb_del(struct ksz_device *dev, int port,
++ const struct switchdev_obj_port_mdb *mdb, struct dsa_db db)
++{
++ return ksz8_del_sta_mac(dev, port, mdb->addr, mdb->vid);
++}
++
+ int ksz8_port_vlan_filtering(struct ksz_device *dev, int port, bool flag,
+ struct netlink_ext_ack *extack)
+ {
+--
+2.39.2
+
--- /dev/null
+From fe300e7a9fd658eb7004931d40d174aea1c803a0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Apr 2023 12:18:39 +0200
+Subject: net: dsa: microchip: ksz8_r_sta_mac_table(): Avoid using error code
+ for empty entries
+
+From: Oleksij Rempel <o.rempel@pengutronix.de>
+
+[ Upstream commit 559901b46810e82ba5321a5e789f994b65d3bc3d ]
+
+Prepare for the next patch by ensuring that ksz8_r_sta_mac_table() does
+not use error codes for empty entries. This change will enable better
+handling of read/write errors in the upcoming patch.
+
+Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Reviewed-by: Vladimir Oltean <olteanv@gmail.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: 4bdf79d686b4 ("net: dsa: microchip: correct KSZ8795 static MAC table access")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/dsa/microchip/ksz8795.c | 87 +++++++++++++++++------------
+ 1 file changed, 50 insertions(+), 37 deletions(-)
+
+diff --git a/drivers/net/dsa/microchip/ksz8795.c b/drivers/net/dsa/microchip/ksz8795.c
+index a2f67be66b97d..6639fae56da7f 100644
+--- a/drivers/net/dsa/microchip/ksz8795.c
++++ b/drivers/net/dsa/microchip/ksz8795.c
+@@ -407,7 +407,7 @@ int ksz8_r_dyn_mac_table(struct ksz_device *dev, u16 addr, u8 *mac_addr,
+ }
+
+ static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
+- struct alu_struct *alu)
++ struct alu_struct *alu, bool *valid)
+ {
+ u32 data_hi, data_lo;
+ const u8 *shifts;
+@@ -420,28 +420,32 @@ static int ksz8_r_sta_mac_table(struct ksz_device *dev, u16 addr,
+ ksz8_r_table(dev, TABLE_STATIC_MAC, addr, &data);
+ data_hi = data >> 32;
+ data_lo = (u32)data;
+- if (data_hi & (masks[STATIC_MAC_TABLE_VALID] |
+- masks[STATIC_MAC_TABLE_OVERRIDE])) {
+- alu->mac[5] = (u8)data_lo;
+- alu->mac[4] = (u8)(data_lo >> 8);
+- alu->mac[3] = (u8)(data_lo >> 16);
+- alu->mac[2] = (u8)(data_lo >> 24);
+- alu->mac[1] = (u8)data_hi;
+- alu->mac[0] = (u8)(data_hi >> 8);
+- alu->port_forward =
+- (data_hi & masks[STATIC_MAC_TABLE_FWD_PORTS]) >>
+- shifts[STATIC_MAC_FWD_PORTS];
+- alu->is_override =
+- (data_hi & masks[STATIC_MAC_TABLE_OVERRIDE]) ? 1 : 0;
+- data_hi >>= 1;
+- alu->is_static = true;
+- alu->is_use_fid =
+- (data_hi & masks[STATIC_MAC_TABLE_USE_FID]) ? 1 : 0;
+- alu->fid = (data_hi & masks[STATIC_MAC_TABLE_FID]) >>
+- shifts[STATIC_MAC_FID];
++
++ if (!(data_hi & (masks[STATIC_MAC_TABLE_VALID] |
++ masks[STATIC_MAC_TABLE_OVERRIDE]))) {
++ *valid = false;
+ return 0;
+ }
+- return -ENXIO;
++
++ alu->mac[5] = (u8)data_lo;
++ alu->mac[4] = (u8)(data_lo >> 8);
++ alu->mac[3] = (u8)(data_lo >> 16);
++ alu->mac[2] = (u8)(data_lo >> 24);
++ alu->mac[1] = (u8)data_hi;
++ alu->mac[0] = (u8)(data_hi >> 8);
++ alu->port_forward =
++ (data_hi & masks[STATIC_MAC_TABLE_FWD_PORTS]) >>
++ shifts[STATIC_MAC_FWD_PORTS];
++ alu->is_override = (data_hi & masks[STATIC_MAC_TABLE_OVERRIDE]) ? 1 : 0;
++ data_hi >>= 1;
++ alu->is_static = true;
++ alu->is_use_fid = (data_hi & masks[STATIC_MAC_TABLE_USE_FID]) ? 1 : 0;
++ alu->fid = (data_hi & masks[STATIC_MAC_TABLE_FID]) >>
++ shifts[STATIC_MAC_FID];
++
++ *valid = true;
++
++ return 0;
+ }
+
+ void ksz8_w_sta_mac_table(struct ksz_device *dev, u16 addr,
+@@ -930,20 +934,25 @@ static int ksz8_add_sta_mac(struct ksz_device *dev, int port,
+ const unsigned char *addr, u16 vid)
+ {
+ struct alu_struct alu;
+- int index;
++ int index, ret;
+ int empty = 0;
+
+ alu.port_forward = 0;
+ for (index = 0; index < dev->info->num_statics; index++) {
+- if (!ksz8_r_sta_mac_table(dev, index, &alu)) {
+- /* Found one already in static MAC table. */
+- if (!memcmp(alu.mac, addr, ETH_ALEN) &&
+- alu.fid == vid)
+- break;
+- /* Remember the first empty entry. */
+- } else if (!empty) {
+- empty = index + 1;
++ bool valid;
++
++ ret = ksz8_r_sta_mac_table(dev, index, &alu, &valid);
++ if (ret)
++ return ret;
++ if (!valid) {
++ /* Remember the first empty entry. */
++ if (!empty)
++ empty = index + 1;
++ continue;
+ }
++
++ if (!memcmp(alu.mac, addr, ETH_ALEN) && alu.fid == vid)
++ break;
+ }
+
+ /* no available entry */
+@@ -973,15 +982,19 @@ static int ksz8_del_sta_mac(struct ksz_device *dev, int port,
+ const unsigned char *addr, u16 vid)
+ {
+ struct alu_struct alu;
+- int index;
++ int index, ret;
+
+ for (index = 0; index < dev->info->num_statics; index++) {
+- if (!ksz8_r_sta_mac_table(dev, index, &alu)) {
+- /* Found one already in static MAC table. */
+- if (!memcmp(alu.mac, addr, ETH_ALEN) &&
+- alu.fid == vid)
+- break;
+- }
++ bool valid;
++
++ ret = ksz8_r_sta_mac_table(dev, index, &alu, &valid);
++ if (ret)
++ return ret;
++ if (!valid)
++ continue;
++
++ if (!memcmp(alu.mac, addr, ETH_ALEN) && alu.fid == vid)
++ break;
+ }
+
+ /* no available entry */
+--
+2.39.2
+
--- /dev/null
+From d4038c95e83f7d2c42f76634c0bd1e407d38b652 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Jun 2023 00:20:35 +0800
+Subject: net: ethernet: litex: add support for 64 bit stats
+
+From: Jisheng Zhang <jszhang@kernel.org>
+
+[ Upstream commit 18da174d865a87d47d2f33f5b0a322efcf067728 ]
+
+Implement 64 bit per cpu stats to fix the overflow of netdev->stats
+on 32 bit platforms. To simplify the code, we use net core
+pcpu_sw_netstats infrastructure. One small drawback is some memory
+overhead because litex uses just one queue, but we allocate the
+counters per cpu.
+
+Signed-off-by: Jisheng Zhang <jszhang@kernel.org>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Acked-by: Gabriel Somlo <gsomlo@gmail.com>
+Link: https://lore.kernel.org/r/20230614162035.300-1-jszhang@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/litex/litex_liteeth.c | 19 +++++++++++++++----
+ 1 file changed, 15 insertions(+), 4 deletions(-)
+
+diff --git a/drivers/net/ethernet/litex/litex_liteeth.c b/drivers/net/ethernet/litex/litex_liteeth.c
+index 35f24e0f09349..ffa96059079c6 100644
+--- a/drivers/net/ethernet/litex/litex_liteeth.c
++++ b/drivers/net/ethernet/litex/litex_liteeth.c
+@@ -78,8 +78,7 @@ static int liteeth_rx(struct net_device *netdev)
+ memcpy_fromio(data, priv->rx_base + rx_slot * priv->slot_size, len);
+ skb->protocol = eth_type_trans(skb, netdev);
+
+- netdev->stats.rx_packets++;
+- netdev->stats.rx_bytes += len;
++ dev_sw_netstats_rx_add(netdev, len);
+
+ return netif_rx(skb);
+
+@@ -185,8 +184,7 @@ static netdev_tx_t liteeth_start_xmit(struct sk_buff *skb,
+ litex_write16(priv->base + LITEETH_READER_LENGTH, skb->len);
+ litex_write8(priv->base + LITEETH_READER_START, 1);
+
+- netdev->stats.tx_bytes += skb->len;
+- netdev->stats.tx_packets++;
++ dev_sw_netstats_tx_add(netdev, 1, skb->len);
+
+ priv->tx_slot = (priv->tx_slot + 1) % priv->num_tx_slots;
+ dev_kfree_skb_any(skb);
+@@ -194,9 +192,17 @@ static netdev_tx_t liteeth_start_xmit(struct sk_buff *skb,
+ return NETDEV_TX_OK;
+ }
+
++static void
++liteeth_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats)
++{
++ netdev_stats_to_stats64(stats, &netdev->stats);
++ dev_fetch_sw_netstats(stats, netdev->tstats);
++}
++
+ static const struct net_device_ops liteeth_netdev_ops = {
+ .ndo_open = liteeth_open,
+ .ndo_stop = liteeth_stop,
++ .ndo_get_stats64 = liteeth_get_stats64,
+ .ndo_start_xmit = liteeth_start_xmit,
+ };
+
+@@ -242,6 +248,11 @@ static int liteeth_probe(struct platform_device *pdev)
+ priv->netdev = netdev;
+ priv->dev = &pdev->dev;
+
++ netdev->tstats = devm_netdev_alloc_pcpu_stats(&pdev->dev,
++ struct pcpu_sw_netstats);
++ if (!netdev->tstats)
++ return -ENOMEM;
++
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+--
+2.39.2
+
--- /dev/null
+From c3465911da1e9d1a7b64a1ed1f446f1ef9666ff2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 03:42:29 +0100
+Subject: net: ethernet: mtk_eth_soc: handle probe deferral
+
+From: Daniel Golle <daniel@makrotopia.org>
+
+[ Upstream commit 1d6d537dc55d1f42d16290f00157ac387985b95b ]
+
+Move the call to of_get_ethdev_address to mtk_add_mac which is part of
+the probe function and can hence itself return -EPROBE_DEFER should
+of_get_ethdev_address return -EPROBE_DEFER. This allows us to entirely
+get rid of the mtk_init function.
+
+The problem of of_get_ethdev_address returning -EPROBE_DEFER surfaced
+in situations in which the NVMEM provider holding the MAC address has
+not yet been loaded at the time mtk_eth_soc is initially probed. In
+this case probing of mtk_eth_soc should be deferred instead of falling
+back to a random MAC address, so that probing can be repeated once the
+NVMEM provider becomes available.
+
+Fixes: 656e705243fd ("net-next: mediatek: add support for MT7623 ethernet")
+Signed-off-by: Daniel Golle <daniel@makrotopia.org>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/mediatek/mtk_eth_soc.c | 29 ++++++++-------------
+ 1 file changed, 11 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+index 49975924e2426..7e318133423a9 100644
+--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
++++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+@@ -3425,23 +3425,6 @@ static int mtk_hw_deinit(struct mtk_eth *eth)
+ return 0;
+ }
+
+-static int __init mtk_init(struct net_device *dev)
+-{
+- struct mtk_mac *mac = netdev_priv(dev);
+- struct mtk_eth *eth = mac->hw;
+- int ret;
+-
+- ret = of_get_ethdev_address(mac->of_node, dev);
+- if (ret) {
+- /* If the mac address is invalid, use random mac address */
+- eth_hw_addr_random(dev);
+- dev_err(eth->dev, "generated random MAC address %pM\n",
+- dev->dev_addr);
+- }
+-
+- return 0;
+-}
+-
+ static void mtk_uninit(struct net_device *dev)
+ {
+ struct mtk_mac *mac = netdev_priv(dev);
+@@ -3789,7 +3772,6 @@ static const struct ethtool_ops mtk_ethtool_ops = {
+ };
+
+ static const struct net_device_ops mtk_netdev_ops = {
+- .ndo_init = mtk_init,
+ .ndo_uninit = mtk_uninit,
+ .ndo_open = mtk_open,
+ .ndo_stop = mtk_stop,
+@@ -3845,6 +3827,17 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np)
+ mac->hw = eth;
+ mac->of_node = np;
+
++ err = of_get_ethdev_address(mac->of_node, eth->netdev[id]);
++ if (err == -EPROBE_DEFER)
++ return err;
++
++ if (err) {
++ /* If the mac address is invalid, use random mac address */
++ eth_hw_addr_random(eth->netdev[id]);
++ dev_err(eth->dev, "generated random MAC address %pM\n",
++ eth->netdev[id]->dev_addr);
++ }
++
+ memset(mac->hwlro_ip, 0, sizeof(mac->hwlro_ip));
+ mac->hwlro_ip_cnt = 0;
+
+--
+2.39.2
+
--- /dev/null
+From c809a11a4b6d3cfd988c7fb48576f8544d3b1d7e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 12 Jul 2023 16:36:57 +0530
+Subject: net: ethernet: ti: cpsw_ale: Fix
+ cpsw_ale_get_field()/cpsw_ale_set_field()
+
+From: Tanmay Patil <t-patil@ti.com>
+
+[ Upstream commit b685f1a58956fa36cc01123f253351b25bfacfda ]
+
+CPSW ALE has 75 bit ALE entries which are stored within three 32 bit words.
+The cpsw_ale_get_field() and cpsw_ale_set_field() functions assume that the
+field will be strictly contained within one word. However, this is not
+guaranteed to be the case and it is possible for ALE field entries to span
+across at most two words.
+
+Fix the methods to handle getting/setting fields spanning up to two words.
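+
+As an illustration only, a simplified userspace sketch of the same extraction
+(same word ordering as the ALE entries, but not the driver code itself; fields
+are assumed to be narrower than 32 bits):
+
+    #include <stdint.h>
+    #include <stdio.h>
+
+    #define BITMASK(bits)  ((1u << (bits)) - 1)
+
+    /* entry[2] holds bits 0..31, entry[1] bits 32..63, entry[0] bits 64..74 */
+    static uint32_t get_field(const uint32_t *entry, uint32_t start, uint32_t bits)
+    {
+        uint32_t idx  = start / 32;
+        uint32_t idx2 = (start + bits - 1) / 32;
+        uint32_t hi = 0;
+
+        if (idx != idx2)      /* the field straddles a 32-bit word boundary */
+            hi = entry[2 - idx2] << (idx2 * 32 - start);
+
+        return (hi | (entry[2 - idx] >> (start - idx * 32))) & BITMASK(bits);
+    }
+
+    int main(void)
+    {
+        /* Bits 30..33 hold 0b1011: bits 30-31 sit in entry[2], 32-33 in entry[1]. */
+        uint32_t entry[3] = { 0, 0x2, 0xc0000000 };
+
+        printf("0x%x\n", get_field(entry, 30, 4));  /* prints 0xb */
+        return 0;
+    }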
+
+Fixes: db82173f23c5 ("netdev: driver: ethernet: add cpsw address lookup engine support")
+Signed-off-by: Tanmay Patil <t-patil@ti.com>
+[s-vadapalli@ti.com: rephrased commit message and added Fixes tag]
+Signed-off-by: Siddharth Vadapalli <s-vadapalli@ti.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/ti/cpsw_ale.c | 24 +++++++++++++++++++-----
+ 1 file changed, 19 insertions(+), 5 deletions(-)
+
+diff --git a/drivers/net/ethernet/ti/cpsw_ale.c b/drivers/net/ethernet/ti/cpsw_ale.c
+index 231370e9a8017..2647c18d40d95 100644
+--- a/drivers/net/ethernet/ti/cpsw_ale.c
++++ b/drivers/net/ethernet/ti/cpsw_ale.c
+@@ -106,23 +106,37 @@ struct cpsw_ale_dev_id {
+
+ static inline int cpsw_ale_get_field(u32 *ale_entry, u32 start, u32 bits)
+ {
+- int idx;
++ int idx, idx2;
++ u32 hi_val = 0;
+
+ idx = start / 32;
++ idx2 = (start + bits - 1) / 32;
++ /* Check if bits to be fetched exceed a word */
++ if (idx != idx2) {
++ idx2 = 2 - idx2; /* flip */
++ hi_val = ale_entry[idx2] << ((idx2 * 32) - start);
++ }
+ start -= idx * 32;
+ idx = 2 - idx; /* flip */
+- return (ale_entry[idx] >> start) & BITMASK(bits);
++ return (hi_val + (ale_entry[idx] >> start)) & BITMASK(bits);
+ }
+
+ static inline void cpsw_ale_set_field(u32 *ale_entry, u32 start, u32 bits,
+ u32 value)
+ {
+- int idx;
++ int idx, idx2;
+
+ value &= BITMASK(bits);
+- idx = start / 32;
++ idx = start / 32;
++ idx2 = (start + bits - 1) / 32;
++ /* Check if bits to be set exceed a word */
++ if (idx != idx2) {
++ idx2 = 2 - idx2; /* flip */
++ ale_entry[idx2] &= ~(BITMASK(bits + start - (idx2 * 32)));
++ ale_entry[idx2] |= (value >> ((idx2 * 32) - start));
++ }
+ start -= idx * 32;
+- idx = 2 - idx; /* flip */
++ idx = 2 - idx; /* flip */
+ ale_entry[idx] &= ~(BITMASK(bits) << start);
+ ale_entry[idx] |= (value << start);
+ }
+--
+2.39.2
+
--- /dev/null
+From c7bac058c0b91ef65d58a3020117d8bad2853616 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Jun 2023 20:33:08 +0800
+Subject: net: hns3: fix strncpy() not using dest-buf length as length issue
+
+From: Hao Chen <chenhao418@huawei.com>
+
+[ Upstream commit 1cf3d5567f273a8746d1bade00633a93204f80f0 ]
+
+Now, strncpy() in hns3_dbg_fill_content() uses the source length as the
+copy length, which may result in a destination buffer overflow.
+
+This patch also fixes the compile warning reported by the Intel kernel
+test robot for the csky-linux-gcc (GCC) 12.1.0 compiler.
+
+The warning reports as below:
+
+hclge_debugfs.c:92:25: warning: 'strncpy' specified bound depends on
+the length of the source argument [-Wstringop-truncation]
+
+strncpy(pos, items[i].name, strlen(items[i].name));
+
+hclge_debugfs.c:90:25: warning: 'strncpy' output truncated before
+terminating nul copying as many bytes from a string as its length
+[-Wstringop-truncation]
+
+strncpy(pos, result[i], strlen(result[i]));
+
+So, this patch adds some length checks to avoid overflowing the destination
+buffer.
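+
+As a simplified userspace sketch of the bounded-fill idea (hypothetical
+helper, not the hns3 code itself), the remaining buffer length is tracked and
+the loop stops before any item could overflow it:
+
+    #include <stdio.h>
+    #include <string.h>
+
+    /* Append items to a fixed-size line buffer, separated by a space, and
+     * stop as soon as the next item would no longer fit.  Two bytes stay
+     * reserved for the trailing "\n\0". */
+    static void fill_line(char *buf, size_t len,
+                          const char *const *items, size_t n)
+    {
+        size_t i, item_len, used = 0;
+
+        if (len < 2)
+            return;
+        len -= 2;
+
+        for (i = 0; i < n; i++) {
+            item_len = strlen(items[i]) + 1;
+            if (len - used < item_len)
+                break;
+            memcpy(buf + used, items[i], item_len - 1);
+            buf[used + item_len - 1] = ' ';
+            used += item_len;
+        }
+        buf[used] = '\n';
+        buf[used + 1] = '\0';
+    }
+
+    int main(void)
+    {
+        static const char *const items[] = { "queue", "head", "tail" };
+        char line[16];
+
+        fill_line(line, sizeof(line), items, 3);
+        printf("%s", line);   /* "queue head \n": "tail" no longer fits */
+        return 0;
+    }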
+
+Signed-off-by: Hao Chen <chenhao418@huawei.com>
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/lkml/202207170606.7WtHs9yS-lkp@intel.com/T/
+Signed-off-by: Hao Lan <lanhao@huawei.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../ethernet/hisilicon/hns3/hns3_debugfs.c | 31 ++++++++++++++-----
+ .../hisilicon/hns3/hns3pf/hclge_debugfs.c | 29 ++++++++++++++---
+ 2 files changed, 48 insertions(+), 12 deletions(-)
+
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+index bcccd82a2620f..f6ededec5a4fa 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+@@ -435,19 +435,36 @@ static void hns3_dbg_fill_content(char *content, u16 len,
+ const struct hns3_dbg_item *items,
+ const char **result, u16 size)
+ {
++#define HNS3_DBG_LINE_END_LEN 2
+ char *pos = content;
++ u16 item_len;
+ u16 i;
+
++ if (!len) {
++ return;
++ } else if (len <= HNS3_DBG_LINE_END_LEN) {
++ *pos++ = '\0';
++ return;
++ }
++
+ memset(content, ' ', len);
+- for (i = 0; i < size; i++) {
+- if (result)
+- strncpy(pos, result[i], strlen(result[i]));
+- else
+- strncpy(pos, items[i].name, strlen(items[i].name));
++ len -= HNS3_DBG_LINE_END_LEN;
+
+- pos += strlen(items[i].name) + items[i].interval;
++ for (i = 0; i < size; i++) {
++ item_len = strlen(items[i].name) + items[i].interval;
++ if (len < item_len)
++ break;
++
++ if (result) {
++ if (item_len < strlen(result[i]))
++ break;
++ strscpy(pos, result[i], strlen(result[i]));
++ } else {
++ strscpy(pos, items[i].name, strlen(items[i].name));
++ }
++ pos += item_len;
++ len -= item_len;
+ }
+-
+ *pos++ = '\n';
+ *pos++ = '\0';
+ }
+diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+index 142415c84c6b2..0ebc21401b7c2 100644
+--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
++++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+@@ -87,16 +87,35 @@ static void hclge_dbg_fill_content(char *content, u16 len,
+ const struct hclge_dbg_item *items,
+ const char **result, u16 size)
+ {
++#define HCLGE_DBG_LINE_END_LEN 2
+ char *pos = content;
++ u16 item_len;
+ u16 i;
+
++ if (!len) {
++ return;
++ } else if (len <= HCLGE_DBG_LINE_END_LEN) {
++ *pos++ = '\0';
++ return;
++ }
++
+ memset(content, ' ', len);
++ len -= HCLGE_DBG_LINE_END_LEN;
++
+ for (i = 0; i < size; i++) {
+- if (result)
+- strncpy(pos, result[i], strlen(result[i]));
+- else
+- strncpy(pos, items[i].name, strlen(items[i].name));
+- pos += strlen(items[i].name) + items[i].interval;
++ item_len = strlen(items[i].name) + items[i].interval;
++ if (len < item_len)
++ break;
++
++ if (result) {
++ if (item_len < strlen(result[i]))
++ break;
++ strscpy(pos, result[i], strlen(result[i]));
++ } else {
++ strscpy(pos, items[i].name, strlen(items[i].name));
++ }
++ pos += item_len;
++ len -= item_len;
+ }
+ *pos++ = '\n';
+ *pos++ = '\0';
+--
+2.39.2
+
--- /dev/null
+From d2d9a97443c3d363ac55a22c42cc9e677b12faa3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 23 May 2023 18:14:52 +0200
+Subject: net: ipv4: use consistent txhash in TIME_WAIT and SYN_RECV
+
+From: Antoine Tenart <atenart@kernel.org>
+
+[ Upstream commit c0a8966e2bc7d31f77a7246947ebc09c1ff06066 ]
+
+When using IPv4/TCP, skb->hash comes from sk->sk_txhash except in
+TIME_WAIT and SYN_RECV where it's not set in the reply skb from
+ip_send_unicast_reply. Those packets will have a mismatched hash with
+others from the same flow as their hashes will be 0. IPv6 does not have
+the same issue as the hash is set from the socket txhash in those cases.
+
+This commit sets the hash in the reply skb from ip_send_unicast_reply,
+which makes the IPv4 code behave like IPv6.
+
+Signed-off-by: Antoine Tenart <atenart@kernel.org>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: 5e5265522a9a ("tcp: annotate data-races around tcp_rsk(req)->txhash")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/ip.h | 2 +-
+ net/ipv4/ip_output.c | 4 +++-
+ net/ipv4/tcp_ipv4.c | 14 +++++++++-----
+ 3 files changed, 13 insertions(+), 7 deletions(-)
+
+diff --git a/include/net/ip.h b/include/net/ip.h
+index acec504c469a0..83a1a9bc3ceb1 100644
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -282,7 +282,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
+ const struct ip_options *sopt,
+ __be32 daddr, __be32 saddr,
+ const struct ip_reply_arg *arg,
+- unsigned int len, u64 transmit_time);
++ unsigned int len, u64 transmit_time, u32 txhash);
+
+ #define IP_INC_STATS(net, field) SNMP_INC_STATS64((net)->mib.ip_statistics, field)
+ #define __IP_INC_STATS(net, field) __SNMP_INC_STATS64((net)->mib.ip_statistics, field)
+diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
+index 2a07588265c70..7b4ab545c06e0 100644
+--- a/net/ipv4/ip_output.c
++++ b/net/ipv4/ip_output.c
+@@ -1691,7 +1691,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
+ const struct ip_options *sopt,
+ __be32 daddr, __be32 saddr,
+ const struct ip_reply_arg *arg,
+- unsigned int len, u64 transmit_time)
++ unsigned int len, u64 transmit_time, u32 txhash)
+ {
+ struct ip_options_data replyopts;
+ struct ipcm_cookie ipc;
+@@ -1754,6 +1754,8 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb,
+ arg->csum));
+ nskb->ip_summed = CHECKSUM_NONE;
+ nskb->mono_delivery_time = !!transmit_time;
++ if (txhash)
++ skb_set_hash(nskb, txhash, PKT_HASH_TYPE_L4);
+ ip_push_pending_frames(sk, &fl4);
+ }
+ out:
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index a7de5ba74e7f7..ef740983a1222 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -692,6 +692,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
+ u64 transmit_time = 0;
+ struct sock *ctl_sk;
+ struct net *net;
++ u32 txhash = 0;
+
+ /* Never send a reset in response to a reset. */
+ if (th->rst)
+@@ -829,6 +830,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
+ inet_twsk(sk)->tw_priority : sk->sk_priority;
+ transmit_time = tcp_transmit_time(sk);
+ xfrm_sk_clone_policy(ctl_sk, sk);
++ txhash = (sk->sk_state == TCP_TIME_WAIT) ?
++ inet_twsk(sk)->tw_txhash : sk->sk_txhash;
+ } else {
+ ctl_sk->sk_mark = 0;
+ ctl_sk->sk_priority = 0;
+@@ -837,7 +840,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb)
+ skb, &TCP_SKB_CB(skb)->header.h4.opt,
+ ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
+ &arg, arg.iov[0].iov_len,
+- transmit_time);
++ transmit_time, txhash);
+
+ xfrm_sk_free_policy(ctl_sk);
+ sock_net_set(ctl_sk, &init_net);
+@@ -859,7 +862,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
+ struct sk_buff *skb, u32 seq, u32 ack,
+ u32 win, u32 tsval, u32 tsecr, int oif,
+ struct tcp_md5sig_key *key,
+- int reply_flags, u8 tos)
++ int reply_flags, u8 tos, u32 txhash)
+ {
+ const struct tcphdr *th = tcp_hdr(skb);
+ struct {
+@@ -935,7 +938,7 @@ static void tcp_v4_send_ack(const struct sock *sk,
+ skb, &TCP_SKB_CB(skb)->header.h4.opt,
+ ip_hdr(skb)->saddr, ip_hdr(skb)->daddr,
+ &arg, arg.iov[0].iov_len,
+- transmit_time);
++ transmit_time, txhash);
+
+ sock_net_set(ctl_sk, &init_net);
+ __TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
+@@ -955,7 +958,8 @@ static void tcp_v4_timewait_ack(struct sock *sk, struct sk_buff *skb)
+ tw->tw_bound_dev_if,
+ tcp_twsk_md5_key(tcptw),
+ tw->tw_transparent ? IP_REPLY_ARG_NOSRCCHECK : 0,
+- tw->tw_tos
++ tw->tw_tos,
++ tw->tw_txhash
+ );
+
+ inet_twsk_put(tw);
+@@ -988,7 +992,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+ 0,
+ tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
+ inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
+- ip_hdr(skb)->tos);
++ ip_hdr(skb)->tos, tcp_rsk(req)->txhash);
+ }
+
+ /*
+--
+2.39.2
+
--- /dev/null
+From 9ba17b30e66744d6805871a41ff330f6594f1806 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 17:59:19 +0800
+Subject: net: ipv4: Use kfree_sensitive instead of kfree
+
+From: Wang Ming <machel@vivo.com>
+
+[ Upstream commit daa751444fd9d4184270b1479d8af49aaf1a1ee6 ]
+
+The key buffer might contain the private part of the key, so it is better
+to use kfree_sensitive() to free it.
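+
+For illustration, a userspace analogue of the same idea (a hypothetical
+helper; the real kfree_sensitive() additionally keeps the compiler from
+optimising the clear away):
+
+    #include <stdlib.h>
+    #include <string.h>
+
+    /* Wipe the buffer before freeing so key material does not linger in
+     * freed memory. */
+    static void free_sensitive(void *ptr, size_t len)
+    {
+        if (!ptr)
+            return;
+        memset(ptr, 0, len);
+        free(ptr);
+    }
+
+    int main(void)
+    {
+        char *key = strdup("super-secret-key-material");
+
+        if (key)
+            free_sensitive(key, strlen(key) + 1);
+        return 0;
+    }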
+
+Fixes: 38320c70d282 ("[IPSEC]: Use crypto_aead and authenc in ESP")
+Signed-off-by: Wang Ming <machel@vivo.com>
+Reviewed-by: Tariq Toukan <tariqt@nvidia.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/esp4.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
+index 52c8047efedbb..2d094d417ecae 100644
+--- a/net/ipv4/esp4.c
++++ b/net/ipv4/esp4.c
+@@ -1132,7 +1132,7 @@ static int esp_init_authenc(struct xfrm_state *x,
+ err = crypto_aead_setkey(aead, key, keylen);
+
+ free_key:
+- kfree(key);
++ kfree_sensitive(key);
+
+ error:
+ return err;
+--
+2.39.2
+
--- /dev/null
+From d40157f8faa30cf97d32dde6d80704d5d0898f75 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 22:45:19 +0800
+Subject: net:ipv6: check return value of pskb_trim()
+
+From: Yuanjun Gong <ruc_gongyuanjun@163.com>
+
+[ Upstream commit 4258faa130be4ea43e5e2d839467da421b8ff274 ]
+
+Goto tx_err if an unexpected result is returned by pskb_trim()
+in ip6erspan_tunnel_xmit().
+
+Fixes: 5a963eb61b7c ("ip6_gre: Add ERSPAN native tunnel support")
+Signed-off-by: Yuanjun Gong <ruc_gongyuanjun@163.com>
+Reviewed-by: David Ahern <dsahern@kernel.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv6/ip6_gre.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
+index 216b40ccadae0..d3fba7d8dec4e 100644
+--- a/net/ipv6/ip6_gre.c
++++ b/net/ipv6/ip6_gre.c
+@@ -977,7 +977,8 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
+ goto tx_err;
+
+ if (skb->len > dev->mtu + dev->hard_header_len) {
+- pskb_trim(skb, dev->mtu + dev->hard_header_len);
++ if (pskb_trim(skb, dev->mtu + dev->hard_header_len))
++ goto tx_err;
+ truncate = true;
+ }
+
+--
+2.39.2
+
--- /dev/null
+From 5cd4f073ef92600361ab34604f85b132f284a528 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jul 2023 03:02:31 +0300
+Subject: net: phy: prevent stale pointer dereference in phy_init()
+
+From: Vladimir Oltean <vladimir.oltean@nxp.com>
+
+[ Upstream commit 1c613beaf877c0c0d755853dc62687e2013e55c4 ]
+
+mdio_bus_init() and phy_driver_register() both have error paths, and if
+those are ever hit, ethtool will have a stale pointer to the
+phy_ethtool_phy_ops stub structure, which references memory from a
+module that failed to load (phylib).
+
+It is probably hard to force an error in this code path even manually,
+but the error teardown path of phy_init() should be the same as
+phy_exit(), which is now simply not the case.
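+
+A minimal userspace sketch of the unwinding pattern (placeholder step/undo
+helpers, not the phylib code): each error label undoes exactly the steps
+that succeeded before it, in reverse order, mirroring the exit path:
+
+    #include <stdio.h>
+
+    /* Placeholder setup steps; step_b() fails to show the unwinding. */
+    static void set_ops(void)   { puts("set_ops"); }
+    static void clear_ops(void) { puts("clear_ops"); }
+    static int  step_a(void)    { puts("step_a"); return 0; }
+    static void undo_a(void)    { puts("undo_a"); }
+    static int  step_b(void)    { return -1; }
+
+    static int example_init(void)
+    {
+        int rc;
+
+        set_ops();            /* done first, so undone last */
+
+        rc = step_a();
+        if (rc)
+            goto err_ops;
+
+        rc = step_b();
+        if (rc)
+            goto err_a;
+
+        return 0;
+
+    err_a:
+        undo_a();             /* unwind in reverse order of setup */
+    err_ops:
+        clear_ops();          /* mirrors what the exit path does */
+        return rc;
+    }
+
+    int main(void)
+    {
+        printf("init = %d\n", example_init());
+        return 0;
+    }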
+
+Fixes: 55d8f053ce1b ("net: phy: Register ethtool PHY operations")
+Link: https://lore.kernel.org/netdev/ZLaiJ4G6TaJYGJyU@shell.armlinux.org.uk/
+Suggested-by: Russell King (Oracle) <linux@armlinux.org.uk>
+Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
+Link: https://lore.kernel.org/r/20230720000231.1939689-1-vladimir.oltean@nxp.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/phy/phy_device.c | 21 ++++++++++++++-------
+ 1 file changed, 14 insertions(+), 7 deletions(-)
+
+diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
+index 7fbb0904b3c0f..82f74f96eba29 100644
+--- a/drivers/net/phy/phy_device.c
++++ b/drivers/net/phy/phy_device.c
+@@ -3252,23 +3252,30 @@ static int __init phy_init(void)
+ {
+ int rc;
+
++ ethtool_set_ethtool_phy_ops(&phy_ethtool_phy_ops);
++
+ rc = mdio_bus_init();
+ if (rc)
+- return rc;
++ goto err_ethtool_phy_ops;
+
+- ethtool_set_ethtool_phy_ops(&phy_ethtool_phy_ops);
+ features_init();
+
+ rc = phy_driver_register(&genphy_c45_driver, THIS_MODULE);
+ if (rc)
+- goto err_c45;
++ goto err_mdio_bus;
+
+ rc = phy_driver_register(&genphy_driver, THIS_MODULE);
+- if (rc) {
+- phy_driver_unregister(&genphy_c45_driver);
++ if (rc)
++ goto err_c45;
++
++ return 0;
++
+ err_c45:
+- mdio_bus_exit();
+- }
++ phy_driver_unregister(&genphy_c45_driver);
++err_mdio_bus:
++ mdio_bus_exit();
++err_ethtool_phy_ops:
++ ethtool_set_ethtool_phy_ops(NULL);
+
+ return rc;
+ }
+--
+2.39.2
+
--- /dev/null
+From 80ba7d3f04c1dd00e5a8cdab662fc9acf1a3b2b6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 15:05:13 -0300
+Subject: net: sched: cls_bpf: Undo tcf_bind_filter in case of an error
+
+From: Victor Nogueira <victor@mojatatu.com>
+
+[ Upstream commit 26a22194927e8521e304ed75c2f38d8068d55fc7 ]
+
+If cls_bpf_offload errors out, we must also undo tcf_bind_filter that
+was done before the error.
+
+Fix that by calling tcf_unbind_filter in errout_parms.
+
+Fixes: eadb41489fd2 ("net: cls_bpf: add support for marking filters as hardware-only")
+Signed-off-by: Victor Nogueira <victor@mojatatu.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Reviewed-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_bpf.c | 99 +++++++++++++++++++++------------------------
+ 1 file changed, 47 insertions(+), 52 deletions(-)
+
+diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c
+index bc317b3eac124..0320e11eb248b 100644
+--- a/net/sched/cls_bpf.c
++++ b/net/sched/cls_bpf.c
+@@ -404,56 +404,6 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog,
+ return 0;
+ }
+
+-static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp,
+- struct cls_bpf_prog *prog, unsigned long base,
+- struct nlattr **tb, struct nlattr *est, u32 flags,
+- struct netlink_ext_ack *extack)
+-{
+- bool is_bpf, is_ebpf, have_exts = false;
+- u32 gen_flags = 0;
+- int ret;
+-
+- is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
+- is_ebpf = tb[TCA_BPF_FD];
+- if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf))
+- return -EINVAL;
+-
+- ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, flags,
+- extack);
+- if (ret < 0)
+- return ret;
+-
+- if (tb[TCA_BPF_FLAGS]) {
+- u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
+-
+- if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT)
+- return -EINVAL;
+-
+- have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
+- }
+- if (tb[TCA_BPF_FLAGS_GEN]) {
+- gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
+- if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
+- !tc_flags_valid(gen_flags))
+- return -EINVAL;
+- }
+-
+- prog->exts_integrated = have_exts;
+- prog->gen_flags = gen_flags;
+-
+- ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
+- cls_bpf_prog_from_efd(tb, prog, gen_flags, tp);
+- if (ret < 0)
+- return ret;
+-
+- if (tb[TCA_BPF_CLASSID]) {
+- prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
+- tcf_bind_filter(tp, &prog->res, base);
+- }
+-
+- return 0;
+-}
+-
+ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
+ struct tcf_proto *tp, unsigned long base,
+ u32 handle, struct nlattr **tca,
+@@ -461,9 +411,12 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
+ struct netlink_ext_ack *extack)
+ {
+ struct cls_bpf_head *head = rtnl_dereference(tp->root);
++ bool is_bpf, is_ebpf, have_exts = false;
+ struct cls_bpf_prog *oldprog = *arg;
+ struct nlattr *tb[TCA_BPF_MAX + 1];
++ bool bound_to_filter = false;
+ struct cls_bpf_prog *prog;
++ u32 gen_flags = 0;
+ int ret;
+
+ if (tca[TCA_OPTIONS] == NULL)
+@@ -502,11 +455,51 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
+ goto errout;
+ prog->handle = handle;
+
+- ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], flags,
+- extack);
++ is_bpf = tb[TCA_BPF_OPS_LEN] && tb[TCA_BPF_OPS];
++ is_ebpf = tb[TCA_BPF_FD];
++ if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) {
++ ret = -EINVAL;
++ goto errout_idr;
++ }
++
++ ret = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &prog->exts,
++ flags, extack);
++ if (ret < 0)
++ goto errout_idr;
++
++ if (tb[TCA_BPF_FLAGS]) {
++ u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]);
++
++ if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) {
++ ret = -EINVAL;
++ goto errout_idr;
++ }
++
++ have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT;
++ }
++ if (tb[TCA_BPF_FLAGS_GEN]) {
++ gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]);
++ if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS ||
++ !tc_flags_valid(gen_flags)) {
++ ret = -EINVAL;
++ goto errout_idr;
++ }
++ }
++
++ prog->exts_integrated = have_exts;
++ prog->gen_flags = gen_flags;
++
++ ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) :
++ cls_bpf_prog_from_efd(tb, prog, gen_flags, tp);
+ if (ret < 0)
+ goto errout_idr;
+
++ if (tb[TCA_BPF_CLASSID]) {
++ prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]);
++ tcf_bind_filter(tp, &prog->res, base);
++ bound_to_filter = true;
++ }
++
+ ret = cls_bpf_offload(tp, prog, oldprog, extack);
+ if (ret)
+ goto errout_parms;
+@@ -528,6 +521,8 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb,
+ return 0;
+
+ errout_parms:
++ if (bound_to_filter)
++ tcf_unbind_filter(tp, &prog->res);
+ cls_bpf_free_parms(prog);
+ errout_idr:
+ if (!oldprog)
+--
+2.39.2
+
--- /dev/null
+From df17b2737c98c54588b1108cd709109a4a053d7e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 15:05:10 -0300
+Subject: net: sched: cls_matchall: Undo tcf_bind_filter in case of failure
+ after mall_set_parms
+
+From: Victor Nogueira <victor@mojatatu.com>
+
+[ Upstream commit b3d0e0489430735e2e7626aa37e6462cdd136e9d ]
+
+If an error occurs after mall_set_parms has executed successfully, we
+must undo the tcf_bind_filter call it issued.
+
+Fix that by calling tcf_unbind_filter in err_replace_hw_filter label.
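+
+A tiny generic sketch of the pattern (placeholder helpers, not the classifier
+code): remember whether the binding side effect happened, so the error path
+undoes only what was actually done:
+
+    #include <stdbool.h>
+    #include <stdio.h>
+
+    static void bind_filter(void)       { puts("bind"); }
+    static void unbind_filter(void)     { puts("unbind"); }
+    static int  replace_hw_filter(void) { return -1; }  /* force a failure */
+
+    static int change(bool have_classid)
+    {
+        bool bound = false;
+        int err;
+
+        if (have_classid) {
+            bind_filter();
+            bound = true;    /* remember the side effect for the error path */
+        }
+
+        err = replace_hw_filter();
+        if (err)
+            goto err_replace;
+
+        return 0;
+
+    err_replace:
+        if (bound)
+            unbind_filter(); /* undo only what actually happened */
+        return err;
+    }
+
+    int main(void)
+    {
+        printf("change = %d\n", change(true));
+        return 0;
+    }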
+
+Fixes: ec2507d2a306 ("net/sched: cls_matchall: Fix error path")
+Signed-off-by: Victor Nogueira <victor@mojatatu.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Reviewed-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_matchall.c | 35 ++++++++++++-----------------------
+ 1 file changed, 12 insertions(+), 23 deletions(-)
+
+diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
+index 39a5d9c170def..43f8df5847414 100644
+--- a/net/sched/cls_matchall.c
++++ b/net/sched/cls_matchall.c
+@@ -157,26 +157,6 @@ static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = {
+ [TCA_MATCHALL_FLAGS] = { .type = NLA_U32 },
+ };
+
+-static int mall_set_parms(struct net *net, struct tcf_proto *tp,
+- struct cls_mall_head *head,
+- unsigned long base, struct nlattr **tb,
+- struct nlattr *est, u32 flags, u32 fl_flags,
+- struct netlink_ext_ack *extack)
+-{
+- int err;
+-
+- err = tcf_exts_validate_ex(net, tp, tb, est, &head->exts, flags,
+- fl_flags, extack);
+- if (err < 0)
+- return err;
+-
+- if (tb[TCA_MATCHALL_CLASSID]) {
+- head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
+- tcf_bind_filter(tp, &head->res, base);
+- }
+- return 0;
+-}
+-
+ static int mall_change(struct net *net, struct sk_buff *in_skb,
+ struct tcf_proto *tp, unsigned long base,
+ u32 handle, struct nlattr **tca,
+@@ -185,6 +165,7 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
+ {
+ struct cls_mall_head *head = rtnl_dereference(tp->root);
+ struct nlattr *tb[TCA_MATCHALL_MAX + 1];
++ bool bound_to_filter = false;
+ struct cls_mall_head *new;
+ u32 userflags = 0;
+ int err;
+@@ -224,11 +205,17 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
+ goto err_alloc_percpu;
+ }
+
+- err = mall_set_parms(net, tp, new, base, tb, tca[TCA_RATE],
+- flags, new->flags, extack);
+- if (err)
++ err = tcf_exts_validate_ex(net, tp, tb, tca[TCA_RATE],
++ &new->exts, flags, new->flags, extack);
++ if (err < 0)
+ goto err_set_parms;
+
++ if (tb[TCA_MATCHALL_CLASSID]) {
++ new->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]);
++ tcf_bind_filter(tp, &new->res, base);
++ bound_to_filter = true;
++ }
++
+ if (!tc_skip_hw(new->flags)) {
+ err = mall_replace_hw_filter(tp, new, (unsigned long)new,
+ extack);
+@@ -244,6 +231,8 @@ static int mall_change(struct net *net, struct sk_buff *in_skb,
+ return 0;
+
+ err_replace_hw_filter:
++ if (bound_to_filter)
++ tcf_unbind_filter(tp, &new->res);
+ err_set_parms:
+ free_percpu(new->pf);
+ err_alloc_percpu:
+--
+2.39.2
+
--- /dev/null
+From 2565a1a811821f66ba1cd9a3bb9496fbecdc80e2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 15:05:12 -0300
+Subject: net: sched: cls_u32: Undo refcount decrement in case update failed
+
+From: Victor Nogueira <victor@mojatatu.com>
+
+[ Upstream commit e8d3d78c19be0264a5692bed477c303523aead31 ]
+
+In the case of an update, when TCA_U32_LINK is set, u32_set_parms will
+decrement the refcount of the ht_down (struct tc_u_hnode) pointer
+present in the older u32 filter which we are replacing. However, if
+u32_replace_hw_knode errors out, the update command fails and the
+ht_down refcount remains decremented. To fix that, when
+u32_replace_hw_knode fails, check if ht_down's refcount was decremented
+and undo the decrement.
+
+Fixes: d34e3e181395 ("net: cls_u32: Add support for skip-sw flag to tc u32 classifier.")
+Signed-off-by: Victor Nogueira <victor@mojatatu.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Reviewed-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_u32.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
+index 7cfbcd5180841..1280736a7b92e 100644
+--- a/net/sched/cls_u32.c
++++ b/net/sched/cls_u32.c
+@@ -926,6 +926,13 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
+ if (err) {
+ u32_unbind_filter(tp, new, tb);
+
++ if (tb[TCA_U32_LINK]) {
++ struct tc_u_hnode *ht_old;
++
++ ht_old = rtnl_dereference(n->ht_down);
++ if (ht_old)
++ ht_old->refcnt++;
++ }
+ __u32_destroy_key(new);
+ return err;
+ }
+--
+2.39.2
+
--- /dev/null
+From 66d4c485e832ee7c6d50709763bfdf4c14e821d0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Jul 2023 15:05:11 -0300
+Subject: net: sched: cls_u32: Undo tcf_bind_filter if u32_replace_hw_knode
+
+From: Victor Nogueira <victor@mojatatu.com>
+
+[ Upstream commit 9cb36faedeafb9720ac236aeae2ea57091d90a09 ]
+
+When u32_replace_hw_knode fails, we need to undo the tcf_bind_filter
+operation done at u32_set_parms.
+
+Fixes: d34e3e181395 ("net: cls_u32: Add support for skip-sw flag to tc u32 classifier.")
+Signed-off-by: Victor Nogueira <victor@mojatatu.com>
+Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
+Reviewed-by: Pedro Tammela <pctammela@mojatatu.com>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Signed-off-by: David S. Miller <davem@davemloft.net>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_u32.c | 41 ++++++++++++++++++++++++++++++-----------
+ 1 file changed, 30 insertions(+), 11 deletions(-)
+
+diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
+index a3477537c102b..7cfbcd5180841 100644
+--- a/net/sched/cls_u32.c
++++ b/net/sched/cls_u32.c
+@@ -710,8 +710,23 @@ static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = {
+ [TCA_U32_FLAGS] = { .type = NLA_U32 },
+ };
+
++static void u32_unbind_filter(struct tcf_proto *tp, struct tc_u_knode *n,
++ struct nlattr **tb)
++{
++ if (tb[TCA_U32_CLASSID])
++ tcf_unbind_filter(tp, &n->res);
++}
++
++static void u32_bind_filter(struct tcf_proto *tp, struct tc_u_knode *n,
++ unsigned long base, struct nlattr **tb)
++{
++ if (tb[TCA_U32_CLASSID]) {
++ n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
++ tcf_bind_filter(tp, &n->res, base);
++ }
++}
++
+ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
+- unsigned long base,
+ struct tc_u_knode *n, struct nlattr **tb,
+ struct nlattr *est, u32 flags, u32 fl_flags,
+ struct netlink_ext_ack *extack)
+@@ -758,10 +773,6 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp,
+ if (ht_old)
+ ht_old->refcnt--;
+ }
+- if (tb[TCA_U32_CLASSID]) {
+- n->res.classid = nla_get_u32(tb[TCA_U32_CLASSID]);
+- tcf_bind_filter(tp, &n->res, base);
+- }
+
+ if (ifindex >= 0)
+ n->ifindex = ifindex;
+@@ -901,17 +912,20 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
+ if (!new)
+ return -ENOMEM;
+
+- err = u32_set_parms(net, tp, base, new, tb,
+- tca[TCA_RATE], flags, new->flags,
+- extack);
++ err = u32_set_parms(net, tp, new, tb, tca[TCA_RATE],
++ flags, new->flags, extack);
+
+ if (err) {
+ __u32_destroy_key(new);
+ return err;
+ }
+
++ u32_bind_filter(tp, new, base, tb);
++
+ err = u32_replace_hw_knode(tp, new, flags, extack);
+ if (err) {
++ u32_unbind_filter(tp, new, tb);
++
+ __u32_destroy_key(new);
+ return err;
+ }
+@@ -1072,15 +1086,18 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
+ }
+ #endif
+
+- err = u32_set_parms(net, tp, base, n, tb, tca[TCA_RATE],
++ err = u32_set_parms(net, tp, n, tb, tca[TCA_RATE],
+ flags, n->flags, extack);
++
++ u32_bind_filter(tp, n, base, tb);
++
+ if (err == 0) {
+ struct tc_u_knode __rcu **ins;
+ struct tc_u_knode *pins;
+
+ err = u32_replace_hw_knode(tp, n, flags, extack);
+ if (err)
+- goto errhw;
++ goto errunbind;
+
+ if (!tc_in_hw(n->flags))
+ n->flags |= TCA_CLS_FLAGS_NOT_IN_HW;
+@@ -1098,7 +1115,9 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
+ return 0;
+ }
+
+-errhw:
++errunbind:
++ u32_unbind_filter(tp, n, tb);
++
+ #ifdef CONFIG_CLS_U32_MARK
+ free_percpu(n->pcpu_success);
+ #endif
+--
+2.39.2
+
--- /dev/null
+From 93023625146793635d96beb87c81594cb326e47c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Jul 2023 01:30:33 +0200
+Subject: netfilter: nf_tables: can't schedule in nft_chain_validate
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 314c82841602a111c04a7210c21dc77e0d560242 ]
+
+Can be called via nft set element list iteration, which may acquire
+rcu and/or bh read lock (depends on set type).
+
+BUG: sleeping function called from invalid context at net/netfilter/nf_tables_api.c:3353
+in_atomic(): 0, irqs_disabled(): 0, non_block: 0, pid: 1232, name: nft
+preempt_count: 0, expected: 0
+RCU nest depth: 1, expected: 0
+2 locks held by nft/1232:
+ #0: ffff8881180e3ea8 (&nft_net->commit_mutex){+.+.}-{3:3}, at: nf_tables_valid_genid
+ #1: ffffffff83f5f540 (rcu_read_lock){....}-{1:2}, at: rcu_lock_acquire
+Call Trace:
+ nft_chain_validate
+ nft_lookup_validate_setelem
+ nft_pipapo_walk
+ nft_lookup_validate
+ nft_chain_validate
+ nft_immediate_validate
+ nft_chain_validate
+ nf_tables_validate
+ nf_tables_abort
+
+No choice but to move it to nf_tables_validate().
+
+Fixes: 81ea01066741 ("netfilter: nf_tables: add rescheduling points during loop detection walks")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 58f14e4ef63d4..0bb1cc7ed5e99 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -3500,8 +3500,6 @@ int nft_chain_validate(const struct nft_ctx *ctx, const struct nft_chain *chain)
+ if (err < 0)
+ return err;
+ }
+-
+- cond_resched();
+ }
+
+ return 0;
+@@ -3525,6 +3523,8 @@ static int nft_table_validate(struct net *net, const struct nft_table *table)
+ err = nft_chain_validate(&ctx, chain);
+ if (err < 0)
+ return err;
++
++ cond_resched();
+ }
+
+ return 0;
+--
+2.39.2
+
--- /dev/null
+From 447b7e2bbc060e4f8293f9e084a379b95e8bf78b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jul 2023 00:29:58 +0200
+Subject: netfilter: nf_tables: fix spurious set element insertion failure
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit ddbd8be68941985f166f5107109a90ce13147c44 ]
+
+On some platforms there is a padding hole in the nft_verdict
+structure, between the verdict code and the chain pointer.
+
+On element insertion, if the new element clashes with an existing one and
+NLM_F_EXCL flag isn't set, we want to ignore the -EEXIST error as long as
+the data associated with duplicated element is the same as the existing
+one. The data equality check uses memcmp.
+
+For normal data (NFT_DATA_VALUE) this works fine, but for NFT_DATA_VERDICT
+padding area leads to spurious failure even if the verdict data is the
+same.
+
+This then makes the insertion fail with 'already exists' error, even
+though the new "key : data" matches an existing entry and userspace
+told the kernel that it doesn't want to receive an error indication.
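+
+As a small userspace illustration of the underlying pitfall (a made-up
+struct, not the nftables one): unless the whole object is zeroed first,
+memcmp() may compare indeterminate padding bytes and report a spurious
+mismatch for logically equal values:
+
+    #include <stdint.h>
+    #include <stdio.h>
+    #include <string.h>
+
+    struct verdict_like {
+        uint32_t code;    /* padding commonly follows here on 64-bit ABIs */
+        void    *chain;
+    };
+
+    int main(void)
+    {
+        struct verdict_like a, b;
+
+        /* Zero the whole objects so the padding bytes are deterministic;
+         * without these two memset() calls the comparison below could
+         * fail even though code and chain match. */
+        memset(&a, 0, sizeof(a));
+        memset(&b, 0, sizeof(b));
+
+        a.code = b.code = 1;
+        a.chain = b.chain = NULL;
+
+        printf("equal: %s\n", memcmp(&a, &b, sizeof(a)) == 0 ? "yes" : "no");
+        return 0;
+    }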
+
+Fixes: c016c7e45ddf ("netfilter: nf_tables: honor NLM_F_EXCL flag in set element insertion")
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 938cfa9a3adb6..58f14e4ef63d4 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -10114,6 +10114,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
+
+ if (!tb[NFTA_VERDICT_CODE])
+ return -EINVAL;
++
++ /* zero padding hole for memcmp */
++ memset(data, 0, sizeof(*data));
+ data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
+
+ switch (data->verdict.code) {
+--
+2.39.2
+
--- /dev/null
+From 2de006dd895fa8e0d71406e0293e4e0caa40e552 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 20:19:43 +0200
+Subject: netfilter: nf_tables: skip bound chain in netns release path
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 751d460ccff3137212f47d876221534bf0490996 ]
+
+Skip bound chains in the netns release path; the rule that owns such a
+chain releases these objects.
+
+Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING")
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index 0bb1cc7ed5e99..f621c5e48747b 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -10398,6 +10398,9 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
+ ctx.family = table->family;
+ ctx.table = table;
+ list_for_each_entry(chain, &table->chains, list) {
++ if (nft_chain_is_bound(chain))
++ continue;
++
+ ctx.chain = chain;
+ list_for_each_entry_safe(rule, nr, &chain->rules, list) {
+ list_del(&rule->list);
+--
+2.39.2
+
--- /dev/null
+From 00af5d0ed7436d8d334b78b70165969fd0c0dde3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 20 Jul 2023 09:17:21 +0200
+Subject: netfilter: nf_tables: skip bound chain on rule flush
+
+From: Pablo Neira Ayuso <pablo@netfilter.org>
+
+[ Upstream commit 6eaf41e87a223ae6f8e7a28d6e78384ad7e407f8 ]
+
+Skip bound chains when flushing table rules; the rule that owns such a
+chain releases these objects.
+
+Otherwise, the following warning is triggered:
+
+ WARNING: CPU: 2 PID: 1217 at net/netfilter/nf_tables_api.c:2013 nf_tables_chain_destroy+0x1f7/0x210 [nf_tables]
+ CPU: 2 PID: 1217 Comm: chain-flush Not tainted 6.1.39 #1
+ RIP: 0010:nf_tables_chain_destroy+0x1f7/0x210 [nf_tables]
+
+Fixes: d0e2c7de92c7 ("netfilter: nf_tables: add NFT_CHAIN_BINDING")
+Reported-by: Kevin Rich <kevinrich1337@gmail.com>
+Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nf_tables_api.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
+index f621c5e48747b..ecde497368ec4 100644
+--- a/net/netfilter/nf_tables_api.c
++++ b/net/netfilter/nf_tables_api.c
+@@ -3892,6 +3892,8 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
+ list_for_each_entry(chain, &table->chains, list) {
+ if (!nft_is_active_next(net, chain))
+ continue;
++ if (nft_chain_is_bound(chain))
++ continue;
+
+ ctx.chain = chain;
+ err = nft_delrule_by_chain(&ctx);
+--
+2.39.2
+
--- /dev/null
+From 83c0d8d2e1df2dea06f0b2bf34a73af311411a76 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:08:21 +0200
+Subject: netfilter: nft_set_pipapo: fix improper element removal
+
+From: Florian Westphal <fw@strlen.de>
+
+[ Upstream commit 87b5a5c209405cb6b57424cdfa226a6dbd349232 ]
+
+end key should be equal to start unless NFT_SET_EXT_KEY_END is present.
+
+It's possible to add elements that only have a start key
+("{ 1.0.0.0 . 2.0.0.0 }") without an interval end.
+
+Insertion treats this via:
+
+if (nft_set_ext_exists(ext, NFT_SET_EXT_KEY_END))
+ end = (const u8 *)nft_set_ext_key_end(ext)->data;
+else
+ end = start;
+
+but the removal side always uses nft_set_ext_key_end().
+This is wrong and leads to garbage remaining in the set after removal;
+the next lookup/insert attempt will give:
+
+BUG: KASAN: slab-use-after-free in pipapo_get+0x8eb/0xb90
+Read of size 1 at addr ffff888100d50586 by task nft-pipapo_uaf_/1399
+Call Trace:
+ kasan_report+0x105/0x140
+ pipapo_get+0x8eb/0xb90
+ nft_pipapo_insert+0x1dc/0x1710
+ nf_tables_newsetelem+0x31f5/0x4e00
+ ..
+
+Fixes: 3c4287f62044 ("nf_tables: Add set type for arbitrary concatenation of ranges")
+Reported-by: lonial con <kongln9170@gmail.com>
+Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
+Signed-off-by: Florian Westphal <fw@strlen.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/netfilter/nft_set_pipapo.c | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
+index 0452ee586c1cc..a81829c10feab 100644
+--- a/net/netfilter/nft_set_pipapo.c
++++ b/net/netfilter/nft_set_pipapo.c
+@@ -1930,7 +1930,11 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
+ int i, start, rules_fx;
+
+ match_start = data;
+- match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data;
++
++ if (nft_set_ext_exists(&e->ext, NFT_SET_EXT_KEY_END))
++ match_end = (const u8 *)nft_set_ext_key_end(&e->ext)->data;
++ else
++ match_end = data;
+
+ start = first_rule;
+ rules_fx = rules_f0;
+--
+2.39.2
+
--- /dev/null
+From b8bfbeb43ba95b6189f76448167e05a0545f9706 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 16 Jul 2023 15:07:41 +0530
+Subject: octeontx2-pf: Dont allocate BPIDs for LBK interfaces
+
+From: Geetha sowjanya <gakula@marvell.com>
+
+[ Upstream commit 8fcd7c7b3a38ab5e452f542fda8f7940e77e479a ]
+
+The current driver enables backpressure for LBK interfaces,
+but these interfaces do not support this feature.
+Hence, this patch fixes the issue by skipping the
+backpressure configuration for these interfaces.
+
+Fixes: 75f36270990c ("octeontx2-pf: Support to enable/disable pause frames via ethtool").
+Signed-off-by: Geetha sowjanya <gakula@marvell.com>
+Signed-off-by: Sunil Goutham <sgoutham@marvell.com>
+Link: https://lore.kernel.org/r/20230716093741.28063-1-gakula@marvell.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+index ed911d9946277..c236dba80ff1a 100644
+--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
++++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+@@ -1452,8 +1452,9 @@ static int otx2_init_hw_resources(struct otx2_nic *pf)
+ if (err)
+ goto err_free_npa_lf;
+
+- /* Enable backpressure */
+- otx2_nix_config_bp(pf, true);
++ /* Enable backpressure for CGX mapped PF/VFs */
++ if (!is_otx2_lbkvf(pf->pdev))
++ otx2_nix_config_bp(pf, true);
+
+ /* Init Auras and pools used by NIX RQ, for free buffer ptrs */
+ err = otx2_rq_aura_pool_init(pf);
+--
+2.39.2
+
--- /dev/null
+From 2c90078841a0854ee8bf4c7fa749f54fbd044f83 Mon Sep 17 00:00:00 2001
+From: Christian Brauner <brauner@kernel.org>
+Date: Tue, 13 Jun 2023 10:13:37 +0200
+Subject: [PATCH AUTOSEL 4.19 06/11] ovl: check type and offset of struct
+ vfsmount in ovl_entry
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit f723edb8a532cd26e1ff0a2b271d73762d48f762 ]
+
+While porting overlayfs to the new mount api I started experiencing random
+crashes that couldn't be explained easily. After much debugging and
+reasoning it became clear that struct ovl_entry requires the pointer to
+struct vfsmount to be the first member and of type struct vfsmount.
+
+During the port I added a new member at the beginning of struct
+ovl_entry which broke all over the place in the form of random crashes
+and cache corruptions. While there's a comment in ovl_free_fs() to the
+effect of "Hack! Reuse ofs->layers as a vfsmount array before freeing
+it" there's no such comment on struct ovl_entry which makes this easy to
+trip over.
+
+Add a comment and two static asserts for both the offset and the type of
+pointer in struct ovl_entry.
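+
+A rough userspace analogue of such a guard (using C11 static_assert and only
+the offset check; the kernel version also checks the member's type):
+
+    #include <assert.h>   /* static_assert (C11) */
+    #include <stddef.h>   /* offsetof */
+
+    struct layer {
+        const char *mnt;  /* must stay the first member */
+        int fsid;
+    };
+
+    /* The build fails immediately if someone reorders the members. */
+    static_assert(offsetof(struct layer, mnt) == 0,
+                  "mnt must remain the first member of struct layer");
+
+    int main(void) { return 0; }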
+
+Signed-off-by: Christian Brauner <brauner@kernel.org>
+Signed-off-by: Amir Goldstein <amir73il@gmail.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/overlayfs/ovl_entry.h | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
+index 1a1adc697c553..3d34ce992f0d8 100644
+--- a/fs/overlayfs/ovl_entry.h
++++ b/fs/overlayfs/ovl_entry.h
+@@ -28,6 +28,7 @@ struct ovl_sb {
+ };
+
+ struct ovl_layer {
++ /* ovl_free_fs() relies on @mnt being the first member! */
+ struct vfsmount *mnt;
+ /* Trap in ovl inode cache */
+ struct inode *trap;
+@@ -38,6 +39,14 @@ struct ovl_layer {
+ int fsid;
+ };
+
++/*
++ * ovl_free_fs() relies on @mnt being the first member when unmounting
++ * the private mounts created for each layer. Let's check both the
++ * offset and type.
++ */
++static_assert(offsetof(struct ovl_layer, mnt) == 0);
++static_assert(__same_type(typeof_member(struct ovl_layer, mnt), struct vfsmount *));
++
+ struct ovl_path {
+ struct ovl_layer *layer;
+ struct dentry *dentry;
+--
+2.39.2
+
--- /dev/null
+From 680f36a4f5e7d831b67c91dafe4f6c7797e53475 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Jul 2023 16:45:46 +0100
+Subject: perf build: Fix library not found error when using CSLIBS
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+From: James Clark <james.clark@arm.com>
+
+[ Upstream commit 1feece2780ac2f8de45177fe53979726cee4b3d1 ]
+
+-L only specifies the search path for libraries directly provided in the
+link line with -l. Because -lopencsd isn't specified, it's only linked
+because it's a dependency of -lopencsd_c_api. Dependencies like this are
+resolved using the default system search paths or -rpath-link=... rather
+than -L. This means that compilation only works if OpenCSD is installed
+to the system rather than provided with the CSLIBS (-L) option.
+
+This could be fixed by adding -Wl,-rpath-link=$(CSLIBS) but that is less
+conventional than just adding -lopencsd to the link line so that it uses
+-L. -lopencsd seems to have been removed in commit ed17b1914978eddb
+("perf tools: Drop requirement for libstdc++.so for libopencsd check")
+because it was thought that there was a chance compilation would work
+even if it didn't exist, but I think that only applies to libstdc++ so
+there is no harm in adding it back. libopencsd.so and libopencsd_c_api.so
+would always exist together.
+
+Testing
+=======
+
+The following scenarios now all work:
+
+ * Cross build with OpenCSD installed
+ * Cross build using CSLIBS=...
+ * Native build with OpenCSD installed
+ * Native build using CSLIBS=...
+ * Static cross build with OpenCSD installed
+ * Static cross build with CSLIBS=...
+
+Committer testing:
+
+ ⬢[acme@toolbox perf-tools]$ alias m
+ alias m='make -k BUILD_BPF_SKEL=1 CORESIGHT=1 O=/tmp/build/perf-tools -C tools/perf install-bin && git status && perf test python ; perf record -o /dev/null sleep 0.01 ; perf stat --null sleep 0.01'
+ ⬢[acme@toolbox perf-tools]$ ldd ~/bin/perf | grep csd
+ libopencsd_c_api.so.1 => /lib64/libopencsd_c_api.so.1 (0x00007fd49c44e000)
+ libopencsd.so.1 => /lib64/libopencsd.so.1 (0x00007fd49bd56000)
+ ⬢[acme@toolbox perf-tools]$ cat /etc/redhat-release
+ Fedora release 36 (Thirty Six)
+ ⬢[acme@toolbox perf-tools]$
+
+Fixes: ed17b1914978eddb ("perf tools: Drop requirement for libstdc++.so for libopencsd check")
+Reported-by: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
+Signed-off-by: James Clark <james.clark@arm.com>
+Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Tested-by: Radhey Shyam Pandey <radhey.shyam.pandey@amd.com>
+Cc: Adrian Hunter <adrian.hunter@intel.com>
+Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
+Cc: Ian Rogers <irogers@google.com>
+Cc: Ingo Molnar <mingo@redhat.com>
+Cc: Jiri Olsa <jolsa@kernel.org>
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Namhyung Kim <namhyung@kernel.org>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Uwe Kleine-König <uwe@kleine-koenig.org>
+Cc: coresight@lists.linaro.org
+Closes: https://lore.kernel.org/linux-arm-kernel/56905d7a-a91e-883a-b707-9d5f686ba5f1@arm.com/
+Link: https://lore.kernel.org/all/36cc4dc6-bf4b-1093-1c0a-876e368af183@kleine-koenig.org/
+Link: https://lore.kernel.org/r/20230707154546.456720-1-james.clark@arm.com
+Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ tools/perf/Makefile.config | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
+index 898226ea8cadc..fac6ba07eacdb 100644
+--- a/tools/perf/Makefile.config
++++ b/tools/perf/Makefile.config
+@@ -149,9 +149,9 @@ FEATURE_CHECK_LDFLAGS-libcrypto = -lcrypto
+ ifdef CSINCLUDES
+ LIBOPENCSD_CFLAGS := -I$(CSINCLUDES)
+ endif
+-OPENCSDLIBS := -lopencsd_c_api
++OPENCSDLIBS := -lopencsd_c_api -lopencsd
+ ifeq ($(findstring -static,${LDFLAGS}),-static)
+- OPENCSDLIBS += -lopencsd -lstdc++
++ OPENCSDLIBS += -lstdc++
+ endif
+ ifdef CSLIBS
+ LIBOPENCSD_LDFLAGS := -L$(CSLIBS)
+--
+2.39.2
+
--- /dev/null
+From 726cf612acdfe280e96ebb1977b1ec50b8c6ec28 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Jul 2023 12:18:58 +0100
+Subject: pinctrl: renesas: rzg2l: Handle non-unique subnode names
+
+From: Biju Das <biju.das.jz@bp.renesas.com>
+
+[ Upstream commit bfc374a145ae133613e05b9b89be561f169cb58d ]
+
+Currently, sd1 and sd0 have unique subnode names 'sd1_mux' and 'sd0_mux'.
+If we change these to non-unique subnode names such as 'mux' this can
+lead to the below conflict as the RZ/G2L pin control driver considers
+only the names of the subnodes.
+
+ pinctrl-rzg2l 11030000.pinctrl: pin P47_0 already requested by 11c00000.mmc; cannot claim for 11c10000.mmc
+ pinctrl-rzg2l 11030000.pinctrl: pin-376 (11c10000.mmc) status -22
+ pinctrl-rzg2l 11030000.pinctrl: could not request pin 376 (P47_0) from group mux on device pinctrl-rzg2l
+ renesas_sdhi_internal_dmac 11c10000.mmc: Error applying setting, reverse things back
+
+Fix this by constructing unique names from the node names of both the
+pin control configuration node and its child node, where appropriate.
+
+Based on the work done by Geert for the RZ/V2M pinctrl driver.
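+
+As a simplified userspace sketch of the naming scheme (hypothetical helper,
+not the driver code), the unique name is derived from "<parent>.<child>"
+whenever a parent node exists:
+
+    #define _GNU_SOURCE   /* for asprintf() */
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <string.h>
+
+    /* Build "<parent>.<child>", or fall back to the child name alone. */
+    static char *group_name(const char *parent, const char *child)
+    {
+        char *name;
+
+        if (!parent)
+            return strdup(child);
+
+        if (asprintf(&name, "%s.%s", parent, child) < 0)
+            return NULL;
+
+        return name;
+    }
+
+    int main(void)
+    {
+        char *a = group_name("sdhi0", "mux");
+        char *b = group_name("sdhi1", "mux");
+
+        if (a && b)
+            printf("%s %s\n", a, b);  /* "sdhi0.mux sdhi1.mux": no collision */
+        free(a);
+        free(b);
+        return 0;
+    }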
+
+Fixes: c4c4637eb57f ("pinctrl: renesas: Add RZ/G2L pin and gpio controller driver")
+Signed-off-by: Biju Das <biju.das.jz@bp.renesas.com>
+Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Link: https://lore.kernel.org/r/20230704111858.215278-1-biju.das.jz@bp.renesas.com
+Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pinctrl/renesas/pinctrl-rzg2l.c | 28 ++++++++++++++++++-------
+ 1 file changed, 20 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/pinctrl/renesas/pinctrl-rzg2l.c b/drivers/pinctrl/renesas/pinctrl-rzg2l.c
+index ca6303fc41f98..fd11d28e5a1e4 100644
+--- a/drivers/pinctrl/renesas/pinctrl-rzg2l.c
++++ b/drivers/pinctrl/renesas/pinctrl-rzg2l.c
+@@ -246,6 +246,7 @@ static int rzg2l_map_add_config(struct pinctrl_map *map,
+
+ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+ struct device_node *np,
++ struct device_node *parent,
+ struct pinctrl_map **map,
+ unsigned int *num_maps,
+ unsigned int *index)
+@@ -263,6 +264,7 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+ struct property *prop;
+ int ret, gsel, fsel;
+ const char **pin_fn;
++ const char *name;
+ const char *pin;
+
+ pinmux = of_find_property(np, "pinmux", NULL);
+@@ -346,8 +348,19 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+ psel_val[i] = MUX_FUNC(value);
+ }
+
++ if (parent) {
++ name = devm_kasprintf(pctrl->dev, GFP_KERNEL, "%pOFn.%pOFn",
++ parent, np);
++ if (!name) {
++ ret = -ENOMEM;
++ goto done;
++ }
++ } else {
++ name = np->name;
++ }
++
+ /* Register a single pin group listing all the pins we read from DT */
+- gsel = pinctrl_generic_add_group(pctldev, np->name, pins, num_pinmux, NULL);
++ gsel = pinctrl_generic_add_group(pctldev, name, pins, num_pinmux, NULL);
+ if (gsel < 0) {
+ ret = gsel;
+ goto done;
+@@ -357,17 +370,16 @@ static int rzg2l_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+ * Register a single group function where the 'data' is an array PSEL
+ * register values read from DT.
+ */
+- pin_fn[0] = np->name;
+- fsel = pinmux_generic_add_function(pctldev, np->name, pin_fn, 1,
+- psel_val);
++ pin_fn[0] = name;
++ fsel = pinmux_generic_add_function(pctldev, name, pin_fn, 1, psel_val);
+ if (fsel < 0) {
+ ret = fsel;
+ goto remove_group;
+ }
+
+ maps[idx].type = PIN_MAP_TYPE_MUX_GROUP;
+- maps[idx].data.mux.group = np->name;
+- maps[idx].data.mux.function = np->name;
++ maps[idx].data.mux.group = name;
++ maps[idx].data.mux.function = name;
+ idx++;
+
+ dev_dbg(pctrl->dev, "Parsed %pOF with %d pins\n", np, num_pinmux);
+@@ -414,7 +426,7 @@ static int rzg2l_dt_node_to_map(struct pinctrl_dev *pctldev,
+ index = 0;
+
+ for_each_child_of_node(np, child) {
+- ret = rzg2l_dt_subnode_to_map(pctldev, child, map,
++ ret = rzg2l_dt_subnode_to_map(pctldev, child, np, map,
+ num_maps, &index);
+ if (ret < 0) {
+ of_node_put(child);
+@@ -423,7 +435,7 @@ static int rzg2l_dt_node_to_map(struct pinctrl_dev *pctldev,
+ }
+
+ if (*num_maps == 0) {
+- ret = rzg2l_dt_subnode_to_map(pctldev, np, map,
++ ret = rzg2l_dt_subnode_to_map(pctldev, np, NULL, map,
+ num_maps, &index);
+ if (ret < 0)
+ goto done;
+--
+2.39.2
+
--- /dev/null
+From 825d0cfe089333f10e47c7657c16035ce33865d3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Jul 2023 17:07:06 +0200
+Subject: pinctrl: renesas: rzv2m: Handle non-unique subnode names
+
+From: Geert Uytterhoeven <geert+renesas@glider.be>
+
+[ Upstream commit f46a0b47cc0829acd050213194c5a77351e619b2 ]
+
+The eMMC and SDHI pin control configuration nodes in DT have subnodes
+with the same names ("data" and "ctrl"). As the RZ/V2M pin control
+driver considers only the names of the subnodes, this leads to
+conflicts:
+
+ pinctrl-rzv2m b6250000.pinctrl: pin P8_2 already requested by 85000000.mmc; cannot claim for 85020000.mmc
+ pinctrl-rzv2m b6250000.pinctrl: pin-130 (85020000.mmc) status -22
+ renesas_sdhi_internal_dmac 85020000.mmc: Error applying setting, reverse things back
+
+Fix this by constructing unique names from the node names of both the
+pin control configuration node and its child node, where appropriate.
+
+Reported-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
+
+Fixes: 92a9b825257614af ("pinctrl: renesas: Add RZ/V2M pin and gpio controller driver")
+Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Tested-by: Fabrizio Castro <fabrizio.castro.jz@renesas.com>
+Link: https://lore.kernel.org/r/607bd6ab4905b0b1b119a06ef953fa1184505777.1688396717.git.geert+renesas@glider.be
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pinctrl/renesas/pinctrl-rzv2m.c | 28 ++++++++++++++++++-------
+ 1 file changed, 20 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/pinctrl/renesas/pinctrl-rzv2m.c b/drivers/pinctrl/renesas/pinctrl-rzv2m.c
+index e8c18198bebd2..35f382b055e83 100644
+--- a/drivers/pinctrl/renesas/pinctrl-rzv2m.c
++++ b/drivers/pinctrl/renesas/pinctrl-rzv2m.c
+@@ -207,6 +207,7 @@ static int rzv2m_map_add_config(struct pinctrl_map *map,
+
+ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+ struct device_node *np,
++ struct device_node *parent,
+ struct pinctrl_map **map,
+ unsigned int *num_maps,
+ unsigned int *index)
+@@ -224,6 +225,7 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+ struct property *prop;
+ int ret, gsel, fsel;
+ const char **pin_fn;
++ const char *name;
+ const char *pin;
+
+ pinmux = of_find_property(np, "pinmux", NULL);
+@@ -307,8 +309,19 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+ psel_val[i] = MUX_FUNC(value);
+ }
+
++ if (parent) {
++ name = devm_kasprintf(pctrl->dev, GFP_KERNEL, "%pOFn.%pOFn",
++ parent, np);
++ if (!name) {
++ ret = -ENOMEM;
++ goto done;
++ }
++ } else {
++ name = np->name;
++ }
++
+ /* Register a single pin group listing all the pins we read from DT */
+- gsel = pinctrl_generic_add_group(pctldev, np->name, pins, num_pinmux, NULL);
++ gsel = pinctrl_generic_add_group(pctldev, name, pins, num_pinmux, NULL);
+ if (gsel < 0) {
+ ret = gsel;
+ goto done;
+@@ -318,17 +331,16 @@ static int rzv2m_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+ * Register a single group function where the 'data' is an array PSEL
+ * register values read from DT.
+ */
+- pin_fn[0] = np->name;
+- fsel = pinmux_generic_add_function(pctldev, np->name, pin_fn, 1,
+- psel_val);
++ pin_fn[0] = name;
++ fsel = pinmux_generic_add_function(pctldev, name, pin_fn, 1, psel_val);
+ if (fsel < 0) {
+ ret = fsel;
+ goto remove_group;
+ }
+
+ maps[idx].type = PIN_MAP_TYPE_MUX_GROUP;
+- maps[idx].data.mux.group = np->name;
+- maps[idx].data.mux.function = np->name;
++ maps[idx].data.mux.group = name;
++ maps[idx].data.mux.function = name;
+ idx++;
+
+ dev_dbg(pctrl->dev, "Parsed %pOF with %d pins\n", np, num_pinmux);
+@@ -375,7 +387,7 @@ static int rzv2m_dt_node_to_map(struct pinctrl_dev *pctldev,
+ index = 0;
+
+ for_each_child_of_node(np, child) {
+- ret = rzv2m_dt_subnode_to_map(pctldev, child, map,
++ ret = rzv2m_dt_subnode_to_map(pctldev, child, np, map,
+ num_maps, &index);
+ if (ret < 0) {
+ of_node_put(child);
+@@ -384,7 +396,7 @@ static int rzv2m_dt_node_to_map(struct pinctrl_dev *pctldev,
+ }
+
+ if (*num_maps == 0) {
+- ret = rzv2m_dt_subnode_to_map(pctldev, np, map,
++ ret = rzv2m_dt_subnode_to_map(pctldev, np, NULL, map,
+ num_maps, &index);
+ if (ret < 0)
+ goto done;
+--
+2.39.2
+
--- /dev/null
+From 1da38321c1da0aea4122e574000e2a97ee3d2378 Mon Sep 17 00:00:00 2001
+From: Ye Bin <yebin10@huawei.com>
+Date: Mon, 5 Jun 2023 22:07:31 +0800
+Subject: [PATCH AUTOSEL 4.19 04/11] quota: fix warning in dqgrab()
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit d6a95db3c7ad160bc16b89e36449705309b52bcb ]
+
+There's an issue as follows when doing fault injection:
+WARNING: CPU: 1 PID: 14870 at include/linux/quotaops.h:51 dquot_disable+0x13b7/0x18c0
+Modules linked in:
+CPU: 1 PID: 14870 Comm: fsconfig Not tainted 6.3.0-next-20230505-00006-g5107a9c821af-dirty #541
+RIP: 0010:dquot_disable+0x13b7/0x18c0
+RSP: 0018:ffffc9000acc79e0 EFLAGS: 00010246
+RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff88825e41b980
+RDX: 0000000000000000 RSI: ffff88825e41b980 RDI: 0000000000000002
+RBP: ffff888179f68000 R08: ffffffff82087ca7 R09: 0000000000000000
+R10: 0000000000000001 R11: ffffed102f3ed026 R12: ffff888179f68130
+R13: ffff888179f68110 R14: dffffc0000000000 R15: ffff888179f68118
+FS: 00007f450a073740(0000) GS:ffff88882fc00000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 00007ffe96f2efd8 CR3: 000000025c8ad000 CR4: 00000000000006e0
+DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+Call Trace:
+ <TASK>
+ dquot_load_quota_sb+0xd53/0x1060
+ dquot_resume+0x172/0x230
+ ext4_reconfigure+0x1dc6/0x27b0
+ reconfigure_super+0x515/0xa90
+ __x64_sys_fsconfig+0xb19/0xd20
+ do_syscall_64+0x39/0xb0
+ entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+The above issue may happen as follows:
+ProcessA ProcessB ProcessC
+sys_fsconfig
+ vfs_fsconfig_locked
+ reconfigure_super
+ ext4_remount
+ dquot_suspend -> suspend all type quota
+
+ sys_fsconfig
+ vfs_fsconfig_locked
+ reconfigure_super
+ ext4_remount
+ dquot_resume
+ ret = dquot_load_quota_sb
+ add_dquot_ref
+ do_open -> open file O_RDWR
+ vfs_open
+ do_dentry_open
+ get_write_access
+ atomic_inc_unless_negative(&inode->i_writecount)
+ ext4_file_open
+ dquot_file_open
+ dquot_initialize
+ __dquot_initialize
+ dqget
+ atomic_inc(&dquot->dq_count);
+
+ __dquot_initialize
+ __dquot_initialize
+ dqget
+ if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
+ ext4_acquire_dquot
+ -> Return error DQ_ACTIVE_B flag isn't set
+ dquot_disable
+ invalidate_dquots
+ if (atomic_read(&dquot->dq_count))
+ dqgrab
+ WARN_ON_ONCE(!test_bit(DQ_ACTIVE_B, &dquot->dq_flags))
+ -> Trigger warning
+
+In the above scenario it is normal for 'dquot->dq_flags' to have no
+DQ_ACTIVE_B set at the time dqgrab() is called.
+To solve the issue, replace the dqgrab() call in invalidate_dquots() with a
+plain atomic_inc(&dquot->dq_count).
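+
+For reference, dqgrab() is a tiny inline helper whose body is roughly the
+following (paraphrased from include/linux/quotaops.h; minor details may vary
+between kernel versions), which is why calling it here trips the warning
+while a bare atomic_inc() takes the reference without the assertion:
+
+        static inline void dqgrab(struct dquot *dquot)
+        {
+                /* Assumes the dquot is still active -- exactly what is
+                 * not guaranteed in the scenario above. */
+                WARN_ON_ONCE(!test_bit(DQ_ACTIVE_B, &dquot->dq_flags));
+                atomic_inc(&dquot->dq_count);
+        }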
+
+Signed-off-by: Ye Bin <yebin10@huawei.com>
+Signed-off-by: Jan Kara <jack@suse.cz>
+Message-Id: <20230605140731.2427629-3-yebin10@huawei.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/quota/dquot.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
+index 0d3ffc727bb00..303987d29b9c9 100644
+--- a/fs/quota/dquot.c
++++ b/fs/quota/dquot.c
+@@ -540,7 +540,7 @@ static void invalidate_dquots(struct super_block *sb, int type)
+ continue;
+ /* Wait for dquot users */
+ if (atomic_read(&dquot->dq_count)) {
+- dqgrab(dquot);
++ atomic_inc(&dquot->dq_count);
+ spin_unlock(&dq_list_lock);
+ /*
+ * Once dqput() wakes us up, we know it's time to free
+--
+2.39.2
+
--- /dev/null
+From 3e9e30aa708b3b8cb0485725964206a7b72d1f9b Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Mon, 5 Jun 2023 22:07:30 +0800
+Subject: [PATCH AUTOSEL 4.19 03/11] quota: Properly disable quotas when
+ add_dquot_ref() fails
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit 6a4e3363792e30177cc3965697e34ddcea8b900b ]
+
+When add_dquot_ref() fails (usually due to an IO error or ENOMEM), we want
+to disable the quotas we are trying to enable. However, the dquot_disable()
+call was passed just the flags we are enabling, so in the case of flags ==
+DQUOT_USAGE_ENABLED the dquot_disable() call will just fail with EINVAL
+instead of properly disabling quotas. Fix the problem by always passing
+DQUOT_LIMITS_ENABLED | DQUOT_USAGE_ENABLED to dquot_disable() in this
+case.
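+
+The failure mode follows from the sanity check near the top of
+dquot_disable(), which (paraphrased sketch, not the exact source) rejects a
+request to turn off usage accounting without also turning off limits:
+
+        /* dquot_disable(), simplified sketch */
+        if ((flags & DQUOT_USAGE_ENABLED) && !(flags & DQUOT_LIMITS_ENABLED))
+                return -EINVAL; /* what the error path used to hit */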
+
+Reported-and-tested-by: Ye Bin <yebin10@huawei.com>
+Reported-by: syzbot+e633c79ceaecbf479854@syzkaller.appspotmail.com
+Signed-off-by: Jan Kara <jack@suse.cz>
+Message-Id: <20230605140731.2427629-2-yebin10@huawei.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/quota/dquot.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
+index 770a2b1434856..0d3ffc727bb00 100644
+--- a/fs/quota/dquot.c
++++ b/fs/quota/dquot.c
+@@ -2407,7 +2407,8 @@ int dquot_load_quota_sb(struct super_block *sb, int type, int format_id,
+
+ error = add_dquot_ref(sb, type);
+ if (error)
+- dquot_disable(sb, type, flags);
++ dquot_disable(sb, type,
++ DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
+
+ return error;
+ out_fmt:
+--
+2.39.2
+
--- /dev/null
+From 4d3360fe4eb403c4add5725291d2c102bad4db73 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 7 Apr 2023 16:05:38 -0700
+Subject: rcu: Mark additional concurrent load from ->cpu_no_qs.b.exp
+
+From: Paul E. McKenney <paulmck@kernel.org>
+
+[ Upstream commit 9146eb25495ea8bfb5010192e61e3ed5805ce9ef ]
+
+The per-CPU rcu_data structure's ->cpu_no_qs.b.exp field is updated
+only on the instance corresponding to the current CPU, but can be read
+more widely. Unmarked accesses are OK from the corresponding CPU, but
+only if interrupts are disabled, given that interrupt handlers can and
+do modify this field.
+
+Unfortunately, although the load from rcu_preempt_deferred_qs() is always
+carried out from the corresponding CPU, interrupts are not necessarily
+disabled. This commit therefore upgrades this load to READ_ONCE.
+
+Similarly, the diagnostic access from synchronize_rcu_expedited_wait()
+might run with interrupts disabled and from some other CPU. This commit
+therefore marks this load with data_race().
+
+Finally, the C-language access in rcu_preempt_ctxt_queue() is OK as
+is because interrupts are disabled and this load is always from the
+corresponding CPU. This commit adds a comment giving the rationale for
+this access being safe.
+
+This data race was reported by KCSAN. Not appropriate for backporting
+due to failure being unlikely.
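+
+A minimal sketch of the three access styles discussed above (illustrative
+only, condensed from the hunks below rather than complete functions):
+
+        /* Corresponding CPU with interrupts disabled: plain access is fine. */
+        if (blkd_state & RCU_EXP_BLKD && rdp->cpu_no_qs.b.exp)
+                rcu_report_exp_rdp(rdp);
+
+        /* Corresponding CPU, but interrupts may be enabled: mark the load. */
+        if (READ_ONCE(rdp->cpu_no_qs.b.exp))
+                rcu_report_exp_rdp(rdp);
+
+        /* Diagnostic read, possibly from another CPU: annotate the race. */
+        pr_cont(" %c", "D."[!!data_race(rdp->cpu_no_qs.b.exp)]);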
+
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/rcu/tree_exp.h | 2 +-
+ kernel/rcu/tree_plugin.h | 4 +++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
+index e25321dbb068e..aa3ec3c3b9f75 100644
+--- a/kernel/rcu/tree_exp.h
++++ b/kernel/rcu/tree_exp.h
+@@ -641,7 +641,7 @@ static void synchronize_rcu_expedited_wait(void)
+ "O."[!!cpu_online(cpu)],
+ "o."[!!(rdp->grpmask & rnp->expmaskinit)],
+ "N."[!!(rdp->grpmask & rnp->expmaskinitnext)],
+- "D."[!!(rdp->cpu_no_qs.b.exp)]);
++ "D."[!!data_race(rdp->cpu_no_qs.b.exp)]);
+ }
+ }
+ pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
+diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
+index e3142ee35fc6a..044026abfdd7f 100644
+--- a/kernel/rcu/tree_plugin.h
++++ b/kernel/rcu/tree_plugin.h
+@@ -257,6 +257,8 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
+ * GP should not be able to end until we report, so there should be
+ * no need to check for a subsequent expedited GP. (Though we are
+ * still in a quiescent state in any case.)
++ *
++ * Interrupts are disabled, so ->cpu_no_qs.b.exp cannot change.
+ */
+ if (blkd_state & RCU_EXP_BLKD && rdp->cpu_no_qs.b.exp)
+ rcu_report_exp_rdp(rdp);
+@@ -941,7 +943,7 @@ notrace void rcu_preempt_deferred_qs(struct task_struct *t)
+ {
+ struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
+
+- if (rdp->cpu_no_qs.b.exp)
++ if (READ_ONCE(rdp->cpu_no_qs.b.exp))
+ rcu_report_exp_rdp(rdp);
+ }
+
+--
+2.39.2
+
--- /dev/null
+From aef95e1bb3b2e697dd8a92a4b03466862cd224fd Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 3 Aug 2022 01:22:05 +0900
+Subject: rcu-tasks: Avoid pr_info() with spin lock in cblist_init_generic()
+
+From: Shigeru Yoshida <syoshida@redhat.com>
+
+[ Upstream commit 5fc8cbe4cf0fd34ded8045c385790c3bf04f6785 ]
+
+pr_info() is called with the rtp->cbs_gbl_lock spin lock held. Because
+pr_info() calls printk(), which might sleep, this results in a BUG
+like the one below:
+
+[ 0.206455] cblist_init_generic: Setting adjustable number of callback queues.
+[ 0.206463]
+[ 0.206464] =============================
+[ 0.206464] [ BUG: Invalid wait context ]
+[ 0.206465] 5.19.0-00428-g9de1f9c8ca51 #5 Not tainted
+[ 0.206466] -----------------------------
+[ 0.206466] swapper/0/1 is trying to lock:
+[ 0.206467] ffffffffa0167a58 (&port_lock_key){....}-{3:3}, at: serial8250_console_write+0x327/0x4a0
+[ 0.206473] other info that might help us debug this:
+[ 0.206473] context-{5:5}
+[ 0.206474] 3 locks held by swapper/0/1:
+[ 0.206474] #0: ffffffff9eb597e0 (rcu_tasks.cbs_gbl_lock){....}-{2:2}, at: cblist_init_generic.constprop.0+0x14/0x1f0
+[ 0.206478] #1: ffffffff9eb579c0 (console_lock){+.+.}-{0:0}, at: _printk+0x63/0x7e
+[ 0.206482] #2: ffffffff9ea77780 (console_owner){....}-{0:0}, at: console_emit_next_record.constprop.0+0x111/0x330
+[ 0.206485] stack backtrace:
+[ 0.206486] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 5.19.0-00428-g9de1f9c8ca51 #5
+[ 0.206488] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.16.0-1.fc36 04/01/2014
+[ 0.206489] Call Trace:
+[ 0.206490] <TASK>
+[ 0.206491] dump_stack_lvl+0x6a/0x9f
+[ 0.206493] __lock_acquire.cold+0x2d7/0x2fe
+[ 0.206496] ? stack_trace_save+0x46/0x70
+[ 0.206497] lock_acquire+0xd1/0x2f0
+[ 0.206499] ? serial8250_console_write+0x327/0x4a0
+[ 0.206500] ? __lock_acquire+0x5c7/0x2720
+[ 0.206502] _raw_spin_lock_irqsave+0x3d/0x90
+[ 0.206504] ? serial8250_console_write+0x327/0x4a0
+[ 0.206506] serial8250_console_write+0x327/0x4a0
+[ 0.206508] console_emit_next_record.constprop.0+0x180/0x330
+[ 0.206511] console_unlock+0xf7/0x1f0
+[ 0.206512] vprintk_emit+0xf7/0x330
+[ 0.206514] _printk+0x63/0x7e
+[ 0.206516] cblist_init_generic.constprop.0.cold+0x24/0x32
+[ 0.206518] rcu_init_tasks_generic+0x5/0xd9
+[ 0.206522] kernel_init_freeable+0x15b/0x2a2
+[ 0.206523] ? rest_init+0x160/0x160
+[ 0.206526] kernel_init+0x11/0x120
+[ 0.206527] ret_from_fork+0x1f/0x30
+[ 0.206530] </TASK>
+[ 0.207018] cblist_init_generic: Setting shift to 1 and lim to 1.
+
+This patch moves pr_info() so that it is called without
+rtp->cbs_gbl_lock locked.
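+
+In sketch form, the resulting shape of cblist_init_generic() is (simplified;
+the real function computes more state under the lock):
+
+        raw_spin_lock_irqsave(&rtp->cbs_gbl_lock, flags);
+        /* ... decide rcu_task_cb_adjust, enqueue shift and limit ... */
+        raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
+
+        /* Only print once the raw spinlock has been dropped. */
+        if (rcu_task_cb_adjust)
+                pr_info("%s: Setting adjustable number of callback queues.\n",
+                        __func__);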
+
+Signed-off-by: Shigeru Yoshida <syoshida@redhat.com>
+Tested-by: "Zhang, Qiang1" <qiang1.zhang@intel.com>
+Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/rcu/tasks.h | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/kernel/rcu/tasks.h b/kernel/rcu/tasks.h
+index df968321feada..c1f18c63b9b14 100644
+--- a/kernel/rcu/tasks.h
++++ b/kernel/rcu/tasks.h
+@@ -233,7 +233,6 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
+ if (rcu_task_enqueue_lim < 0) {
+ rcu_task_enqueue_lim = 1;
+ rcu_task_cb_adjust = true;
+- pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
+ } else if (rcu_task_enqueue_lim == 0) {
+ rcu_task_enqueue_lim = 1;
+ }
+@@ -264,6 +263,10 @@ static void cblist_init_generic(struct rcu_tasks *rtp)
+ raw_spin_unlock_rcu_node(rtpcp); // irqs remain disabled.
+ }
+ raw_spin_unlock_irqrestore(&rtp->cbs_gbl_lock, flags);
++
++ if (rcu_task_cb_adjust)
++ pr_info("%s: Setting adjustable number of callback queues.\n", __func__);
++
+ pr_info("%s: Setting shift to %d and lim to %d.\n", __func__, data_race(rtp->percpu_enqueue_shift), data_race(rtp->percpu_enqueue_lim));
+ }
+
+--
+2.39.2
+
--- /dev/null
+From 242c82c4047048b1d67da8284935b57fc6abaa12 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 14:59:18 -0700
+Subject: Revert "tcp: avoid the lookup process failing to get sk in ehash
+ table"
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 81b3ade5d2b98ad6e0a473b0e1e420a801275592 ]
+
+This reverts commit 3f4ca5fafc08881d7a57daa20449d171f2887043.
+
+Commit 3f4ca5fafc08 ("tcp: avoid the lookup process failing to get sk in
+ehash table") reversed the order in how a socket is inserted into ehash
+to fix an issue that ehash-lookup could fail when reqsk/full sk/twsk are
+swapped. However, it introduced another lookup failure.
+
+The full socket in ehash is allocated from a slab with SLAB_TYPESAFE_BY_RCU
+and does not have SOCK_RCU_FREE, so the socket could be reused even while
+it is being referenced on another CPU doing RCU lookup.
+
+Let's say a socket is reused and inserted into the same hash bucket during
+lookup. After the blamed commit, a new socket is inserted at the end of
+the list. If that happens, we will skip sockets placed after the previous
+position of the reused socket, resulting in ehash lookup failure.
+
+As described in Documentation/RCU/rculist_nulls.rst, we should insert a
+new socket at the head of the list to avoid such an issue.
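+
+A sketch of the reader side that makes head insertion necessary (modeled on
+the pattern in Documentation/RCU/rculist_nulls.rst, not the exact ehash
+lookup code; found_match() stands in for the real key comparison):
+
+begin:
+        rcu_read_lock();
+        sk_nulls_for_each_rcu(sk, node, &head->chain) {
+                if (found_match(sk))
+                        goto found;
+        }
+        /*
+         * Reached a nulls marker without a match.  If it belongs to a
+         * different bucket, the socket we were traversing was freed and
+         * reused elsewhere: restart.  If a reused socket is re-inserted
+         * at the *head* of the same bucket, we simply re-walk the whole
+         * chain; had it been re-inserted at the *tail*, everything between
+         * its old position and the tail would be silently skipped and this
+         * check could not catch it -- the lookup failure the revert avoids.
+         */
+        if (get_nulls_value(node) != slot)
+                goto begin;
+found:
+        rcu_read_unlock();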
+
+This issue, the swap-lookup-failure, and another variant reported in [0]
+can all be handled properly by adding a locked ehash lookup suggested by
+Eric Dumazet [1].
+
+However, this issue could occur for every packet and is thus more likely
+than the other two races, so let's revert the change for now.
+
+Link: https://lore.kernel.org/netdev/20230606064306.9192-1-duanmuquan@baidu.com/ [0]
+Link: https://lore.kernel.org/netdev/CANn89iK8snOz8TYOhhwfimC7ykYA78GA3Nyv8x06SZYa1nKdyA@mail.gmail.com/ [1]
+Fixes: 3f4ca5fafc08 ("tcp: avoid the lookup process failing to get sk in ehash table")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230717215918.15723-1-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/inet_hashtables.c | 17 ++---------------
+ net/ipv4/inet_timewait_sock.c | 8 ++++----
+ 2 files changed, 6 insertions(+), 19 deletions(-)
+
+diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
+index e8734ffca85a8..c19b462662ad0 100644
+--- a/net/ipv4/inet_hashtables.c
++++ b/net/ipv4/inet_hashtables.c
+@@ -650,20 +650,8 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
+ spin_lock(lock);
+ if (osk) {
+ WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
+- ret = sk_hashed(osk);
+- if (ret) {
+- /* Before deleting the node, we insert a new one to make
+- * sure that the look-up-sk process would not miss either
+- * of them and that at least one node would exist in ehash
+- * table all the time. Otherwise there's a tiny chance
+- * that lookup process could find nothing in ehash table.
+- */
+- __sk_nulls_add_node_tail_rcu(sk, list);
+- sk_nulls_del_node_init_rcu(osk);
+- }
+- goto unlock;
+- }
+- if (found_dup_sk) {
++ ret = sk_nulls_del_node_init_rcu(osk);
++ } else if (found_dup_sk) {
+ *found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
+ if (*found_dup_sk)
+ ret = false;
+@@ -672,7 +660,6 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
+ if (ret)
+ __sk_nulls_add_node_rcu(sk, list);
+
+-unlock:
+ spin_unlock(lock);
+
+ return ret;
+diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
+index beed32fff4841..1d77d992e6e77 100644
+--- a/net/ipv4/inet_timewait_sock.c
++++ b/net/ipv4/inet_timewait_sock.c
+@@ -91,10 +91,10 @@ void inet_twsk_put(struct inet_timewait_sock *tw)
+ }
+ EXPORT_SYMBOL_GPL(inet_twsk_put);
+
+-static void inet_twsk_add_node_tail_rcu(struct inet_timewait_sock *tw,
+- struct hlist_nulls_head *list)
++static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
++ struct hlist_nulls_head *list)
+ {
+- hlist_nulls_add_tail_rcu(&tw->tw_node, list);
++ hlist_nulls_add_head_rcu(&tw->tw_node, list);
+ }
+
+ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
+@@ -147,7 +147,7 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
+
+ spin_lock(lock);
+
+- inet_twsk_add_node_tail_rcu(tw, &ehead->chain);
++ inet_twsk_add_node_rcu(tw, &ehead->chain);
+
+ /* Step 3: Remove SK from hash chain */
+ if (__sk_nulls_del_node_init_rcu(sk))
+--
+2.39.2
+
--- /dev/null
+From 8455627afba0715ac09ca4e31fd0ca55986494f2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 30 May 2023 16:25:07 +0800
+Subject: sched/fair: Don't balance task to its current running CPU
+
+From: Yicong Yang <yangyicong@hisilicon.com>
+
+[ Upstream commit 0dd37d6dd33a9c23351e6115ae8cdac7863bc7de ]
+
+We've run into a case where the balancer tries to balance a migration
+disabled task and triggers the warning in set_task_cpu() shown below:
+
+ ------------[ cut here ]------------
+ WARNING: CPU: 7 PID: 0 at kernel/sched/core.c:3115 set_task_cpu+0x188/0x240
+ Modules linked in: hclgevf xt_CHECKSUM ipt_REJECT nf_reject_ipv4 <...snip>
+ CPU: 7 PID: 0 Comm: swapper/7 Kdump: loaded Tainted: G O 6.1.0-rc4+ #1
+ Hardware name: Huawei TaiShan 2280 V2/BC82AMDC, BIOS 2280-V2 CS V5.B221.01 12/09/2021
+ pstate: 604000c9 (nZCv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+ pc : set_task_cpu+0x188/0x240
+ lr : load_balance+0x5d0/0xc60
+ sp : ffff80000803bc70
+ x29: ffff80000803bc70 x28: ffff004089e190e8 x27: ffff004089e19040
+ x26: ffff007effcabc38 x25: 0000000000000000 x24: 0000000000000001
+ x23: ffff80000803be84 x22: 000000000000000c x21: ffffb093e79e2a78
+ x20: 000000000000000c x19: ffff004089e19040 x18: 0000000000000000
+ x17: 0000000000001fad x16: 0000000000000030 x15: 0000000000000000
+ x14: 0000000000000003 x13: 0000000000000000 x12: 0000000000000000
+ x11: 0000000000000001 x10: 0000000000000400 x9 : ffffb093e4cee530
+ x8 : 00000000fffffffe x7 : 0000000000ce168a x6 : 000000000000013e
+ x5 : 00000000ffffffe1 x4 : 0000000000000001 x3 : 0000000000000b2a
+ x2 : 0000000000000b2a x1 : ffffb093e6d6c510 x0 : 0000000000000001
+ Call trace:
+ set_task_cpu+0x188/0x240
+ load_balance+0x5d0/0xc60
+ rebalance_domains+0x26c/0x380
+ _nohz_idle_balance.isra.0+0x1e0/0x370
+ run_rebalance_domains+0x6c/0x80
+ __do_softirq+0x128/0x3d8
+ ____do_softirq+0x18/0x24
+ call_on_irq_stack+0x2c/0x38
+ do_softirq_own_stack+0x24/0x3c
+ __irq_exit_rcu+0xcc/0xf4
+ irq_exit_rcu+0x18/0x24
+ el1_interrupt+0x4c/0xe4
+ el1h_64_irq_handler+0x18/0x2c
+ el1h_64_irq+0x74/0x78
+ arch_cpu_idle+0x18/0x4c
+ default_idle_call+0x58/0x194
+ do_idle+0x244/0x2b0
+ cpu_startup_entry+0x30/0x3c
+ secondary_start_kernel+0x14c/0x190
+ __secondary_switched+0xb0/0xb4
+ ---[ end trace 0000000000000000 ]---
+
+Further investigation shows that the warning is superfluous: the migration
+disabled task is just going to be migrated to its current running CPU.
+This is because, on load balance, if the dst_cpu is not allowed by the
+task, we re-select a new_dst_cpu as a candidate. If no task can be
+balanced to dst_cpu, we try to balance the task to the new_dst_cpu
+instead. In this case, when the migration disabled task is not on a CPU,
+it is only allowed to run on its current CPU; load balance then selects
+that CPU as new_dst_cpu and later triggers the warning above.
+
+The new_dst_cpu is chosen from env->dst_grpmask. Currently it contains the
+CPUs in sched_group_span(), and with overlapped groups it is possible to
+run into this case. This patch sets env->dst_grpmask to
+group_balance_mask(), which excludes any CPUs belonging to the busiest
+group and thus solves the issue. For balancing in a domain with no
+overlapped groups the behaviour remains the same as before.
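+
+For context, the candidate selection referred to above lives in
+can_migrate_task() and looks roughly like this (condensed sketch; exact
+flags and surrounding checks differ by kernel version):
+
+        if (!cpumask_test_cpu(env->dst_cpu, p->cpus_ptr)) {
+                int cpu;
+
+                env->flags |= LBF_SOME_PINNED;
+
+                /*
+                 * Remember a fallback destination for a later retry.  With
+                 * dst_grpmask == sched_group_span() and overlapping groups,
+                 * this can end up being the pinned task's own CPU.
+                 */
+                for_each_cpu_and(cpu, env->dst_grpmask, env->cpus) {
+                        if (cpumask_test_cpu(cpu, p->cpus_ptr)) {
+                                env->flags |= LBF_DST_PINNED;
+                                env->new_dst_cpu = cpu;
+                                break;
+                        }
+                }
+                return 0;
+        }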
+
+Suggested-by: Vincent Guittot <vincent.guittot@linaro.org>
+Signed-off-by: Yicong Yang <yangyicong@hisilicon.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Vincent Guittot <vincent.guittot@linaro.org>
+Link: https://lore.kernel.org/r/20230530082507.10444-1-yangyicong@huawei.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index fa33c441ae867..57d39de0962d7 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -10556,7 +10556,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
+ .sd = sd,
+ .dst_cpu = this_cpu,
+ .dst_rq = this_rq,
+- .dst_grpmask = sched_group_span(sd->groups),
++ .dst_grpmask = group_balance_mask(sd->groups),
+ .idle = idle,
+ .loop_break = SCHED_NR_MIGRATE_BREAK,
+ .cpus = cpus,
+--
+2.39.2
+
--- /dev/null
+From 87c0b2894b5bff97a3b231e21a5467e96e6ba324 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Jun 2023 16:07:47 +0800
+Subject: sched/fair: Use recent_used_cpu to test p->cpus_ptr
+
+From: Miaohe Lin <linmiaohe@huawei.com>
+
+[ Upstream commit ae2ad293d6be143ad223f5f947cca07bcbe42595 ]
+
+When checking whether a recently used CPU can be a potential idle
+candidate, recent_used_cpu should be used to test p->cpus_ptr, since
+p->recent_used_cpu is no longer equal to recent_used_cpu at that point
+and the candidate decision here is made based on recent_used_cpu.
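+
+The relevant slice of select_idle_sibling(), condensed from the hunk below
+(note that the snapshot happens before p->recent_used_cpu is overwritten,
+so the two are no longer interchangeable at the test):
+
+        /* Check a recently used CPU as a potential idle candidate: */
+        recent_used_cpu = p->recent_used_cpu;
+        p->recent_used_cpu = prev;
+        if (recent_used_cpu != prev &&
+            recent_used_cpu != target &&
+            cpus_share_cache(recent_used_cpu, target) &&
+            cpumask_test_cpu(recent_used_cpu, p->cpus_ptr))    /* the fix */
+                return recent_used_cpu;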
+
+Fixes: 89aafd67f28c ("sched/fair: Use prev instead of new target as recent_used_cpu")
+Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Reviewed-by: Phil Auld <pauld@redhat.com>
+Acked-by: Mel Gorman <mgorman@suse.de>
+Link: https://lore.kernel.org/r/20230620080747.359122-1-linmiaohe@huawei.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/fair.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
+index 57d39de0962d7..5e5aea2360a87 100644
+--- a/kernel/sched/fair.c
++++ b/kernel/sched/fair.c
+@@ -6935,7 +6935,7 @@ static int select_idle_sibling(struct task_struct *p, int prev, int target)
+ recent_used_cpu != target &&
+ cpus_share_cache(recent_used_cpu, target) &&
+ (available_idle_cpu(recent_used_cpu) || sched_idle_cpu(recent_used_cpu)) &&
+- cpumask_test_cpu(p->recent_used_cpu, p->cpus_ptr) &&
++ cpumask_test_cpu(recent_used_cpu, p->cpus_ptr) &&
+ asym_fits_cpu(task_util, util_min, util_max, recent_used_cpu)) {
+ return recent_used_cpu;
+ }
+--
+2.39.2
+
--- /dev/null
+From 24ad138c2ace2a7a5bc0ceccb0055be994ccc3ad Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Mar 2023 12:54:18 +0200
+Subject: sched/psi: Allow unprivileged polling of N*2s period
+
+From: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
+
+[ Upstream commit d82caa273565b45fcf103148950549af76c314b0 ]
+
+PSI offers 2 mechanisms to get information about a specific resource
+pressure. One is reading from /proc/pressure/<resource>, which gives
+average pressures aggregated every 2s. The other is creating a pollable
+fd for a specific resource and cgroup.
+
+The trigger creation requires CAP_SYS_RESOURCE, and gives the
+possibility to pick a specific time window and threshold, spawning an RT
+thread to aggregate the data.
+
+Systemd would like to provide containers with the option to monitor pressure
+on their own cgroup and sub-cgroups. For example, if systemd launches a
+container that itself then launches services, the container should have
+the ability to poll() for pressure in individual services. But neither
+the container nor the services are privileged.
+
+This patch implements a mechanism to allow unprivileged users to create
+pressure triggers. The difference from privileged trigger creation is
+that unprivileged triggers must have a time window that is a multiple of
+2s. This avoids unrestricted spawning of RT threads and instead reuses
+the aggregation mechanism already used for the averages, which runs
+independently of any triggers.
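+
+A userspace sketch of what this enables, following the trigger format in
+Documentation/accounting/psi.rst (the 4000000us window is just 2 * 2s, so
+an unprivileged process is allowed to use it):
+
+        #include <fcntl.h>
+        #include <poll.h>
+        #include <string.h>
+        #include <unistd.h>
+
+        /* Hypothetical unprivileged monitor: 150ms of "some" memory
+         * pressure per 4s window (a multiple of 2s, as now required). */
+        int main(void)
+        {
+                const char trig[] = "some 150000 4000000";
+                struct pollfd pfd;
+
+                pfd.fd = open("/proc/pressure/memory", O_RDWR | O_NONBLOCK);
+                pfd.events = POLLPRI;
+                if (pfd.fd < 0 || write(pfd.fd, trig, strlen(trig) + 1) < 0)
+                        return 1;
+                poll(&pfd, 1, -1);      /* wakes when the threshold is breached */
+                return 0;
+        }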
+
+Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Link: https://lore.kernel.org/r/20230330105418.77061-5-cerasuolodomenico@gmail.com
+Stable-dep-of: aff037078eca ("sched/psi: use kernfs polling functions for PSI trigger polling")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ Documentation/accounting/psi.rst | 4 +
+ include/linux/psi.h | 2 +-
+ include/linux/psi_types.h | 7 ++
+ kernel/cgroup/cgroup.c | 2 +-
+ kernel/sched/psi.c | 175 +++++++++++++++++++------------
+ 5 files changed, 121 insertions(+), 69 deletions(-)
+
+diff --git a/Documentation/accounting/psi.rst b/Documentation/accounting/psi.rst
+index 5e40b3f437f90..df6062eb3abbc 100644
+--- a/Documentation/accounting/psi.rst
++++ b/Documentation/accounting/psi.rst
+@@ -105,6 +105,10 @@ prevent overly frequent polling. Max limit is chosen as a high enough number
+ after which monitors are most likely not needed and psi averages can be used
+ instead.
+
++Unprivileged users can also create monitors, with the only limitation that the
++window size must be a multiple of 2s, in order to prevent excessive resource
++usage.
++
+ When activated, psi monitor stays active for at least the duration of one
+ tracking window to avoid repeated activations/deactivations when system is
+ bouncing in and out of the stall state.
+diff --git a/include/linux/psi.h b/include/linux/psi.h
+index b029a847def1e..ab26200c28033 100644
+--- a/include/linux/psi.h
++++ b/include/linux/psi.h
+@@ -24,7 +24,7 @@ void psi_memstall_leave(unsigned long *flags);
+
+ int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
+ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+- char *buf, enum psi_res res);
++ char *buf, enum psi_res res, struct file *file);
+ void psi_trigger_destroy(struct psi_trigger *t);
+
+ __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
+diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
+index 1819afa8b1987..040c089581c6c 100644
+--- a/include/linux/psi_types.h
++++ b/include/linux/psi_types.h
+@@ -151,6 +151,9 @@ struct psi_trigger {
+
+ /* Deferred event(s) from previous ratelimit window */
+ bool pending_event;
++
++ /* Trigger type - PSI_AVGS for unprivileged, PSI_POLL for RT */
++ enum psi_aggregators aggregator;
+ };
+
+ struct psi_group {
+@@ -171,6 +174,10 @@ struct psi_group {
+ /* Aggregator work control */
+ struct delayed_work avgs_work;
+
++ /* Unprivileged triggers against N*PSI_FREQ windows */
++ struct list_head avg_triggers;
++ u32 avg_nr_triggers[NR_PSI_STATES - 1];
++
+ /* Total stall times and sampled pressure averages */
+ u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
+ unsigned long avg[NR_PSI_STATES - 1][3];
+diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
+index 2380c4daef33d..c35efae566a4b 100644
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -3771,7 +3771,7 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
+ }
+
+ psi = cgroup_psi(cgrp);
+- new = psi_trigger_create(psi, buf, res);
++ new = psi_trigger_create(psi, buf, res, of->file);
+ if (IS_ERR(new)) {
+ cgroup_put(cgrp);
+ return PTR_ERR(new);
+diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
+index f3df6a8ff493c..e072f6b31bf30 100644
+--- a/kernel/sched/psi.c
++++ b/kernel/sched/psi.c
+@@ -186,9 +186,14 @@ static void group_init(struct psi_group *group)
+ seqcount_init(&per_cpu_ptr(group->pcpu, cpu)->seq);
+ group->avg_last_update = sched_clock();
+ group->avg_next_update = group->avg_last_update + psi_period;
+- INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work);
+ mutex_init(&group->avgs_lock);
+- /* Init trigger-related members */
++
++ /* Init avg trigger-related members */
++ INIT_LIST_HEAD(&group->avg_triggers);
++ memset(group->avg_nr_triggers, 0, sizeof(group->avg_nr_triggers));
++ INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work);
++
++ /* Init rtpoll trigger-related members */
+ atomic_set(&group->rtpoll_scheduled, 0);
+ mutex_init(&group->rtpoll_trigger_lock);
+ INIT_LIST_HEAD(&group->rtpoll_triggers);
+@@ -430,21 +435,32 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value)
+ return growth;
+ }
+
+-static u64 update_triggers(struct psi_group *group, u64 now, bool *update_total)
++static u64 update_triggers(struct psi_group *group, u64 now, bool *update_total,
++ enum psi_aggregators aggregator)
+ {
+ struct psi_trigger *t;
+- u64 *total = group->total[PSI_POLL];
++ u64 *total = group->total[aggregator];
++ struct list_head *triggers;
++ u64 *aggregator_total;
+ *update_total = false;
+
++ if (aggregator == PSI_AVGS) {
++ triggers = &group->avg_triggers;
++ aggregator_total = group->avg_total;
++ } else {
++ triggers = &group->rtpoll_triggers;
++ aggregator_total = group->rtpoll_total;
++ }
++
+ /*
+ * On subsequent updates, calculate growth deltas and let
+ * watchers know when their specified thresholds are exceeded.
+ */
+- list_for_each_entry(t, &group->rtpoll_triggers, node) {
++ list_for_each_entry(t, triggers, node) {
+ u64 growth;
+ bool new_stall;
+
+- new_stall = group->rtpoll_total[t->state] != total[t->state];
++ new_stall = aggregator_total[t->state] != total[t->state];
+
+ /* Check for stall activity or a previous threshold breach */
+ if (!new_stall && !t->pending_event)
+@@ -546,6 +562,7 @@ static void psi_avgs_work(struct work_struct *work)
+ struct delayed_work *dwork;
+ struct psi_group *group;
+ u32 changed_states;
++ bool update_total;
+ u64 now;
+
+ dwork = to_delayed_work(work);
+@@ -563,8 +580,10 @@ static void psi_avgs_work(struct work_struct *work)
+ * Once restarted, we'll catch up the running averages in one
+ * go - see calc_avgs() and missed_periods.
+ */
+- if (now >= group->avg_next_update)
++ if (now >= group->avg_next_update) {
++ update_triggers(group, now, &update_total, PSI_AVGS);
+ group->avg_next_update = update_averages(group, now);
++ }
+
+ if (changed_states & PSI_STATE_RESCHEDULE) {
+ schedule_delayed_work(dwork, nsecs_to_jiffies(
+@@ -574,7 +593,7 @@ static void psi_avgs_work(struct work_struct *work)
+ mutex_unlock(&group->avgs_lock);
+ }
+
+-static void init_triggers(struct psi_group *group, u64 now)
++static void init_rtpoll_triggers(struct psi_group *group, u64 now)
+ {
+ struct psi_trigger *t;
+
+@@ -667,7 +686,7 @@ static void psi_rtpoll_work(struct psi_group *group)
+ if (changed_states & group->rtpoll_states) {
+ /* Initialize trigger windows when entering polling mode */
+ if (now > group->rtpoll_until)
+- init_triggers(group, now);
++ init_rtpoll_triggers(group, now);
+
+ /*
+ * Keep the monitor active for at least the duration of the
+@@ -684,7 +703,7 @@ static void psi_rtpoll_work(struct psi_group *group)
+ }
+
+ if (now >= group->rtpoll_next_update) {
+- group->rtpoll_next_update = update_triggers(group, now, &update_total);
++ group->rtpoll_next_update = update_triggers(group, now, &update_total, PSI_POLL);
+ if (update_total)
+ memcpy(group->rtpoll_total, group->total[PSI_POLL],
+ sizeof(group->rtpoll_total));
+@@ -1254,16 +1273,23 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
+ }
+
+ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+- char *buf, enum psi_res res)
++ char *buf, enum psi_res res, struct file *file)
+ {
+ struct psi_trigger *t;
+ enum psi_states state;
+ u32 threshold_us;
++ bool privileged;
+ u32 window_us;
+
+ if (static_branch_likely(&psi_disabled))
+ return ERR_PTR(-EOPNOTSUPP);
+
++ /*
++ * Checking the privilege here on file->f_cred implies that a privileged user
++ * could open the file and delegate the write to an unprivileged one.
++ */
++ privileged = cap_raised(file->f_cred->cap_effective, CAP_SYS_RESOURCE);
++
+ if (sscanf(buf, "some %u %u", &threshold_us, &window_us) == 2)
+ state = PSI_IO_SOME + res * 2;
+ else if (sscanf(buf, "full %u %u", &threshold_us, &window_us) == 2)
+@@ -1283,6 +1309,13 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+ window_us > WINDOW_MAX_US)
+ return ERR_PTR(-EINVAL);
+
++ /*
++ * Unprivileged users can only use 2s windows so that averages aggregation
++ * work is used, and no RT threads need to be spawned.
++ */
++ if (!privileged && window_us % 2000000)
++ return ERR_PTR(-EINVAL);
++
+ /* Check threshold */
+ if (threshold_us == 0 || threshold_us > window_us)
+ return ERR_PTR(-EINVAL);
+@@ -1302,31 +1335,40 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+ t->last_event_time = 0;
+ init_waitqueue_head(&t->event_wait);
+ t->pending_event = false;
++ t->aggregator = privileged ? PSI_POLL : PSI_AVGS;
+
+- mutex_lock(&group->rtpoll_trigger_lock);
++ if (privileged) {
++ mutex_lock(&group->rtpoll_trigger_lock);
+
+- if (!rcu_access_pointer(group->rtpoll_task)) {
+- struct task_struct *task;
++ if (!rcu_access_pointer(group->rtpoll_task)) {
++ struct task_struct *task;
+
+- task = kthread_create(psi_rtpoll_worker, group, "psimon");
+- if (IS_ERR(task)) {
+- kfree(t);
+- mutex_unlock(&group->rtpoll_trigger_lock);
+- return ERR_CAST(task);
++ task = kthread_create(psi_rtpoll_worker, group, "psimon");
++ if (IS_ERR(task)) {
++ kfree(t);
++ mutex_unlock(&group->rtpoll_trigger_lock);
++ return ERR_CAST(task);
++ }
++ atomic_set(&group->rtpoll_wakeup, 0);
++ wake_up_process(task);
++ rcu_assign_pointer(group->rtpoll_task, task);
+ }
+- atomic_set(&group->rtpoll_wakeup, 0);
+- wake_up_process(task);
+- rcu_assign_pointer(group->rtpoll_task, task);
+- }
+
+- list_add(&t->node, &group->rtpoll_triggers);
+- group->rtpoll_min_period = min(group->rtpoll_min_period,
+- div_u64(t->win.size, UPDATES_PER_WINDOW));
+- group->rtpoll_nr_triggers[t->state]++;
+- group->rtpoll_states |= (1 << t->state);
++ list_add(&t->node, &group->rtpoll_triggers);
++ group->rtpoll_min_period = min(group->rtpoll_min_period,
++ div_u64(t->win.size, UPDATES_PER_WINDOW));
++ group->rtpoll_nr_triggers[t->state]++;
++ group->rtpoll_states |= (1 << t->state);
+
+- mutex_unlock(&group->rtpoll_trigger_lock);
++ mutex_unlock(&group->rtpoll_trigger_lock);
++ } else {
++ mutex_lock(&group->avgs_lock);
++
++ list_add(&t->node, &group->avg_triggers);
++ group->avg_nr_triggers[t->state]++;
+
++ mutex_unlock(&group->avgs_lock);
++ }
+ return t;
+ }
+
+@@ -1350,34 +1392,41 @@ void psi_trigger_destroy(struct psi_trigger *t)
+ */
+ wake_up_pollfree(&t->event_wait);
+
+- mutex_lock(&group->rtpoll_trigger_lock);
+-
+- if (!list_empty(&t->node)) {
+- struct psi_trigger *tmp;
+- u64 period = ULLONG_MAX;
+-
+- list_del(&t->node);
+- group->rtpoll_nr_triggers[t->state]--;
+- if (!group->rtpoll_nr_triggers[t->state])
+- group->rtpoll_states &= ~(1 << t->state);
+- /* reset min update period for the remaining triggers */
+- list_for_each_entry(tmp, &group->rtpoll_triggers, node)
+- period = min(period, div_u64(tmp->win.size,
+- UPDATES_PER_WINDOW));
+- group->rtpoll_min_period = period;
+- /* Destroy rtpoll_task when the last trigger is destroyed */
+- if (group->rtpoll_states == 0) {
+- group->rtpoll_until = 0;
+- task_to_destroy = rcu_dereference_protected(
+- group->rtpoll_task,
+- lockdep_is_held(&group->rtpoll_trigger_lock));
+- rcu_assign_pointer(group->rtpoll_task, NULL);
+- del_timer(&group->rtpoll_timer);
++ if (t->aggregator == PSI_AVGS) {
++ mutex_lock(&group->avgs_lock);
++ if (!list_empty(&t->node)) {
++ list_del(&t->node);
++ group->avg_nr_triggers[t->state]--;
+ }
++ mutex_unlock(&group->avgs_lock);
++ } else {
++ mutex_lock(&group->rtpoll_trigger_lock);
++ if (!list_empty(&t->node)) {
++ struct psi_trigger *tmp;
++ u64 period = ULLONG_MAX;
++
++ list_del(&t->node);
++ group->rtpoll_nr_triggers[t->state]--;
++ if (!group->rtpoll_nr_triggers[t->state])
++ group->rtpoll_states &= ~(1 << t->state);
++ /* reset min update period for the remaining triggers */
++ list_for_each_entry(tmp, &group->rtpoll_triggers, node)
++ period = min(period, div_u64(tmp->win.size,
++ UPDATES_PER_WINDOW));
++ group->rtpoll_min_period = period;
++ /* Destroy rtpoll_task when the last trigger is destroyed */
++ if (group->rtpoll_states == 0) {
++ group->rtpoll_until = 0;
++ task_to_destroy = rcu_dereference_protected(
++ group->rtpoll_task,
++ lockdep_is_held(&group->rtpoll_trigger_lock));
++ rcu_assign_pointer(group->rtpoll_task, NULL);
++ del_timer(&group->rtpoll_timer);
++ }
++ }
++ mutex_unlock(&group->rtpoll_trigger_lock);
+ }
+
+- mutex_unlock(&group->rtpoll_trigger_lock);
+-
+ /*
+ * Wait for psi_schedule_rtpoll_work RCU to complete its read-side
+ * critical section before destroying the trigger and optionally the
+@@ -1437,27 +1486,19 @@ static int psi_cpu_show(struct seq_file *m, void *v)
+ return psi_show(m, &psi_system, PSI_CPU);
+ }
+
+-static int psi_open(struct file *file, int (*psi_show)(struct seq_file *, void *))
+-{
+- if (file->f_mode & FMODE_WRITE && !capable(CAP_SYS_RESOURCE))
+- return -EPERM;
+-
+- return single_open(file, psi_show, NULL);
+-}
+-
+ static int psi_io_open(struct inode *inode, struct file *file)
+ {
+- return psi_open(file, psi_io_show);
++ return single_open(file, psi_io_show, NULL);
+ }
+
+ static int psi_memory_open(struct inode *inode, struct file *file)
+ {
+- return psi_open(file, psi_memory_show);
++ return single_open(file, psi_memory_show, NULL);
+ }
+
+ static int psi_cpu_open(struct inode *inode, struct file *file)
+ {
+- return psi_open(file, psi_cpu_show);
++ return single_open(file, psi_cpu_show, NULL);
+ }
+
+ static ssize_t psi_write(struct file *file, const char __user *user_buf,
+@@ -1491,7 +1532,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
+ return -EBUSY;
+ }
+
+- new = psi_trigger_create(&psi_system, buf, res);
++ new = psi_trigger_create(&psi_system, buf, res, file);
+ if (IS_ERR(new)) {
+ mutex_unlock(&seq->lock);
+ return PTR_ERR(new);
+@@ -1571,7 +1612,7 @@ static int psi_irq_show(struct seq_file *m, void *v)
+
+ static int psi_irq_open(struct inode *inode, struct file *file)
+ {
+- return psi_open(file, psi_irq_show);
++ return single_open(file, psi_irq_show, NULL);
+ }
+
+ static ssize_t psi_irq_write(struct file *file, const char __user *user_buf,
+--
+2.39.2
+
--- /dev/null
+From 3d78ff2fdc7f963507676dadc4a58e7433f61819 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Mar 2023 12:54:17 +0200
+Subject: sched/psi: Extract update_triggers side effect
+
+From: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
+
+[ Upstream commit 4468fcae49f08e88fbbffe05b29496192df89991 ]
+
+This change moves the update_total flag out of the update_triggers
+function, which is currently called only in psi_poll_work.
+In the next patch, update_triggers will also be called in psi_avgs_work,
+but the total update information is specific to psi_poll_work.
+Returning the update_total value to the caller lets us avoid
+differentiating the implementation of update_triggers for different
+aggregators.
+
+Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Link: https://lore.kernel.org/r/20230330105418.77061-4-cerasuolodomenico@gmail.com
+Stable-dep-of: aff037078eca ("sched/psi: use kernfs polling functions for PSI trigger polling")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/psi.c | 19 ++++++++++---------
+ 1 file changed, 10 insertions(+), 9 deletions(-)
+
+diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
+index a3d0b5cf797ab..f3df6a8ff493c 100644
+--- a/kernel/sched/psi.c
++++ b/kernel/sched/psi.c
+@@ -430,11 +430,11 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value)
+ return growth;
+ }
+
+-static u64 update_triggers(struct psi_group *group, u64 now)
++static u64 update_triggers(struct psi_group *group, u64 now, bool *update_total)
+ {
+ struct psi_trigger *t;
+- bool update_total = false;
+ u64 *total = group->total[PSI_POLL];
++ *update_total = false;
+
+ /*
+ * On subsequent updates, calculate growth deltas and let
+@@ -462,7 +462,7 @@ static u64 update_triggers(struct psi_group *group, u64 now)
+ * been through all of them. Also remember to extend the
+ * polling time if we see new stall activity.
+ */
+- update_total = true;
++ *update_total = true;
+
+ /* Calculate growth since last update */
+ growth = window_update(&t->win, now, total[t->state]);
+@@ -485,10 +485,6 @@ static u64 update_triggers(struct psi_group *group, u64 now)
+ t->pending_event = false;
+ }
+
+- if (update_total)
+- memcpy(group->rtpoll_total, total,
+- sizeof(group->rtpoll_total));
+-
+ return now + group->rtpoll_min_period;
+ }
+
+@@ -622,6 +618,7 @@ static void psi_rtpoll_work(struct psi_group *group)
+ {
+ bool force_reschedule = false;
+ u32 changed_states;
++ bool update_total;
+ u64 now;
+
+ mutex_lock(&group->rtpoll_trigger_lock);
+@@ -686,8 +683,12 @@ static void psi_rtpoll_work(struct psi_group *group)
+ goto out;
+ }
+
+- if (now >= group->rtpoll_next_update)
+- group->rtpoll_next_update = update_triggers(group, now);
++ if (now >= group->rtpoll_next_update) {
++ group->rtpoll_next_update = update_triggers(group, now, &update_total);
++ if (update_total)
++ memcpy(group->rtpoll_total, group->total[PSI_POLL],
++ sizeof(group->rtpoll_total));
++ }
+
+ psi_schedule_rtpoll_work(group,
+ nsecs_to_jiffies(group->rtpoll_next_update - now) + 1,
+--
+2.39.2
+
--- /dev/null
+From cd6a5ae395de7987446d45c2944bc8de4a8917f7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Oct 2022 19:05:51 +0800
+Subject: sched/psi: Fix avgs_work re-arm in psi_avgs_work()
+
+From: Chengming Zhou <zhouchengming@bytedance.com>
+
+[ Upstream commit 2fcd7bbae90a6d844da8660a9d27079281dfbba2 ]
+
+Pavan reported a problem that PSI avgs_work idle shutoff is not
+working at all, because the PSI_NONIDLE condition would be observed in
+psi_avgs_work()->collect_percpu_times()->get_recent_times() even if
+the kworker running avgs_work is the only task on the CPU.
+
+Although commit 1b69ac6b40eb ("psi: fix aggregation idle shut-off")
+avoided the ping-pong wake problem when the worker sleeps, psi_avgs_work()
+will still always re-arm the avgs_work, so shutoff is not working.
+
+This patch changes the code to use PSI_STATE_RESCHEDULE to flag whether to
+re-arm avgs_work in get_recent_times(). For the current CPU, we re-arm
+avgs_work only when (NR_RUNNING > 1 || NR_IOWAIT > 0 || NR_MEMSTALL > 0);
+for other CPUs we can just check the PSI_NONIDLE delta. The new flag
+is only used in psi_avgs_work(), so we check in get_recent_times()
+that current_work() is avgs_work.
+
+One potential problem is that the brief period of non-idle time
+incurred between the aggregation run and the kworker's dequeue will
+be stranded in the per-cpu buckets until avgs_work runs next time.
+The buckets can hold 4s worth of time, and future activity will wake
+the avgs_work with a 2s delay, giving us 2s worth of data we can leave
+behind when shutting off the avgs_work. If the kworker runs other work
+after avgs_work is shut off and there is no scheduler activity for 2s,
+this may be a problem.
+
+Reported-by: Pavan Kondeti <quic_pkondeti@quicinc.com>
+Signed-off-by: Chengming Zhou <zhouchengming@bytedance.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Acked-by: Suren Baghdasaryan <surenb@google.com>
+Tested-by: Chengming Zhou <zhouchengming@bytedance.com>
+Link: https://lore.kernel.org/r/20221014110551.22695-1-zhouchengming@bytedance.com
+Stable-dep-of: aff037078eca ("sched/psi: use kernfs polling functions for PSI trigger polling")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/psi_types.h | 3 +++
+ kernel/sched/psi.c | 30 +++++++++++++++++++++++++++---
+ 2 files changed, 30 insertions(+), 3 deletions(-)
+
+diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
+index 14a1ebb74e11f..1e0a0d7ace3af 100644
+--- a/include/linux/psi_types.h
++++ b/include/linux/psi_types.h
+@@ -72,6 +72,9 @@ enum psi_states {
+ /* Use one bit in the state mask to track TSK_ONCPU */
+ #define PSI_ONCPU (1 << NR_PSI_STATES)
+
++/* Flag whether to re-arm avgs_work, see details in get_recent_times() */
++#define PSI_STATE_RESCHEDULE (1 << (NR_PSI_STATES + 1))
++
+ enum psi_aggregators {
+ PSI_AVGS = 0,
+ PSI_POLL,
+diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
+index e83c321461cf4..02e011cabe917 100644
+--- a/kernel/sched/psi.c
++++ b/kernel/sched/psi.c
+@@ -243,6 +243,8 @@ static void get_recent_times(struct psi_group *group, int cpu,
+ u32 *pchanged_states)
+ {
+ struct psi_group_cpu *groupc = per_cpu_ptr(group->pcpu, cpu);
++ int current_cpu = raw_smp_processor_id();
++ unsigned int tasks[NR_PSI_TASK_COUNTS];
+ u64 now, state_start;
+ enum psi_states s;
+ unsigned int seq;
+@@ -257,6 +259,8 @@ static void get_recent_times(struct psi_group *group, int cpu,
+ memcpy(times, groupc->times, sizeof(groupc->times));
+ state_mask = groupc->state_mask;
+ state_start = groupc->state_start;
++ if (cpu == current_cpu)
++ memcpy(tasks, groupc->tasks, sizeof(groupc->tasks));
+ } while (read_seqcount_retry(&groupc->seq, seq));
+
+ /* Calculate state time deltas against the previous snapshot */
+@@ -281,6 +285,28 @@ static void get_recent_times(struct psi_group *group, int cpu,
+ if (delta)
+ *pchanged_states |= (1 << s);
+ }
++
++ /*
++ * When collect_percpu_times() from the avgs_work, we don't want to
++ * re-arm avgs_work when all CPUs are IDLE. But the current CPU running
++ * this avgs_work is never IDLE, cause avgs_work can't be shut off.
++ * So for the current CPU, we need to re-arm avgs_work only when
++ * (NR_RUNNING > 1 || NR_IOWAIT > 0 || NR_MEMSTALL > 0), for other CPUs
++ * we can just check PSI_NONIDLE delta.
++ */
++ if (current_work() == &group->avgs_work.work) {
++ bool reschedule;
++
++ if (cpu == current_cpu)
++ reschedule = tasks[NR_RUNNING] +
++ tasks[NR_IOWAIT] +
++ tasks[NR_MEMSTALL] > 1;
++ else
++ reschedule = *pchanged_states & (1 << PSI_NONIDLE);
++
++ if (reschedule)
++ *pchanged_states |= PSI_STATE_RESCHEDULE;
++ }
+ }
+
+ static void calc_avgs(unsigned long avg[3], int missed_periods,
+@@ -416,7 +442,6 @@ static void psi_avgs_work(struct work_struct *work)
+ struct delayed_work *dwork;
+ struct psi_group *group;
+ u32 changed_states;
+- bool nonidle;
+ u64 now;
+
+ dwork = to_delayed_work(work);
+@@ -427,7 +452,6 @@ static void psi_avgs_work(struct work_struct *work)
+ now = sched_clock();
+
+ collect_percpu_times(group, PSI_AVGS, &changed_states);
+- nonidle = changed_states & (1 << PSI_NONIDLE);
+ /*
+ * If there is task activity, periodically fold the per-cpu
+ * times and feed samples into the running averages. If things
+@@ -438,7 +462,7 @@ static void psi_avgs_work(struct work_struct *work)
+ if (now >= group->avg_next_update)
+ group->avg_next_update = update_averages(group, now);
+
+- if (nonidle) {
++ if (changed_states & PSI_STATE_RESCHEDULE) {
+ schedule_delayed_work(dwork, nsecs_to_jiffies(
+ group->avg_next_update - now) + 1);
+ }
+--
+2.39.2
+
--- /dev/null
+From c64ea43f91987426ad1c79576bec5a3f7421d28d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Mar 2023 12:54:15 +0200
+Subject: sched/psi: Rearrange polling code in preparation
+
+From: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
+
+[ Upstream commit 7fab21fa0d000a0ea32d73ce8eec68557c6c268b ]
+
+Move a few functions up in the file to avoid the forward declarations that
+would otherwise be needed by the patch implementing unprivileged PSI triggers.
+
+Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Link: https://lore.kernel.org/r/20230330105418.77061-2-cerasuolodomenico@gmail.com
+Stable-dep-of: aff037078eca ("sched/psi: use kernfs polling functions for PSI trigger polling")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/sched/psi.c | 196 ++++++++++++++++++++++-----------------------
+ 1 file changed, 98 insertions(+), 98 deletions(-)
+
+diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
+index 02e011cabe917..fe9269f1d2a46 100644
+--- a/kernel/sched/psi.c
++++ b/kernel/sched/psi.c
+@@ -384,92 +384,6 @@ static void collect_percpu_times(struct psi_group *group,
+ *pchanged_states = changed_states;
+ }
+
+-static u64 update_averages(struct psi_group *group, u64 now)
+-{
+- unsigned long missed_periods = 0;
+- u64 expires, period;
+- u64 avg_next_update;
+- int s;
+-
+- /* avgX= */
+- expires = group->avg_next_update;
+- if (now - expires >= psi_period)
+- missed_periods = div_u64(now - expires, psi_period);
+-
+- /*
+- * The periodic clock tick can get delayed for various
+- * reasons, especially on loaded systems. To avoid clock
+- * drift, we schedule the clock in fixed psi_period intervals.
+- * But the deltas we sample out of the per-cpu buckets above
+- * are based on the actual time elapsing between clock ticks.
+- */
+- avg_next_update = expires + ((1 + missed_periods) * psi_period);
+- period = now - (group->avg_last_update + (missed_periods * psi_period));
+- group->avg_last_update = now;
+-
+- for (s = 0; s < NR_PSI_STATES - 1; s++) {
+- u32 sample;
+-
+- sample = group->total[PSI_AVGS][s] - group->avg_total[s];
+- /*
+- * Due to the lockless sampling of the time buckets,
+- * recorded time deltas can slip into the next period,
+- * which under full pressure can result in samples in
+- * excess of the period length.
+- *
+- * We don't want to report non-sensical pressures in
+- * excess of 100%, nor do we want to drop such events
+- * on the floor. Instead we punt any overage into the
+- * future until pressure subsides. By doing this we
+- * don't underreport the occurring pressure curve, we
+- * just report it delayed by one period length.
+- *
+- * The error isn't cumulative. As soon as another
+- * delta slips from a period P to P+1, by definition
+- * it frees up its time T in P.
+- */
+- if (sample > period)
+- sample = period;
+- group->avg_total[s] += sample;
+- calc_avgs(group->avg[s], missed_periods, sample, period);
+- }
+-
+- return avg_next_update;
+-}
+-
+-static void psi_avgs_work(struct work_struct *work)
+-{
+- struct delayed_work *dwork;
+- struct psi_group *group;
+- u32 changed_states;
+- u64 now;
+-
+- dwork = to_delayed_work(work);
+- group = container_of(dwork, struct psi_group, avgs_work);
+-
+- mutex_lock(&group->avgs_lock);
+-
+- now = sched_clock();
+-
+- collect_percpu_times(group, PSI_AVGS, &changed_states);
+- /*
+- * If there is task activity, periodically fold the per-cpu
+- * times and feed samples into the running averages. If things
+- * are idle and there is no data to process, stop the clock.
+- * Once restarted, we'll catch up the running averages in one
+- * go - see calc_avgs() and missed_periods.
+- */
+- if (now >= group->avg_next_update)
+- group->avg_next_update = update_averages(group, now);
+-
+- if (changed_states & PSI_STATE_RESCHEDULE) {
+- schedule_delayed_work(dwork, nsecs_to_jiffies(
+- group->avg_next_update - now) + 1);
+- }
+-
+- mutex_unlock(&group->avgs_lock);
+-}
+-
+ /* Trigger tracking window manipulations */
+ static void window_reset(struct psi_window *win, u64 now, u64 value,
+ u64 prev_growth)
+@@ -516,18 +430,6 @@ static u64 window_update(struct psi_window *win, u64 now, u64 value)
+ return growth;
+ }
+
+-static void init_triggers(struct psi_group *group, u64 now)
+-{
+- struct psi_trigger *t;
+-
+- list_for_each_entry(t, &group->triggers, node)
+- window_reset(&t->win, now,
+- group->total[PSI_POLL][t->state], 0);
+- memcpy(group->polling_total, group->total[PSI_POLL],
+- sizeof(group->polling_total));
+- group->polling_next_update = now + group->poll_min_period;
+-}
+-
+ static u64 update_triggers(struct psi_group *group, u64 now)
+ {
+ struct psi_trigger *t;
+@@ -590,6 +492,104 @@ static u64 update_triggers(struct psi_group *group, u64 now)
+ return now + group->poll_min_period;
+ }
+
++static u64 update_averages(struct psi_group *group, u64 now)
++{
++ unsigned long missed_periods = 0;
++ u64 expires, period;
++ u64 avg_next_update;
++ int s;
++
++ /* avgX= */
++ expires = group->avg_next_update;
++ if (now - expires >= psi_period)
++ missed_periods = div_u64(now - expires, psi_period);
++
++ /*
++ * The periodic clock tick can get delayed for various
++ * reasons, especially on loaded systems. To avoid clock
++ * drift, we schedule the clock in fixed psi_period intervals.
++ * But the deltas we sample out of the per-cpu buckets above
++ * are based on the actual time elapsing between clock ticks.
++ */
++ avg_next_update = expires + ((1 + missed_periods) * psi_period);
++ period = now - (group->avg_last_update + (missed_periods * psi_period));
++ group->avg_last_update = now;
++
++ for (s = 0; s < NR_PSI_STATES - 1; s++) {
++ u32 sample;
++
++ sample = group->total[PSI_AVGS][s] - group->avg_total[s];
++ /*
++ * Due to the lockless sampling of the time buckets,
++ * recorded time deltas can slip into the next period,
++ * which under full pressure can result in samples in
++ * excess of the period length.
++ *
++ * We don't want to report non-sensical pressures in
++ * excess of 100%, nor do we want to drop such events
++ * on the floor. Instead we punt any overage into the
++ * future until pressure subsides. By doing this we
++ * don't underreport the occurring pressure curve, we
++ * just report it delayed by one period length.
++ *
++ * The error isn't cumulative. As soon as another
++ * delta slips from a period P to P+1, by definition
++ * it frees up its time T in P.
++ */
++ if (sample > period)
++ sample = period;
++ group->avg_total[s] += sample;
++ calc_avgs(group->avg[s], missed_periods, sample, period);
++ }
++
++ return avg_next_update;
++}
++
++static void psi_avgs_work(struct work_struct *work)
++{
++ struct delayed_work *dwork;
++ struct psi_group *group;
++ u32 changed_states;
++ u64 now;
++
++ dwork = to_delayed_work(work);
++ group = container_of(dwork, struct psi_group, avgs_work);
++
++ mutex_lock(&group->avgs_lock);
++
++ now = sched_clock();
++
++ collect_percpu_times(group, PSI_AVGS, &changed_states);
++ /*
++ * If there is task activity, periodically fold the per-cpu
++ * times and feed samples into the running averages. If things
++ * are idle and there is no data to process, stop the clock.
++ * Once restarted, we'll catch up the running averages in one
++ * go - see calc_avgs() and missed_periods.
++ */
++ if (now >= group->avg_next_update)
++ group->avg_next_update = update_averages(group, now);
++
++ if (changed_states & PSI_STATE_RESCHEDULE) {
++ schedule_delayed_work(dwork, nsecs_to_jiffies(
++ group->avg_next_update - now) + 1);
++ }
++
++ mutex_unlock(&group->avgs_lock);
++}
++
++static void init_triggers(struct psi_group *group, u64 now)
++{
++ struct psi_trigger *t;
++
++ list_for_each_entry(t, &group->triggers, node)
++ window_reset(&t->win, now,
++ group->total[PSI_POLL][t->state], 0);
++ memcpy(group->polling_total, group->total[PSI_POLL],
++ sizeof(group->polling_total));
++ group->polling_next_update = now + group->poll_min_period;
++}
++
+ /* Schedule polling if it's not already scheduled or forced. */
+ static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay,
+ bool force)
+--
+2.39.2
+
--- /dev/null
+From 0970d615d9b33fac51e3ce6bebe313abcf75dfe9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 30 Mar 2023 12:54:16 +0200
+Subject: sched/psi: Rename existing poll members in preparation
+
+From: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
+
+[ Upstream commit 65457b74aa9437418e552e8d52d7112d4f9901a6 ]
+
+Rename members in the PSI implementation to draw a clear distinction
+between the privileged and the unprivileged trigger code that will be
+implemented in the next patch.
+
+Suggested-by: Johannes Weiner <hannes@cmpxchg.org>
+Signed-off-by: Domenico Cerasuolo <cerasuolodomenico@gmail.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Acked-by: Johannes Weiner <hannes@cmpxchg.org>
+Link: https://lore.kernel.org/r/20230330105418.77061-3-cerasuolodomenico@gmail.com
+Stable-dep-of: aff037078eca ("sched/psi: use kernfs polling functions for PSI trigger polling")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/psi_types.h | 36 ++++-----
+ kernel/sched/psi.c | 163 +++++++++++++++++++-------------------
+ 2 files changed, 100 insertions(+), 99 deletions(-)
+
+diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
+index 1e0a0d7ace3af..1819afa8b1987 100644
+--- a/include/linux/psi_types.h
++++ b/include/linux/psi_types.h
+@@ -175,26 +175,26 @@ struct psi_group {
+ u64 total[NR_PSI_AGGREGATORS][NR_PSI_STATES - 1];
+ unsigned long avg[NR_PSI_STATES - 1][3];
+
+- /* Monitor work control */
+- struct task_struct __rcu *poll_task;
+- struct timer_list poll_timer;
+- wait_queue_head_t poll_wait;
+- atomic_t poll_wakeup;
+- atomic_t poll_scheduled;
++ /* Monitor RT polling work control */
++ struct task_struct __rcu *rtpoll_task;
++ struct timer_list rtpoll_timer;
++ wait_queue_head_t rtpoll_wait;
++ atomic_t rtpoll_wakeup;
++ atomic_t rtpoll_scheduled;
+
+ /* Protects data used by the monitor */
+- struct mutex trigger_lock;
+-
+- /* Configured polling triggers */
+- struct list_head triggers;
+- u32 nr_triggers[NR_PSI_STATES - 1];
+- u32 poll_states;
+- u64 poll_min_period;
+-
+- /* Total stall times at the start of monitor activation */
+- u64 polling_total[NR_PSI_STATES - 1];
+- u64 polling_next_update;
+- u64 polling_until;
++ struct mutex rtpoll_trigger_lock;
++
++ /* Configured RT polling triggers */
++ struct list_head rtpoll_triggers;
++ u32 rtpoll_nr_triggers[NR_PSI_STATES - 1];
++ u32 rtpoll_states;
++ u64 rtpoll_min_period;
++
++ /* Total stall times at the start of RT polling monitor activation */
++ u64 rtpoll_total[NR_PSI_STATES - 1];
++ u64 rtpoll_next_update;
++ u64 rtpoll_until;
+ };
+
+ #else /* CONFIG_PSI */
+diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
+index fe9269f1d2a46..a3d0b5cf797ab 100644
+--- a/kernel/sched/psi.c
++++ b/kernel/sched/psi.c
+@@ -189,14 +189,14 @@ static void group_init(struct psi_group *group)
+ INIT_DELAYED_WORK(&group->avgs_work, psi_avgs_work);
+ mutex_init(&group->avgs_lock);
+ /* Init trigger-related members */
+- atomic_set(&group->poll_scheduled, 0);
+- mutex_init(&group->trigger_lock);
+- INIT_LIST_HEAD(&group->triggers);
+- group->poll_min_period = U32_MAX;
+- group->polling_next_update = ULLONG_MAX;
+- init_waitqueue_head(&group->poll_wait);
+- timer_setup(&group->poll_timer, poll_timer_fn, 0);
+- rcu_assign_pointer(group->poll_task, NULL);
++ atomic_set(&group->rtpoll_scheduled, 0);
++ mutex_init(&group->rtpoll_trigger_lock);
++ INIT_LIST_HEAD(&group->rtpoll_triggers);
++ group->rtpoll_min_period = U32_MAX;
++ group->rtpoll_next_update = ULLONG_MAX;
++ init_waitqueue_head(&group->rtpoll_wait);
++ timer_setup(&group->rtpoll_timer, poll_timer_fn, 0);
++ rcu_assign_pointer(group->rtpoll_task, NULL);
+ }
+
+ void __init psi_init(void)
+@@ -440,11 +440,11 @@ static u64 update_triggers(struct psi_group *group, u64 now)
+ * On subsequent updates, calculate growth deltas and let
+ * watchers know when their specified thresholds are exceeded.
+ */
+- list_for_each_entry(t, &group->triggers, node) {
++ list_for_each_entry(t, &group->rtpoll_triggers, node) {
+ u64 growth;
+ bool new_stall;
+
+- new_stall = group->polling_total[t->state] != total[t->state];
++ new_stall = group->rtpoll_total[t->state] != total[t->state];
+
+ /* Check for stall activity or a previous threshold breach */
+ if (!new_stall && !t->pending_event)
+@@ -486,10 +486,10 @@ static u64 update_triggers(struct psi_group *group, u64 now)
+ }
+
+ if (update_total)
+- memcpy(group->polling_total, total,
+- sizeof(group->polling_total));
++ memcpy(group->rtpoll_total, total,
++ sizeof(group->rtpoll_total));
+
+- return now + group->poll_min_period;
++ return now + group->rtpoll_min_period;
+ }
+
+ static u64 update_averages(struct psi_group *group, u64 now)
+@@ -582,53 +582,53 @@ static void init_triggers(struct psi_group *group, u64 now)
+ {
+ struct psi_trigger *t;
+
+- list_for_each_entry(t, &group->triggers, node)
++ list_for_each_entry(t, &group->rtpoll_triggers, node)
+ window_reset(&t->win, now,
+ group->total[PSI_POLL][t->state], 0);
+- memcpy(group->polling_total, group->total[PSI_POLL],
+- sizeof(group->polling_total));
+- group->polling_next_update = now + group->poll_min_period;
++ memcpy(group->rtpoll_total, group->total[PSI_POLL],
++ sizeof(group->rtpoll_total));
++ group->rtpoll_next_update = now + group->rtpoll_min_period;
+ }
+
+ /* Schedule polling if it's not already scheduled or forced. */
+-static void psi_schedule_poll_work(struct psi_group *group, unsigned long delay,
++static void psi_schedule_rtpoll_work(struct psi_group *group, unsigned long delay,
+ bool force)
+ {
+ struct task_struct *task;
+
+ /*
+ * atomic_xchg should be called even when !force to provide a
+- * full memory barrier (see the comment inside psi_poll_work).
++ * full memory barrier (see the comment inside psi_rtpoll_work).
+ */
+- if (atomic_xchg(&group->poll_scheduled, 1) && !force)
++ if (atomic_xchg(&group->rtpoll_scheduled, 1) && !force)
+ return;
+
+ rcu_read_lock();
+
+- task = rcu_dereference(group->poll_task);
++ task = rcu_dereference(group->rtpoll_task);
+ /*
+ * kworker might be NULL in case psi_trigger_destroy races with
+ * psi_task_change (hotpath) which can't use locks
+ */
+ if (likely(task))
+- mod_timer(&group->poll_timer, jiffies + delay);
++ mod_timer(&group->rtpoll_timer, jiffies + delay);
+ else
+- atomic_set(&group->poll_scheduled, 0);
++ atomic_set(&group->rtpoll_scheduled, 0);
+
+ rcu_read_unlock();
+ }
+
+-static void psi_poll_work(struct psi_group *group)
++static void psi_rtpoll_work(struct psi_group *group)
+ {
+ bool force_reschedule = false;
+ u32 changed_states;
+ u64 now;
+
+- mutex_lock(&group->trigger_lock);
++ mutex_lock(&group->rtpoll_trigger_lock);
+
+ now = sched_clock();
+
+- if (now > group->polling_until) {
++ if (now > group->rtpoll_until) {
+ /*
+ * We are either about to start or might stop polling if no
+ * state change was recorded. Resetting poll_scheduled leaves
+@@ -638,7 +638,7 @@ static void psi_poll_work(struct psi_group *group)
+ * should be negligible and polling_next_update still keeps
+ * updates correctly on schedule.
+ */
+- atomic_set(&group->poll_scheduled, 0);
++ atomic_set(&group->rtpoll_scheduled, 0);
+ /*
+ * A task change can race with the poll worker that is supposed to
+ * report on it. To avoid missing events, ensure ordering between
+@@ -667,9 +667,9 @@ static void psi_poll_work(struct psi_group *group)
+
+ collect_percpu_times(group, PSI_POLL, &changed_states);
+
+- if (changed_states & group->poll_states) {
++ if (changed_states & group->rtpoll_states) {
+ /* Initialize trigger windows when entering polling mode */
+- if (now > group->polling_until)
++ if (now > group->rtpoll_until)
+ init_triggers(group, now);
+
+ /*
+@@ -677,50 +677,50 @@ static void psi_poll_work(struct psi_group *group)
+ * minimum tracking window as long as monitor states are
+ * changing.
+ */
+- group->polling_until = now +
+- group->poll_min_period * UPDATES_PER_WINDOW;
++ group->rtpoll_until = now +
++ group->rtpoll_min_period * UPDATES_PER_WINDOW;
+ }
+
+- if (now > group->polling_until) {
+- group->polling_next_update = ULLONG_MAX;
++ if (now > group->rtpoll_until) {
++ group->rtpoll_next_update = ULLONG_MAX;
+ goto out;
+ }
+
+- if (now >= group->polling_next_update)
+- group->polling_next_update = update_triggers(group, now);
++ if (now >= group->rtpoll_next_update)
++ group->rtpoll_next_update = update_triggers(group, now);
+
+- psi_schedule_poll_work(group,
+- nsecs_to_jiffies(group->polling_next_update - now) + 1,
++ psi_schedule_rtpoll_work(group,
++ nsecs_to_jiffies(group->rtpoll_next_update - now) + 1,
+ force_reschedule);
+
+ out:
+- mutex_unlock(&group->trigger_lock);
++ mutex_unlock(&group->rtpoll_trigger_lock);
+ }
+
+-static int psi_poll_worker(void *data)
++static int psi_rtpoll_worker(void *data)
+ {
+ struct psi_group *group = (struct psi_group *)data;
+
+ sched_set_fifo_low(current);
+
+ while (true) {
+- wait_event_interruptible(group->poll_wait,
+- atomic_cmpxchg(&group->poll_wakeup, 1, 0) ||
++ wait_event_interruptible(group->rtpoll_wait,
++ atomic_cmpxchg(&group->rtpoll_wakeup, 1, 0) ||
+ kthread_should_stop());
+ if (kthread_should_stop())
+ break;
+
+- psi_poll_work(group);
++ psi_rtpoll_work(group);
+ }
+ return 0;
+ }
+
+ static void poll_timer_fn(struct timer_list *t)
+ {
+- struct psi_group *group = from_timer(group, t, poll_timer);
++ struct psi_group *group = from_timer(group, t, rtpoll_timer);
+
+- atomic_set(&group->poll_wakeup, 1);
+- wake_up_interruptible(&group->poll_wait);
++ atomic_set(&group->rtpoll_wakeup, 1);
++ wake_up_interruptible(&group->rtpoll_wait);
+ }
+
+ static void record_times(struct psi_group_cpu *groupc, u64 now)
+@@ -851,8 +851,8 @@ static void psi_group_change(struct psi_group *group, int cpu,
+
+ write_seqcount_end(&groupc->seq);
+
+- if (state_mask & group->poll_states)
+- psi_schedule_poll_work(group, 1, false);
++ if (state_mask & group->rtpoll_states)
++ psi_schedule_rtpoll_work(group, 1, false);
+
+ if (wake_clock && !delayed_work_pending(&group->avgs_work))
+ schedule_delayed_work(&group->avgs_work, PSI_FREQ);
+@@ -1005,8 +1005,8 @@ void psi_account_irqtime(struct task_struct *task, u32 delta)
+
+ write_seqcount_end(&groupc->seq);
+
+- if (group->poll_states & (1 << PSI_IRQ_FULL))
+- psi_schedule_poll_work(group, 1, false);
++ if (group->rtpoll_states & (1 << PSI_IRQ_FULL))
++ psi_schedule_rtpoll_work(group, 1, false);
+ } while ((group = group->parent));
+ }
+ #endif
+@@ -1101,7 +1101,7 @@ void psi_cgroup_free(struct cgroup *cgroup)
+ cancel_delayed_work_sync(&cgroup->psi->avgs_work);
+ free_percpu(cgroup->psi->pcpu);
+ /* All triggers must be removed by now */
+- WARN_ONCE(cgroup->psi->poll_states, "psi: trigger leak\n");
++ WARN_ONCE(cgroup->psi->rtpoll_states, "psi: trigger leak\n");
+ kfree(cgroup->psi);
+ }
+
+@@ -1302,29 +1302,29 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+ init_waitqueue_head(&t->event_wait);
+ t->pending_event = false;
+
+- mutex_lock(&group->trigger_lock);
++ mutex_lock(&group->rtpoll_trigger_lock);
+
+- if (!rcu_access_pointer(group->poll_task)) {
++ if (!rcu_access_pointer(group->rtpoll_task)) {
+ struct task_struct *task;
+
+- task = kthread_create(psi_poll_worker, group, "psimon");
++ task = kthread_create(psi_rtpoll_worker, group, "psimon");
+ if (IS_ERR(task)) {
+ kfree(t);
+- mutex_unlock(&group->trigger_lock);
++ mutex_unlock(&group->rtpoll_trigger_lock);
+ return ERR_CAST(task);
+ }
+- atomic_set(&group->poll_wakeup, 0);
++ atomic_set(&group->rtpoll_wakeup, 0);
+ wake_up_process(task);
+- rcu_assign_pointer(group->poll_task, task);
++ rcu_assign_pointer(group->rtpoll_task, task);
+ }
+
+- list_add(&t->node, &group->triggers);
+- group->poll_min_period = min(group->poll_min_period,
++ list_add(&t->node, &group->rtpoll_triggers);
++ group->rtpoll_min_period = min(group->rtpoll_min_period,
+ div_u64(t->win.size, UPDATES_PER_WINDOW));
+- group->nr_triggers[t->state]++;
+- group->poll_states |= (1 << t->state);
++ group->rtpoll_nr_triggers[t->state]++;
++ group->rtpoll_states |= (1 << t->state);
+
+- mutex_unlock(&group->trigger_lock);
++ mutex_unlock(&group->rtpoll_trigger_lock);
+
+ return t;
+ }
+@@ -1349,51 +1349,52 @@ void psi_trigger_destroy(struct psi_trigger *t)
+ */
+ wake_up_pollfree(&t->event_wait);
+
+- mutex_lock(&group->trigger_lock);
++ mutex_lock(&group->rtpoll_trigger_lock);
+
+ if (!list_empty(&t->node)) {
+ struct psi_trigger *tmp;
+ u64 period = ULLONG_MAX;
+
+ list_del(&t->node);
+- group->nr_triggers[t->state]--;
+- if (!group->nr_triggers[t->state])
+- group->poll_states &= ~(1 << t->state);
++ group->rtpoll_nr_triggers[t->state]--;
++ if (!group->rtpoll_nr_triggers[t->state])
++ group->rtpoll_states &= ~(1 << t->state);
+ /* reset min update period for the remaining triggers */
+- list_for_each_entry(tmp, &group->triggers, node)
++ list_for_each_entry(tmp, &group->rtpoll_triggers, node)
+ period = min(period, div_u64(tmp->win.size,
+ UPDATES_PER_WINDOW));
+- group->poll_min_period = period;
+- /* Destroy poll_task when the last trigger is destroyed */
+- if (group->poll_states == 0) {
+- group->polling_until = 0;
++ group->rtpoll_min_period = period;
++ /* Destroy rtpoll_task when the last trigger is destroyed */
++ if (group->rtpoll_states == 0) {
++ group->rtpoll_until = 0;
+ task_to_destroy = rcu_dereference_protected(
+- group->poll_task,
+- lockdep_is_held(&group->trigger_lock));
+- rcu_assign_pointer(group->poll_task, NULL);
+- del_timer(&group->poll_timer);
++ group->rtpoll_task,
++ lockdep_is_held(&group->rtpoll_trigger_lock));
++ rcu_assign_pointer(group->rtpoll_task, NULL);
++ del_timer(&group->rtpoll_timer);
+ }
+ }
+
+- mutex_unlock(&group->trigger_lock);
++ mutex_unlock(&group->rtpoll_trigger_lock);
+
+ /*
+- * Wait for psi_schedule_poll_work RCU to complete its read-side
++ * Wait for psi_schedule_rtpoll_work RCU to complete its read-side
+ * critical section before destroying the trigger and optionally the
+- * poll_task.
++ * rtpoll_task.
+ */
+ synchronize_rcu();
+ /*
+- * Stop kthread 'psimon' after releasing trigger_lock to prevent a
+- * deadlock while waiting for psi_poll_work to acquire trigger_lock
++ * Stop kthread 'psimon' after releasing rtpoll_trigger_lock to prevent
++ * a deadlock while waiting for psi_rtpoll_work to acquire
++ * rtpoll_trigger_lock
+ */
+ if (task_to_destroy) {
+ /*
+ * After the RCU grace period has expired, the worker
+- * can no longer be found through group->poll_task.
++ * can no longer be found through group->rtpoll_task.
+ */
+ kthread_stop(task_to_destroy);
+- atomic_set(&group->poll_scheduled, 0);
++ atomic_set(&group->rtpoll_scheduled, 0);
+ }
+ kfree(t);
+ }
+--
+2.39.2
+
--- /dev/null
+From cc4a5d27580aad5472ec624bab19f12d4556982c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Jun 2023 17:56:12 -0700
+Subject: sched/psi: use kernfs polling functions for PSI trigger polling
+
+From: Suren Baghdasaryan <surenb@google.com>
+
+[ Upstream commit aff037078ecaecf34a7c2afab1341815f90fba5e ]
+
+Destroying a psi trigger in cgroup_file_release causes UAF issues when
+a cgroup is removed from under a polling process. This happens because
+cgroup removal calls cgroup_file_release while the actual file is still
+alive. Destroying the trigger at this point also destroys its waitqueue
+head, and if a polling process is still accessing that waitqueue through
+the file, it will step on the freed pointer:
+
+do_select
+ vfs_poll
+ do_rmdir
+ cgroup_rmdir
+ kernfs_drain_open_files
+ cgroup_file_release
+ cgroup_pressure_release
+ psi_trigger_destroy
+ wake_up_pollfree(&t->event_wait)
+// vfs_poll is unblocked
+ synchronize_rcu
+ kfree(t)
+ poll_freewait -> UAF access to the trigger's waitqueue head
+
+Patch [1] fixed this issue for epoll() case using wake_up_pollfree(),
+however the same issue exists for synchronous poll() case.
+The root cause of this issue is that the lifecycles of the psi trigger's
+waitqueue and of the file associated with the trigger are different. Fix
+this by using kernfs_generic_poll function when polling on cgroup-specific
+psi triggers. It internally uses kernfs_open_node->poll waitqueue head
+with its lifecycle tied to the file's lifecycle. This also renders the
+fix in [1] obsolete, so revert it.
+
+[1] commit c2dbe32d5db5 ("sched/psi: Fix use-after-free in ep_remove_wait_queue()")
+
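+As a rough sketch (editorial, mirroring the diff below), cgroup triggers
+now poll and notify through the kernfs open file instead of the trigger's
+own waitqueue:
+
+	/* poll path */
+	if (t->of)
+		kernfs_generic_poll(t->of, wait);
+	else
+		poll_wait(file, &t->event_wait, wait);
+
+	/* event notification / teardown */
+	if (t->of)
+		kernfs_notify(t->of->kn);
+	else
+		wake_up_interruptible(&t->event_wait);
+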
+Fixes: 0e94682b73bf ("psi: introduce psi monitor")
+Closes: https://lore.kernel.org/all/20230613062306.101831-1-lujialin4@huawei.com/
+Reported-by: Lu Jialin <lujialin4@huawei.com>
+Signed-off-by: Suren Baghdasaryan <surenb@google.com>
+Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
+Link: https://lkml.kernel.org/r/20230630005612.1014540-1-surenb@google.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/psi.h | 5 +++--
+ include/linux/psi_types.h | 3 +++
+ kernel/cgroup/cgroup.c | 2 +-
+ kernel/sched/psi.c | 29 +++++++++++++++++++++--------
+ 4 files changed, 28 insertions(+), 11 deletions(-)
+
+diff --git a/include/linux/psi.h b/include/linux/psi.h
+index ab26200c28033..e0745873e3f26 100644
+--- a/include/linux/psi.h
++++ b/include/linux/psi.h
+@@ -23,8 +23,9 @@ void psi_memstall_enter(unsigned long *flags);
+ void psi_memstall_leave(unsigned long *flags);
+
+ int psi_show(struct seq_file *s, struct psi_group *group, enum psi_res res);
+-struct psi_trigger *psi_trigger_create(struct psi_group *group,
+- char *buf, enum psi_res res, struct file *file);
++struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
++ enum psi_res res, struct file *file,
++ struct kernfs_open_file *of);
+ void psi_trigger_destroy(struct psi_trigger *t);
+
+ __poll_t psi_trigger_poll(void **trigger_ptr, struct file *file,
+diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h
+index 040c089581c6c..f1fd3a8044e0e 100644
+--- a/include/linux/psi_types.h
++++ b/include/linux/psi_types.h
+@@ -137,6 +137,9 @@ struct psi_trigger {
+ /* Wait queue for polling */
+ wait_queue_head_t event_wait;
+
++ /* Kernfs file for cgroup triggers */
++ struct kernfs_open_file *of;
++
+ /* Pending event flag */
+ int event;
+
+diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
+index c35efae566a4b..73f11e4db3a4d 100644
+--- a/kernel/cgroup/cgroup.c
++++ b/kernel/cgroup/cgroup.c
+@@ -3771,7 +3771,7 @@ static ssize_t pressure_write(struct kernfs_open_file *of, char *buf,
+ }
+
+ psi = cgroup_psi(cgrp);
+- new = psi_trigger_create(psi, buf, res, of->file);
++ new = psi_trigger_create(psi, buf, res, of->file, of);
+ if (IS_ERR(new)) {
+ cgroup_put(cgrp);
+ return PTR_ERR(new);
+diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
+index e072f6b31bf30..80d8c10e93638 100644
+--- a/kernel/sched/psi.c
++++ b/kernel/sched/psi.c
+@@ -494,8 +494,12 @@ static u64 update_triggers(struct psi_group *group, u64 now, bool *update_total,
+ continue;
+
+ /* Generate an event */
+- if (cmpxchg(&t->event, 0, 1) == 0)
+- wake_up_interruptible(&t->event_wait);
++ if (cmpxchg(&t->event, 0, 1) == 0) {
++ if (t->of)
++ kernfs_notify(t->of->kn);
++ else
++ wake_up_interruptible(&t->event_wait);
++ }
+ t->last_event_time = now;
+ /* Reset threshold breach flag once event got generated */
+ t->pending_event = false;
+@@ -1272,8 +1276,9 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
+ return 0;
+ }
+
+-struct psi_trigger *psi_trigger_create(struct psi_group *group,
+- char *buf, enum psi_res res, struct file *file)
++struct psi_trigger *psi_trigger_create(struct psi_group *group, char *buf,
++ enum psi_res res, struct file *file,
++ struct kernfs_open_file *of)
+ {
+ struct psi_trigger *t;
+ enum psi_states state;
+@@ -1333,7 +1338,9 @@ struct psi_trigger *psi_trigger_create(struct psi_group *group,
+
+ t->event = 0;
+ t->last_event_time = 0;
+- init_waitqueue_head(&t->event_wait);
++ t->of = of;
++ if (!of)
++ init_waitqueue_head(&t->event_wait);
+ t->pending_event = false;
+ t->aggregator = privileged ? PSI_POLL : PSI_AVGS;
+
+@@ -1390,7 +1397,10 @@ void psi_trigger_destroy(struct psi_trigger *t)
+ * being accessed later. Can happen if cgroup is deleted from under a
+ * polling process.
+ */
+- wake_up_pollfree(&t->event_wait);
++ if (t->of)
++ kernfs_notify(t->of->kn);
++ else
++ wake_up_interruptible(&t->event_wait);
+
+ if (t->aggregator == PSI_AVGS) {
+ mutex_lock(&group->avgs_lock);
+@@ -1462,7 +1472,10 @@ __poll_t psi_trigger_poll(void **trigger_ptr,
+ if (!t)
+ return DEFAULT_POLLMASK | EPOLLERR | EPOLLPRI;
+
+- poll_wait(file, &t->event_wait, wait);
++ if (t->of)
++ kernfs_generic_poll(t->of, wait);
++ else
++ poll_wait(file, &t->event_wait, wait);
+
+ if (cmpxchg(&t->event, 1, 0) == 1)
+ ret |= EPOLLPRI;
+@@ -1532,7 +1545,7 @@ static ssize_t psi_write(struct file *file, const char __user *user_buf,
+ return -EBUSY;
+ }
+
+- new = psi_trigger_create(&psi_system, buf, res, file);
++ new = psi_trigger_create(&psi_system, buf, res, file, NULL);
+ if (IS_ERR(new)) {
+ mutex_unlock(&seq->lock);
+ return PTR_ERR(new);
+--
+2.39.2
+
--- /dev/null
+From d5bcc1aba8ad5267a2fd8d1da3794a97630d9c16 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 14 Jun 2023 10:18:25 +0800
+Subject: security: keys: Modify mismatched function name
+
+From: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
+
+[ Upstream commit 2a4152742025c5f21482e8cebc581702a0fa5b01 ]
+
+No functional modification involved.
+
+security/keys/trusted-keys/trusted_tpm2.c:203: warning: expecting prototype for tpm_buf_append_auth(). Prototype was for tpm2_buf_append_auth() instead.
+
+Fixes: 2e19e10131a0 ("KEYS: trusted: Move TPM2 trusted keys code")
+Reported-by: Abaci Robot <abaci@linux.alibaba.com>
+Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=5524
+Signed-off-by: Jiapeng Chong <jiapeng.chong@linux.alibaba.com>
+Reviewed-by: Paul Moore <paul@paul-moore.com>
+Signed-off-by: Jarkko Sakkinen <jarkko@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ security/keys/trusted-keys/trusted_tpm2.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/security/keys/trusted-keys/trusted_tpm2.c b/security/keys/trusted-keys/trusted_tpm2.c
+index 2b2c8eb258d5b..bc700f85f80be 100644
+--- a/security/keys/trusted-keys/trusted_tpm2.c
++++ b/security/keys/trusted-keys/trusted_tpm2.c
+@@ -186,7 +186,7 @@ int tpm2_key_priv(void *context, size_t hdrlen,
+ }
+
+ /**
+- * tpm_buf_append_auth() - append TPMS_AUTH_COMMAND to the buffer.
++ * tpm2_buf_append_auth() - append TPMS_AUTH_COMMAND to the buffer.
+ *
+ * @buf: an allocated tpm_buf instance
+ * @session_handle: session handle
+--
+2.39.2
+
asoc-codecs-wcd938x-fix-codec-initialisation-race.patch
asoc-codecs-wcd938x-fix-soundwire-initialisation-race.patch
ext4-correct-inline-offset-when-handling-xattrs-in-inode-body.patch
+drm-radeon-fix-integer-overflow-in-radeon_cs_parser_.patch
+alsa-emu10k1-roll-up-loops-in-dsp-setup-code-for-aud.patch
+quota-properly-disable-quotas-when-add_dquot_ref-fai.patch
+quota-fix-warning-in-dqgrab.patch
+hid-add-quirk-for-03f0-464a-hp-elite-presenter-mouse.patch
+ovl-check-type-and-offset-of-struct-vfsmount-in-ovl_.patch
+udf-fix-uninitialized-array-access-for-some-pathname.patch
+fs-jfs-fix-ubsan-array-index-out-of-bounds-in-dballo.patch
+mips-dec-prom-address-warray-bounds-warning.patch
+fs-jfs-fix-null-ptr-deref-read-in-txbegin.patch
+fs-jfs-check-for-read-only-mounted-filesystem-in-txb.patch
+acpi-video-add-backlight-native-dmi-quirk-for-dell-s.patch
+rcu-tasks-avoid-pr_info-with-spin-lock-in-cblist_ini.patch
+rcu-mark-additional-concurrent-load-from-cpu_no_qs.b.patch
+sched-fair-don-t-balance-task-to-its-current-running.patch
+wifi-ath11k-fix-registration-of-6ghz-only-phy-withou.patch
+bpf-print-a-warning-only-if-writing-to-unprivileged_.patch
+bpf-address-kcsan-report-on-bpf_lru_list.patch
+bpf-tcp-avoid-taking-fast-sock-lock-in-iterator.patch
+wifi-ath11k-add-support-default-regdb-while-searchin.patch
+wifi-mac80211_hwsim-fix-possible-null-dereference.patch
+spi-dw-add-compatible-for-intel-mount-evans-soc.patch
+wifi-ath11k-fix-memory-leak-in-wmi-firmware-stats.patch
+net-ethernet-litex-add-support-for-64-bit-stats.patch
+devlink-report-devlink_port_type_warn-source-device.patch
+wifi-wext-core-fix-wstringop-overflow-warning-in-ioc.patch
+wifi-iwlwifi-add-support-for-new-pci-id.patch
+wifi-iwlwifi-mvm-avoid-baid-size-integer-overflow.patch
+wifi-iwlwifi-pcie-add-device-id-51f1-for-killer-1675.patch
+igb-fix-igb_down-hung-on-surprise-removal.patch
+net-hns3-fix-strncpy-not-using-dest-buf-length-as-le.patch
+asoc-amd-acp-fix-for-invalid-dai-id-handling-in-acp_.patch
+asoc-codecs-wcd938x-fix-mbhc-impedance-loglevel.patch
+asoc-codecs-wcd938x-fix-db-range-for-hphl-and-hphr.patch
+asoc-qcom-q6apm-do-not-close-gpr-port-before-closing.patch
+sched-fair-use-recent_used_cpu-to-test-p-cpus_ptr.patch
+sched-psi-fix-avgs_work-re-arm-in-psi_avgs_work.patch
+sched-psi-rearrange-polling-code-in-preparation.patch
+sched-psi-rename-existing-poll-members-in-preparatio.patch
+sched-psi-extract-update_triggers-side-effect.patch
+sched-psi-allow-unprivileged-polling-of-n-2s-period.patch
+sched-psi-use-kernfs-polling-functions-for-psi-trigg.patch
+pinctrl-renesas-rzv2m-handle-non-unique-subnode-name.patch
+pinctrl-renesas-rzg2l-handle-non-unique-subnode-name.patch
+spi-bcm63xx-fix-max-prepend-length.patch
+fbdev-imxfb-warn-about-invalid-left-right-margin.patch
+fbdev-imxfb-removed-unneeded-release_mem_region.patch
+perf-build-fix-library-not-found-error-when-using-cs.patch
+btrfs-be-a-bit-more-careful-when-setting-mirror_num_.patch
+spi-s3c64xx-clear-loopback-bit-after-loopback-test.patch
+kallsyms-improve-the-performance-of-kallsyms_lookup_.patch
+kallsyms-correctly-sequence-symbols-when-config_lto_.patch
+kallsyms-strip-lto-only-suffixes-from-promoted-globa.patch
+dsa-mv88e6xxx-do-a-final-check-before-timing-out.patch
+net-ethernet-ti-cpsw_ale-fix-cpsw_ale_get_field-cpsw.patch
+bridge-add-extack-warning-when-enabling-stp-in-netns.patch
+net-ethernet-mtk_eth_soc-handle-probe-deferral.patch
+cifs-fix-mid-leak-during-reconnection-after-timeout-.patch
+asoc-sof-ipc3-dtrace-uninitialized-data-in-dfsentry_.patch
+net-sched-cls_matchall-undo-tcf_bind_filter-in-case-.patch
+net-sched-cls_u32-undo-tcf_bind_filter-if-u32_replac.patch
+net-sched-cls_u32-undo-refcount-decrement-in-case-up.patch
+net-sched-cls_bpf-undo-tcf_bind_filter-in-case-of-an.patch
+net-dsa-microchip-ksz8-separate-static-mac-table-ope.patch
+net-dsa-microchip-ksz8-make-ksz8_r_sta_mac_table-sta.patch
+net-dsa-microchip-ksz8_r_sta_mac_table-avoid-using-e.patch
+net-dsa-microchip-correct-ksz8795-static-mac-table-a.patch
+iavf-fix-use-after-free-in-free_netdev.patch
+iavf-fix-out-of-bounds-when-setting-channels-on-remo.patch
+iavf-use-internal-state-to-free-traffic-irqs.patch
+iavf-move-netdev_update_features-into-watchdog-task.patch
+iavf-send-vlan-offloading-caps-once-after-vfr.patch
+iavf-make-functions-static-where-possible.patch
+iavf-wait-for-reset-in-callbacks-which-trigger-it.patch
+iavf-fix-a-deadlock-caused-by-rtnl-and-driver-s-lock.patch
+iavf-fix-reset-task-race-with-iavf_remove.patch
+security-keys-modify-mismatched-function-name.patch
+octeontx2-pf-dont-allocate-bpids-for-lbk-interfaces.patch
+bpf-fix-subprog-idx-logic-in-check_max_stack_depth.patch
+bpf-repeat-check_max_stack_depth-for-async-callbacks.patch
+bpf-arm64-fix-bti-type-used-for-freplace-attached-fu.patch
+igc-avoid-transmit-queue-timeout-for-xdp.patch
+igc-prevent-garbled-tx-queue-with-xdp-zerocopy.patch
+net-ipv4-use-consistent-txhash-in-time_wait-and-syn_.patch
+tcp-annotate-data-races-around-tcp_rsk-req-txhash.patch
+tcp-annotate-data-races-around-tcp_rsk-req-ts_recent.patch
+net-ipv4-use-kfree_sensitive-instead-of-kfree.patch
+net-ipv6-check-return-value-of-pskb_trim.patch
+revert-tcp-avoid-the-lookup-process-failing-to-get-s.patch
+fbdev-au1200fb-fix-missing-irq-check-in-au1200fb_drv.patch
+llc-don-t-drop-packet-from-non-root-netns.patch
+alsa-hda-realtek-fix-generic-fixup-definition-for-cs.patch
+netfilter-nf_tables-fix-spurious-set-element-inserti.patch
+netfilter-nf_tables-can-t-schedule-in-nft_chain_vali.patch
+netfilter-nft_set_pipapo-fix-improper-element-remova.patch
+netfilter-nf_tables-skip-bound-chain-in-netns-releas.patch
+netfilter-nf_tables-skip-bound-chain-on-rule-flush.patch
+bluetooth-use-rcu-for-hci_conn_params-and-iterate-sa.patch
+bluetooth-hci_event-call-disconnect-callback-before-.patch
+bluetooth-iso-fix-iso_conn-related-locking-and-valid.patch
+bluetooth-hci_sync-avoid-use-after-free-in-dbg-for-h.patch
+tcp-annotate-data-races-around-tp-tcp_tx_delay.patch
+tcp-annotate-data-races-around-tp-tsoffset.patch
+tcp-annotate-data-races-around-tp-keepalive_time.patch
+tcp-annotate-data-races-around-tp-keepalive_intvl.patch
+tcp-annotate-data-races-around-tp-keepalive_probes.patch
+tcp-annotate-data-races-around-icsk-icsk_syn_retries.patch
+tcp-annotate-data-races-around-tp-linger2.patch
+tcp-annotate-data-races-around-rskq_defer_accept.patch
+tcp-annotate-data-races-around-tp-notsent_lowat.patch
+tcp-annotate-data-races-around-icsk-icsk_user_timeou.patch
+tcp-annotate-data-races-around-fastopenq.max_qlen.patch
+net-phy-prevent-stale-pointer-dereference-in-phy_ini.patch
--- /dev/null
+From cf5e36388cb882c6653cd3159ae15b19b12d882e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 29 Jun 2023 09:14:52 +0200
+Subject: spi: bcm63xx: fix max prepend length
+
+From: Jonas Gorski <jonas.gorski@gmail.com>
+
+[ Upstream commit 5158814cbb37bbb38344b3ecddc24ba2ed0365f2 ]
+
+The command word is defined as following:
+
+ /* Command */
+ #define SPI_CMD_COMMAND_SHIFT 0
+ #define SPI_CMD_DEVICE_ID_SHIFT 4
+ #define SPI_CMD_PREPEND_BYTE_CNT_SHIFT 8
+ #define SPI_CMD_ONE_BYTE_SHIFT 11
+ #define SPI_CMD_ONE_WIRE_SHIFT 12
+
+If the prepend byte count field starts at bit 8, and the next defined
+bit is SPI_CMD_ONE_BYTE at bit 11, it can be at most 3 bits wide, and
+thus the max value is 7, not 15.
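+
+As a quick editorial check of that arithmetic, the field width is bounded
+by the neighbouring bit definitions:
+
+	/* width = SPI_CMD_ONE_BYTE_SHIFT - SPI_CMD_PREPEND_BYTE_CNT_SHIFT = 11 - 8 = 3 bits */
+	/* max   = (1 << 3) - 1 = 7, hence BCM63XX_SPI_MAX_PREPEND must be 7 */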
+
+Fixes: b17de076062a ("spi/bcm63xx: work around inability to keep CS up")
+Signed-off-by: Jonas Gorski <jonas.gorski@gmail.com>
+Link: https://lore.kernel.org/r/20230629071453.62024-1-jonas.gorski@gmail.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/spi/spi-bcm63xx.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/spi/spi-bcm63xx.c b/drivers/spi/spi-bcm63xx.c
+index 80fa0ef8909ca..147199002df1e 100644
+--- a/drivers/spi/spi-bcm63xx.c
++++ b/drivers/spi/spi-bcm63xx.c
+@@ -126,7 +126,7 @@ enum bcm63xx_regs_spi {
+ SPI_MSG_DATA_SIZE,
+ };
+
+-#define BCM63XX_SPI_MAX_PREPEND 15
++#define BCM63XX_SPI_MAX_PREPEND 7
+
+ #define BCM63XX_SPI_MAX_CS 8
+ #define BCM63XX_SPI_BUS_NUM 0
+--
+2.39.2
+
--- /dev/null
+From a47a909fedf766372d2d6e58a2e2e2694d9e1dfe Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Jun 2023 07:54:01 -0700
+Subject: spi: dw: Add compatible for Intel Mount Evans SoC
+
+From: Abe Kohandel <abe.kohandel@intel.com>
+
+[ Upstream commit 0760d5d0e9f0c0e2200a0323a61d1995bb745dee ]
+
+The Intel Mount Evans SoC's Integrated Management Complex uses the SPI
+controller for access to a NOR SPI FLASH. However, the SoC doesn't
+provide a mechanism to override the native chip select signal.
+
+This driver doesn't use DMA for memory operations when a chip select
+override is not provided due to the native chip select timing behavior.
+As a result no DMA configuration is done for the controller and this
+configuration is not tested.
+
+The controller also has an errata where a full TX FIFO can result in
+data corruption. The suggested workaround is to never completely fill
+the FIFO. The TX FIFO has a size of 32 so the fifo_len is set to 31.
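+
+A minimal sketch of the workaround (mirroring the init callback added in
+the diff below): the controller-specific init simply caps the usable FIFO
+depth one below its physical size:
+
+	/* TX FIFO holds 32 entries; never fill it completely (errata) */
+	dwsmmio->dws.fifo_len = 31;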
+
+Signed-off-by: Abe Kohandel <abe.kohandel@intel.com>
+Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
+Link: https://lore.kernel.org/r/20230606145402.474866-2-abe.kohandel@intel.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/spi/spi-dw-mmio.c | 29 +++++++++++++++++++++++++++++
+ 1 file changed, 29 insertions(+)
+
+diff --git a/drivers/spi/spi-dw-mmio.c b/drivers/spi/spi-dw-mmio.c
+index 26c40ea6dd129..7e8478ad74e55 100644
+--- a/drivers/spi/spi-dw-mmio.c
++++ b/drivers/spi/spi-dw-mmio.c
+@@ -222,6 +222,31 @@ static int dw_spi_intel_init(struct platform_device *pdev,
+ return 0;
+ }
+
++/*
++ * The Intel Mount Evans SoC's Integrated Management Complex uses the
++ * SPI controller for access to a NOR SPI FLASH. However, the SoC doesn't
++ * provide a mechanism to override the native chip select signal.
++ *
++ * This driver doesn't use DMA for memory operations when a chip select
++ * override is not provided due to the native chip select timing behavior.
++ * As a result no DMA configuration is done for the controller and this
++ * configuration is not tested.
++ */
++static int dw_spi_mountevans_imc_init(struct platform_device *pdev,
++ struct dw_spi_mmio *dwsmmio)
++{
++ /*
++ * The Intel Mount Evans SoC's Integrated Management Complex DW
++ * apb_ssi_v4.02a controller has an errata where a full TX FIFO can
++ * result in data corruption. The suggested workaround is to never
++ * completely fill the FIFO. The TX FIFO has a size of 32 so the
++ * fifo_len is set to 31.
++ */
++ dwsmmio->dws.fifo_len = 31;
++
++ return 0;
++}
++
+ static int dw_spi_canaan_k210_init(struct platform_device *pdev,
+ struct dw_spi_mmio *dwsmmio)
+ {
+@@ -350,6 +375,10 @@ static const struct of_device_id dw_spi_mmio_of_match[] = {
+ { .compatible = "snps,dwc-ssi-1.01a", .data = dw_spi_hssi_init},
+ { .compatible = "intel,keembay-ssi", .data = dw_spi_intel_init},
+ { .compatible = "intel,thunderbay-ssi", .data = dw_spi_intel_init},
++ {
++ .compatible = "intel,mountevans-imc-ssi",
++ .data = dw_spi_mountevans_imc_init,
++ },
+ { .compatible = "microchip,sparx5-spi", dw_spi_mscc_sparx5_init},
+ { .compatible = "canaan,k210-spi", dw_spi_canaan_k210_init},
+ { /* end of table */}
+--
+2.39.2
+
--- /dev/null
+From f832b5453eead49443949271d5828c464703455b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Jul 2023 17:20:20 +0900
+Subject: spi: s3c64xx: clear loopback bit after loopback test
+
+From: Jaewon Kim <jaewon02.kim@samsung.com>
+
+[ Upstream commit 9ec3c5517e22a12d2ff1b71e844f7913641460c6 ]
+
+When an SPI loopback transfer is performed, the
+S3C64XX_SPI_MODE_SELF_LOOPBACK bit remains set, so the controller keeps
+operating in loopback mode even if the next transfer is not a loopback
+transfer. If SPI_LOOP is not set, the S3C64XX_SPI_MODE_SELF_LOOPBACK bit
+needs to be cleared.
+
+Signed-off-by: Jaewon Kim <jaewon02.kim@samsung.com>
+Fixes: ffb7bcd3b27e ("spi: s3c64xx: support loopback mode")
+Reviewed-by: Chanho Park <chanho61.park@samsung.com>
+Link: https://lore.kernel.org/r/20230711082020.138165-1-jaewon02.kim@samsung.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/spi/spi-s3c64xx.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/spi/spi-s3c64xx.c b/drivers/spi/spi-s3c64xx.c
+index 71d324ec9a70a..1480df7b43b3f 100644
+--- a/drivers/spi/spi-s3c64xx.c
++++ b/drivers/spi/spi-s3c64xx.c
+@@ -668,6 +668,8 @@ static int s3c64xx_spi_config(struct s3c64xx_spi_driver_data *sdd)
+
+ if ((sdd->cur_mode & SPI_LOOP) && sdd->port_conf->has_loopback)
+ val |= S3C64XX_SPI_MODE_SELF_LOOPBACK;
++ else
++ val &= ~S3C64XX_SPI_MODE_SELF_LOOPBACK;
+
+ writel(val, regs + S3C64XX_SPI_MODE_CFG);
+
+--
+2.39.2
+
--- /dev/null
+From 7035bedf31a88876c025d69b93d6ebb0256f36f7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:57 +0000
+Subject: tcp: annotate data-races around fastopenq.max_qlen
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 70f360dd7042cb843635ece9d28335a4addff9eb ]
+
+This field can be read locklessly.
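+
+A minimal sketch of the annotation pattern used throughout this series
+(editorial, both lines taken from the diff below): the writer publishes
+the value with WRITE_ONCE() and lockless readers use READ_ONCE():
+
+	/* writer, fastopen_queue_tune() */
+	WRITE_ONCE(queue->fastopenq.max_qlen, min_t(unsigned int, backlog, somaxconn));
+
+	/* lockless reader, tcp_fastopen_queue_check() */
+	max_qlen = READ_ONCE(fastopenq->max_qlen);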
+
+Fixes: 1536e2857bd3 ("tcp: Add a TCP_FASTOPEN socket option to get a max backlog on its listner")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-12-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/tcp.h | 2 +-
+ net/ipv4/tcp.c | 2 +-
+ net/ipv4/tcp_fastopen.c | 6 ++++--
+ 3 files changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/include/linux/tcp.h b/include/linux/tcp.h
+index 41b1da621a458..9cd289ad3f5b5 100644
+--- a/include/linux/tcp.h
++++ b/include/linux/tcp.h
+@@ -510,7 +510,7 @@ static inline void fastopen_queue_tune(struct sock *sk, int backlog)
+ struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+ int somaxconn = READ_ONCE(sock_net(sk)->core.sysctl_somaxconn);
+
+- queue->fastopenq.max_qlen = min_t(unsigned int, backlog, somaxconn);
++ WRITE_ONCE(queue->fastopenq.max_qlen, min_t(unsigned int, backlog, somaxconn));
+ }
+
+ static inline void tcp_move_syn(struct tcp_sock *tp,
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index b3a5ff311567b..fab25d4f3a6f1 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -4247,7 +4247,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
+ break;
+
+ case TCP_FASTOPEN:
+- val = icsk->icsk_accept_queue.fastopenq.max_qlen;
++ val = READ_ONCE(icsk->icsk_accept_queue.fastopenq.max_qlen);
+ break;
+
+ case TCP_FASTOPEN_CONNECT:
+diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
+index 45cc7f1ca2961..85e4953f11821 100644
+--- a/net/ipv4/tcp_fastopen.c
++++ b/net/ipv4/tcp_fastopen.c
+@@ -296,6 +296,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk,
+ static bool tcp_fastopen_queue_check(struct sock *sk)
+ {
+ struct fastopen_queue *fastopenq;
++ int max_qlen;
+
+ /* Make sure the listener has enabled fastopen, and we don't
+ * exceed the max # of pending TFO requests allowed before trying
+@@ -308,10 +309,11 @@ static bool tcp_fastopen_queue_check(struct sock *sk)
+ * temporarily vs a server not supporting Fast Open at all.
+ */
+ fastopenq = &inet_csk(sk)->icsk_accept_queue.fastopenq;
+- if (fastopenq->max_qlen == 0)
++ max_qlen = READ_ONCE(fastopenq->max_qlen);
++ if (max_qlen == 0)
+ return false;
+
+- if (fastopenq->qlen >= fastopenq->max_qlen) {
++ if (fastopenq->qlen >= max_qlen) {
+ struct request_sock *req1;
+ spin_lock(&fastopenq->lock);
+ req1 = fastopenq->rskq_rst_head;
+--
+2.39.2
+
--- /dev/null
+From ae744dd736807b48f042d785128b2d771387f69c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:52 +0000
+Subject: tcp: annotate data-races around icsk->icsk_syn_retries
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 3a037f0f3c4bfe44518f2fbb478aa2f99a9cd8bb ]
+
+do_tcp_getsockopt() and reqsk_timer_handler() read
+icsk->icsk_syn_retries while another cpu might change its value.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-7-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/inet_connection_sock.c | 2 +-
+ net/ipv4/tcp.c | 6 +++---
+ 2 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
+index 8e35ea66d930a..62a3b103f258a 100644
+--- a/net/ipv4/inet_connection_sock.c
++++ b/net/ipv4/inet_connection_sock.c
+@@ -1016,7 +1016,7 @@ static void reqsk_timer_handler(struct timer_list *t)
+
+ icsk = inet_csk(sk_listener);
+ net = sock_net(sk_listener);
+- max_syn_ack_retries = icsk->icsk_syn_retries ? :
++ max_syn_ack_retries = READ_ONCE(icsk->icsk_syn_retries) ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_synack_retries);
+ /* Normally all the openreqs are young and become mature
+ * (i.e. converted to established socket) for first timeout.
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 7d75928ea0f9c..ffa9717293358 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3397,7 +3397,7 @@ int tcp_sock_set_syncnt(struct sock *sk, int val)
+ return -EINVAL;
+
+ lock_sock(sk);
+- inet_csk(sk)->icsk_syn_retries = val;
++ WRITE_ONCE(inet_csk(sk)->icsk_syn_retries, val);
+ release_sock(sk);
+ return 0;
+ }
+@@ -3678,7 +3678,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ if (val < 1 || val > MAX_TCP_SYNCNT)
+ err = -EINVAL;
+ else
+- icsk->icsk_syn_retries = val;
++ WRITE_ONCE(icsk->icsk_syn_retries, val);
+ break;
+
+ case TCP_SAVE_SYN:
+@@ -4095,7 +4095,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
+ val = keepalive_probes(tp);
+ break;
+ case TCP_SYNCNT:
+- val = icsk->icsk_syn_retries ? :
++ val = READ_ONCE(icsk->icsk_syn_retries) ? :
+ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
+ break;
+ case TCP_LINGER2:
+--
+2.39.2
+
--- /dev/null
+From 7efbdf0a8a4d26103224e8eb9779b4b5c48a11c6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:56 +0000
+Subject: tcp: annotate data-races around icsk->icsk_user_timeout
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 26023e91e12c68669db416b97234328a03d8e499 ]
+
+This field can be read locklessly from do_tcp_getsockopt().
+
+Fixes: dca43c75e7e5 ("tcp: Add TCP_USER_TIMEOUT socket option.")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-11-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 6 +++---
+ 1 file changed, 3 insertions(+), 3 deletions(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 6f3a494b965ae..b3a5ff311567b 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3406,7 +3406,7 @@ EXPORT_SYMBOL(tcp_sock_set_syncnt);
+ void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
+ {
+ lock_sock(sk);
+- inet_csk(sk)->icsk_user_timeout = val;
++ WRITE_ONCE(inet_csk(sk)->icsk_user_timeout, val);
+ release_sock(sk);
+ }
+ EXPORT_SYMBOL(tcp_sock_set_user_timeout);
+@@ -3726,7 +3726,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ if (val < 0)
+ err = -EINVAL;
+ else
+- icsk->icsk_user_timeout = val;
++ WRITE_ONCE(icsk->icsk_user_timeout, val);
+ break;
+
+ case TCP_FASTOPEN:
+@@ -4243,7 +4243,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
+ break;
+
+ case TCP_USER_TIMEOUT:
+- val = icsk->icsk_user_timeout;
++ val = READ_ONCE(icsk->icsk_user_timeout);
+ break;
+
+ case TCP_FASTOPEN:
+--
+2.39.2
+
--- /dev/null
+From 7cb1fa4e8fc2528b3c95ebf4367b85eaf269c0e9 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:54 +0000
+Subject: tcp: annotate data-races around rskq_defer_accept
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit ae488c74422fb1dcd807c0201804b3b5e8a322a3 ]
+
+do_tcp_getsockopt() reads rskq_defer_accept while another cpu
+might change its value.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-9-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 11 ++++++-----
+ 1 file changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 363535b6ece83..bc3ad48f92389 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3700,9 +3700,9 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+
+ case TCP_DEFER_ACCEPT:
+ /* Translate value in seconds to number of retransmits */
+- icsk->icsk_accept_queue.rskq_defer_accept =
+- secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
+- TCP_RTO_MAX / HZ);
++ WRITE_ONCE(icsk->icsk_accept_queue.rskq_defer_accept,
++ secs_to_retrans(val, TCP_TIMEOUT_INIT / HZ,
++ TCP_RTO_MAX / HZ));
+ break;
+
+ case TCP_WINDOW_CLAMP:
+@@ -4104,8 +4104,9 @@ int do_tcp_getsockopt(struct sock *sk, int level,
+ val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
+ break;
+ case TCP_DEFER_ACCEPT:
+- val = retrans_to_secs(icsk->icsk_accept_queue.rskq_defer_accept,
+- TCP_TIMEOUT_INIT / HZ, TCP_RTO_MAX / HZ);
++ val = READ_ONCE(icsk->icsk_accept_queue.rskq_defer_accept);
++ val = retrans_to_secs(val, TCP_TIMEOUT_INIT / HZ,
++ TCP_RTO_MAX / HZ);
+ break;
+ case TCP_WINDOW_CLAMP:
+ val = tp->window_clamp;
+--
+2.39.2
+
--- /dev/null
+From 2a19bb80f620e9115ee081f89944c9fc3882cceb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 14:44:45 +0000
+Subject: tcp: annotate data-races around tcp_rsk(req)->ts_recent
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit eba20811f32652bc1a52d5e7cc403859b86390d9 ]
+
+TCP request sockets are lockless; tcp_rsk(req)->ts_recent
+can change while being read by another cpu, as syzbot noticed.
+
+This is harmless, but we should annotate the known races.
+
+Note that tcp_check_req() changes req->ts_recent a bit early;
+we might change this in the future.
+
+BUG: KCSAN: data-race in tcp_check_req / tcp_check_req
+
+write to 0xffff88813c8afb84 of 4 bytes by interrupt on cpu 1:
+tcp_check_req+0x694/0xc70 net/ipv4/tcp_minisocks.c:762
+tcp_v4_rcv+0x12db/0x1b70 net/ipv4/tcp_ipv4.c:2071
+ip_protocol_deliver_rcu+0x356/0x6d0 net/ipv4/ip_input.c:205
+ip_local_deliver_finish+0x13c/0x1a0 net/ipv4/ip_input.c:233
+NF_HOOK include/linux/netfilter.h:303 [inline]
+ip_local_deliver+0xec/0x1c0 net/ipv4/ip_input.c:254
+dst_input include/net/dst.h:468 [inline]
+ip_rcv_finish net/ipv4/ip_input.c:449 [inline]
+NF_HOOK include/linux/netfilter.h:303 [inline]
+ip_rcv+0x197/0x270 net/ipv4/ip_input.c:569
+__netif_receive_skb_one_core net/core/dev.c:5493 [inline]
+__netif_receive_skb+0x90/0x1b0 net/core/dev.c:5607
+process_backlog+0x21f/0x380 net/core/dev.c:5935
+__napi_poll+0x60/0x3b0 net/core/dev.c:6498
+napi_poll net/core/dev.c:6565 [inline]
+net_rx_action+0x32b/0x750 net/core/dev.c:6698
+__do_softirq+0xc1/0x265 kernel/softirq.c:571
+do_softirq+0x7e/0xb0 kernel/softirq.c:472
+__local_bh_enable_ip+0x64/0x70 kernel/softirq.c:396
+local_bh_enable+0x1f/0x20 include/linux/bottom_half.h:33
+rcu_read_unlock_bh include/linux/rcupdate.h:843 [inline]
+__dev_queue_xmit+0xabb/0x1d10 net/core/dev.c:4271
+dev_queue_xmit include/linux/netdevice.h:3088 [inline]
+neigh_hh_output include/net/neighbour.h:528 [inline]
+neigh_output include/net/neighbour.h:542 [inline]
+ip_finish_output2+0x700/0x840 net/ipv4/ip_output.c:229
+ip_finish_output+0xf4/0x240 net/ipv4/ip_output.c:317
+NF_HOOK_COND include/linux/netfilter.h:292 [inline]
+ip_output+0xe5/0x1b0 net/ipv4/ip_output.c:431
+dst_output include/net/dst.h:458 [inline]
+ip_local_out net/ipv4/ip_output.c:126 [inline]
+__ip_queue_xmit+0xa4d/0xa70 net/ipv4/ip_output.c:533
+ip_queue_xmit+0x38/0x40 net/ipv4/ip_output.c:547
+__tcp_transmit_skb+0x1194/0x16e0 net/ipv4/tcp_output.c:1399
+tcp_transmit_skb net/ipv4/tcp_output.c:1417 [inline]
+tcp_write_xmit+0x13ff/0x2fd0 net/ipv4/tcp_output.c:2693
+__tcp_push_pending_frames+0x6a/0x1a0 net/ipv4/tcp_output.c:2877
+tcp_push_pending_frames include/net/tcp.h:1952 [inline]
+__tcp_sock_set_cork net/ipv4/tcp.c:3336 [inline]
+tcp_sock_set_cork+0xe8/0x100 net/ipv4/tcp.c:3343
+rds_tcp_xmit_path_complete+0x3b/0x40 net/rds/tcp_send.c:52
+rds_send_xmit+0xf8d/0x1420 net/rds/send.c:422
+rds_send_worker+0x42/0x1d0 net/rds/threads.c:200
+process_one_work+0x3e6/0x750 kernel/workqueue.c:2408
+worker_thread+0x5f2/0xa10 kernel/workqueue.c:2555
+kthread+0x1d7/0x210 kernel/kthread.c:379
+ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308
+
+read to 0xffff88813c8afb84 of 4 bytes by interrupt on cpu 0:
+tcp_check_req+0x32a/0xc70 net/ipv4/tcp_minisocks.c:622
+tcp_v4_rcv+0x12db/0x1b70 net/ipv4/tcp_ipv4.c:2071
+ip_protocol_deliver_rcu+0x356/0x6d0 net/ipv4/ip_input.c:205
+ip_local_deliver_finish+0x13c/0x1a0 net/ipv4/ip_input.c:233
+NF_HOOK include/linux/netfilter.h:303 [inline]
+ip_local_deliver+0xec/0x1c0 net/ipv4/ip_input.c:254
+dst_input include/net/dst.h:468 [inline]
+ip_rcv_finish net/ipv4/ip_input.c:449 [inline]
+NF_HOOK include/linux/netfilter.h:303 [inline]
+ip_rcv+0x197/0x270 net/ipv4/ip_input.c:569
+__netif_receive_skb_one_core net/core/dev.c:5493 [inline]
+__netif_receive_skb+0x90/0x1b0 net/core/dev.c:5607
+process_backlog+0x21f/0x380 net/core/dev.c:5935
+__napi_poll+0x60/0x3b0 net/core/dev.c:6498
+napi_poll net/core/dev.c:6565 [inline]
+net_rx_action+0x32b/0x750 net/core/dev.c:6698
+__do_softirq+0xc1/0x265 kernel/softirq.c:571
+run_ksoftirqd+0x17/0x20 kernel/softirq.c:939
+smpboot_thread_fn+0x30a/0x4a0 kernel/smpboot.c:164
+kthread+0x1d7/0x210 kernel/kthread.c:379
+ret_from_fork+0x1f/0x30 arch/x86/entry/entry_64.S:308
+
+value changed: 0x1cd237f1 -> 0x1cd237f2
+
+Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230717144445.653164-3-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_ipv4.c | 2 +-
+ net/ipv4/tcp_minisocks.c | 9 ++++++---
+ net/ipv4/tcp_output.c | 2 +-
+ net/ipv6/tcp_ipv6.c | 2 +-
+ 4 files changed, 9 insertions(+), 6 deletions(-)
+
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index e5df50b3e23a0..d49a66b271d52 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -988,7 +988,7 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+ tcp_rsk(req)->rcv_nxt,
+ req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+ tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
+- req->ts_recent,
++ READ_ONCE(req->ts_recent),
+ 0,
+ tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
+ inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
+diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
+index f281eab7fd125..42844d20da020 100644
+--- a/net/ipv4/tcp_minisocks.c
++++ b/net/ipv4/tcp_minisocks.c
+@@ -537,7 +537,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
+ newtp->max_window = newtp->snd_wnd;
+
+ if (newtp->rx_opt.tstamp_ok) {
+- newtp->rx_opt.ts_recent = req->ts_recent;
++ newtp->rx_opt.ts_recent = READ_ONCE(req->ts_recent);
+ newtp->rx_opt.ts_recent_stamp = ktime_get_seconds();
+ newtp->tcp_header_len = sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED;
+ } else {
+@@ -601,7 +601,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
+ tcp_parse_options(sock_net(sk), skb, &tmp_opt, 0, NULL);
+
+ if (tmp_opt.saw_tstamp) {
+- tmp_opt.ts_recent = req->ts_recent;
++ tmp_opt.ts_recent = READ_ONCE(req->ts_recent);
+ if (tmp_opt.rcv_tsecr)
+ tmp_opt.rcv_tsecr -= tcp_rsk(req)->ts_off;
+ /* We do not store true stamp, but it is not required,
+@@ -740,8 +740,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
+
+ /* In sequence, PAWS is OK. */
+
++ /* TODO: We probably should defer ts_recent change once
++ * we take ownership of @req.
++ */
+ if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt))
+- req->ts_recent = tmp_opt.rcv_tsval;
++ WRITE_ONCE(req->ts_recent, tmp_opt.rcv_tsval);
+
+ if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) {
+ /* Truncate SYN, it is out of window starting
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 693a29d3f43bd..26bd039f9296f 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -876,7 +876,7 @@ static unsigned int tcp_synack_options(const struct sock *sk,
+ if (likely(ireq->tstamp_ok)) {
+ opts->options |= OPTION_TS;
+ opts->tsval = tcp_skb_timestamp(skb) + tcp_rsk(req)->ts_off;
+- opts->tsecr = req->ts_recent;
++ opts->tsecr = READ_ONCE(req->ts_recent);
+ remaining -= TCPOLEN_TSTAMP_ALIGNED;
+ }
+ if (likely(ireq->sack_ok)) {
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index 0dcb06a1fe044..d9253aa764fae 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1130,7 +1130,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+ tcp_rsk(req)->rcv_nxt,
+ req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
+ tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
+- req->ts_recent, sk->sk_bound_dev_if,
++ READ_ONCE(req->ts_recent), sk->sk_bound_dev_if,
+ tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
+ ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
+ READ_ONCE(tcp_rsk(req)->txhash));
+--
+2.39.2
+
--- /dev/null
+From d29e41820d443947afb2314e6e9891e047903726 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Jul 2023 14:44:44 +0000
+Subject: tcp: annotate data-races around tcp_rsk(req)->txhash
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 5e5265522a9a7f91d1b0bd411d634bdaf16c80cd ]
+
+TCP request sockets are lockless; some of their fields
+can change while being read by another cpu, as syzbot noticed.
+
+This is usually harmless, but we should annotate the known
+races.
+
+This patch takes care of tcp_rsk(req)->txhash;
+a separate one is needed for tcp_rsk(req)->ts_recent.
+
+BUG: KCSAN: data-race in tcp_make_synack / tcp_rtx_synack
+
+write to 0xffff8881362304bc of 4 bytes by task 32083 on cpu 1:
+tcp_rtx_synack+0x9d/0x2a0 net/ipv4/tcp_output.c:4213
+inet_rtx_syn_ack+0x38/0x80 net/ipv4/inet_connection_sock.c:880
+tcp_check_req+0x379/0xc70 net/ipv4/tcp_minisocks.c:665
+tcp_v6_rcv+0x125b/0x1b20 net/ipv6/tcp_ipv6.c:1673
+ip6_protocol_deliver_rcu+0x92f/0xf30 net/ipv6/ip6_input.c:437
+ip6_input_finish net/ipv6/ip6_input.c:482 [inline]
+NF_HOOK include/linux/netfilter.h:303 [inline]
+ip6_input+0xbd/0x1b0 net/ipv6/ip6_input.c:491
+dst_input include/net/dst.h:468 [inline]
+ip6_rcv_finish+0x1e2/0x2e0 net/ipv6/ip6_input.c:79
+NF_HOOK include/linux/netfilter.h:303 [inline]
+ipv6_rcv+0x74/0x150 net/ipv6/ip6_input.c:309
+__netif_receive_skb_one_core net/core/dev.c:5452 [inline]
+__netif_receive_skb+0x90/0x1b0 net/core/dev.c:5566
+netif_receive_skb_internal net/core/dev.c:5652 [inline]
+netif_receive_skb+0x4a/0x310 net/core/dev.c:5711
+tun_rx_batched+0x3bf/0x400
+tun_get_user+0x1d24/0x22b0 drivers/net/tun.c:1997
+tun_chr_write_iter+0x18e/0x240 drivers/net/tun.c:2043
+call_write_iter include/linux/fs.h:1871 [inline]
+new_sync_write fs/read_write.c:491 [inline]
+vfs_write+0x4ab/0x7d0 fs/read_write.c:584
+ksys_write+0xeb/0x1a0 fs/read_write.c:637
+__do_sys_write fs/read_write.c:649 [inline]
+__se_sys_write fs/read_write.c:646 [inline]
+__x64_sys_write+0x42/0x50 fs/read_write.c:646
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+read to 0xffff8881362304bc of 4 bytes by task 32078 on cpu 0:
+tcp_make_synack+0x367/0xb40 net/ipv4/tcp_output.c:3663
+tcp_v6_send_synack+0x72/0x420 net/ipv6/tcp_ipv6.c:544
+tcp_conn_request+0x11a8/0x1560 net/ipv4/tcp_input.c:7059
+tcp_v6_conn_request+0x13f/0x180 net/ipv6/tcp_ipv6.c:1175
+tcp_rcv_state_process+0x156/0x1de0 net/ipv4/tcp_input.c:6494
+tcp_v6_do_rcv+0x98a/0xb70 net/ipv6/tcp_ipv6.c:1509
+tcp_v6_rcv+0x17b8/0x1b20 net/ipv6/tcp_ipv6.c:1735
+ip6_protocol_deliver_rcu+0x92f/0xf30 net/ipv6/ip6_input.c:437
+ip6_input_finish net/ipv6/ip6_input.c:482 [inline]
+NF_HOOK include/linux/netfilter.h:303 [inline]
+ip6_input+0xbd/0x1b0 net/ipv6/ip6_input.c:491
+dst_input include/net/dst.h:468 [inline]
+ip6_rcv_finish+0x1e2/0x2e0 net/ipv6/ip6_input.c:79
+NF_HOOK include/linux/netfilter.h:303 [inline]
+ipv6_rcv+0x74/0x150 net/ipv6/ip6_input.c:309
+__netif_receive_skb_one_core net/core/dev.c:5452 [inline]
+__netif_receive_skb+0x90/0x1b0 net/core/dev.c:5566
+netif_receive_skb_internal net/core/dev.c:5652 [inline]
+netif_receive_skb+0x4a/0x310 net/core/dev.c:5711
+tun_rx_batched+0x3bf/0x400
+tun_get_user+0x1d24/0x22b0 drivers/net/tun.c:1997
+tun_chr_write_iter+0x18e/0x240 drivers/net/tun.c:2043
+call_write_iter include/linux/fs.h:1871 [inline]
+new_sync_write fs/read_write.c:491 [inline]
+vfs_write+0x4ab/0x7d0 fs/read_write.c:584
+ksys_write+0xeb/0x1a0 fs/read_write.c:637
+__do_sys_write fs/read_write.c:649 [inline]
+__se_sys_write fs/read_write.c:646 [inline]
+__x64_sys_write+0x42/0x50 fs/read_write.c:646
+do_syscall_x64 arch/x86/entry/common.c:50 [inline]
+do_syscall_64+0x41/0xc0 arch/x86/entry/common.c:80
+entry_SYSCALL_64_after_hwframe+0x63/0xcd
+
+value changed: 0x91d25731 -> 0xe79325cd
+
+Reported by Kernel Concurrency Sanitizer on:
+CPU: 0 PID: 32078 Comm: syz-executor.4 Not tainted 6.5.0-rc1-syzkaller-00033-geb26cbb1a754 #0
+Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/03/2023
+
+Fixes: 58d607d3e52f ("tcp: provide skb->hash to synack packets")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Reported-by: syzbot <syzkaller@googlegroups.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://lore.kernel.org/r/20230717144445.653164-2-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_ipv4.c | 3 ++-
+ net/ipv4/tcp_minisocks.c | 2 +-
+ net/ipv4/tcp_output.c | 4 ++--
+ net/ipv6/tcp_ipv6.c | 2 +-
+ 4 files changed, 6 insertions(+), 5 deletions(-)
+
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index ef740983a1222..e5df50b3e23a0 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -992,7 +992,8 @@ static void tcp_v4_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+ 0,
+ tcp_md5_do_lookup(sk, l3index, addr, AF_INET),
+ inet_rsk(req)->no_srccheck ? IP_REPLY_ARG_NOSRCCHECK : 0,
+- ip_hdr(skb)->tos, tcp_rsk(req)->txhash);
++ ip_hdr(skb)->tos,
++ READ_ONCE(tcp_rsk(req)->txhash));
+ }
+
+ /*
+diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
+index 7f37e7da64671..f281eab7fd125 100644
+--- a/net/ipv4/tcp_minisocks.c
++++ b/net/ipv4/tcp_minisocks.c
+@@ -510,7 +510,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk,
+ newicsk->icsk_ack.lrcvtime = tcp_jiffies32;
+
+ newtp->lsndtime = tcp_jiffies32;
+- newsk->sk_txhash = treq->txhash;
++ newsk->sk_txhash = READ_ONCE(treq->txhash);
+ newtp->total_retrans = req->num_retrans;
+
+ tcp_init_xmit_timers(newsk);
+diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+index 925594dbeb929..693a29d3f43bd 100644
+--- a/net/ipv4/tcp_output.c
++++ b/net/ipv4/tcp_output.c
+@@ -3581,7 +3581,7 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst,
+ rcu_read_lock();
+ md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req));
+ #endif
+- skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4);
++ skb_set_hash(skb, READ_ONCE(tcp_rsk(req)->txhash), PKT_HASH_TYPE_L4);
+ /* bpf program will be interested in the tcp_flags */
+ TCP_SKB_CB(skb)->tcp_flags = TCPHDR_SYN | TCPHDR_ACK;
+ tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5,
+@@ -4124,7 +4124,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
+
+ /* Paired with WRITE_ONCE() in sock_setsockopt() */
+ if (READ_ONCE(sk->sk_txrehash) == SOCK_TXREHASH_ENABLED)
+- tcp_rsk(req)->txhash = net_tx_rndhash();
++ WRITE_ONCE(tcp_rsk(req)->txhash, net_tx_rndhash());
+ res = af_ops->send_synack(sk, NULL, &fl, req, NULL, TCP_SYNACK_NORMAL,
+ NULL);
+ if (!res) {
+diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+index 8d61efeab9c99..0dcb06a1fe044 100644
+--- a/net/ipv6/tcp_ipv6.c
++++ b/net/ipv6/tcp_ipv6.c
+@@ -1133,7 +1133,7 @@ static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
+ req->ts_recent, sk->sk_bound_dev_if,
+ tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr, l3index),
+ ipv6_get_dsfield(ipv6_hdr(skb)), 0, sk->sk_priority,
+- tcp_rsk(req)->txhash);
++ READ_ONCE(tcp_rsk(req)->txhash));
+ }
+
+
+--
+2.39.2
+
--- /dev/null
+From 078902bb3940caf45e1f58470e88e8184a16486d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:50 +0000
+Subject: tcp: annotate data-races around tp->keepalive_intvl
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 5ecf9d4f52ff2f1d4d44c9b68bc75688e82f13b4 ]
+
+do_tcp_getsockopt() reads tp->keepalive_intvl while another cpu
+might change its value.
+
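+The same pattern is used throughout this series; as a rough sketch
+(illustrative only, mirroring the hunks below), the setsockopt path
+becomes the annotated writer and the lockless getsockopt/helper path
+the annotated reader:
+
+  /* writer, under lock_sock(sk) */
+  WRITE_ONCE(tp->keepalive_intvl, val * HZ);
+
+  /* lockless reader, e.g. keepalive_intvl_when() */
+  int intvl = READ_ONCE(tp->keepalive_intvl);
+
+The annotations document the intentional lockless access and keep
+KCSAN from flagging it, without changing the locking itself.
+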
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-5-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/tcp.h | 9 +++++++--
+ net/ipv4/tcp.c | 4 ++--
+ 2 files changed, 9 insertions(+), 4 deletions(-)
+
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 397c248102415..f39c44cbdfe62 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1511,9 +1511,14 @@ void tcp_leave_memory_pressure(struct sock *sk);
+ static inline int keepalive_intvl_when(const struct tcp_sock *tp)
+ {
+ struct net *net = sock_net((struct sock *)tp);
++ int val;
++
++ /* Paired with WRITE_ONCE() in tcp_sock_set_keepintvl()
++ * and do_tcp_setsockopt().
++ */
++ val = READ_ONCE(tp->keepalive_intvl);
+
+- return tp->keepalive_intvl ? :
+- READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
++ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_intvl);
+ }
+
+ static inline int keepalive_time_when(const struct tcp_sock *tp)
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index c0d7b226bca1a..d19cfeb78392d 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3451,7 +3451,7 @@ int tcp_sock_set_keepintvl(struct sock *sk, int val)
+ return -EINVAL;
+
+ lock_sock(sk);
+- tcp_sk(sk)->keepalive_intvl = val * HZ;
++ WRITE_ONCE(tcp_sk(sk)->keepalive_intvl, val * HZ);
+ release_sock(sk);
+ return 0;
+ }
+@@ -3665,7 +3665,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ if (val < 1 || val > MAX_TCP_KEEPINTVL)
+ err = -EINVAL;
+ else
+- tp->keepalive_intvl = val * HZ;
++ WRITE_ONCE(tp->keepalive_intvl, val * HZ);
+ break;
+ case TCP_KEEPCNT:
+ if (val < 1 || val > MAX_TCP_KEEPCNT)
+--
+2.39.2
+
--- /dev/null
+From 8b50db4f550c9b4fa395cb961dd7c9ab6b4ac010 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:51 +0000
+Subject: tcp: annotate data-races around tp->keepalive_probes
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 6e5e1de616bf5f3df1769abc9292191dfad9110a ]
+
+do_tcp_getsockopt() reads tp->keepalive_probes while another cpu
+might change its value.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-6-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/tcp.h | 9 +++++++--
+ net/ipv4/tcp.c | 5 +++--
+ 2 files changed, 10 insertions(+), 4 deletions(-)
+
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index f39c44cbdfe62..9733d8e4f10af 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1535,9 +1535,14 @@ static inline int keepalive_time_when(const struct tcp_sock *tp)
+ static inline int keepalive_probes(const struct tcp_sock *tp)
+ {
+ struct net *net = sock_net((struct sock *)tp);
++ int val;
++
++ /* Paired with WRITE_ONCE() in tcp_sock_set_keepcnt()
++ * and do_tcp_setsockopt().
++ */
++ val = READ_ONCE(tp->keepalive_probes);
+
+- return tp->keepalive_probes ? :
+- READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
++ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_probes);
+ }
+
+ static inline u32 keepalive_time_elapsed(const struct tcp_sock *tp)
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index d19cfeb78392d..7d75928ea0f9c 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3463,7 +3463,8 @@ int tcp_sock_set_keepcnt(struct sock *sk, int val)
+ return -EINVAL;
+
+ lock_sock(sk);
+- tcp_sk(sk)->keepalive_probes = val;
++ /* Paired with READ_ONCE() in keepalive_probes() */
++ WRITE_ONCE(tcp_sk(sk)->keepalive_probes, val);
+ release_sock(sk);
+ return 0;
+ }
+@@ -3671,7 +3672,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ if (val < 1 || val > MAX_TCP_KEEPCNT)
+ err = -EINVAL;
+ else
+- tp->keepalive_probes = val;
++ WRITE_ONCE(tp->keepalive_probes, val);
+ break;
+ case TCP_SYNCNT:
+ if (val < 1 || val > MAX_TCP_SYNCNT)
+--
+2.39.2
+
--- /dev/null
+From 9121aedbe1355d93c6f3ab514d0878a9099021f0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:49 +0000
+Subject: tcp: annotate data-races around tp->keepalive_time
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 4164245c76ff906c9086758e1c3f87082a7f5ef5 ]
+
+do_tcp_getsockopt() reads tp->keepalive_time while another cpu
+might change its value.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-4-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/tcp.h | 7 +++++--
+ net/ipv4/tcp.c | 3 ++-
+ 2 files changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 5eedd476a38d7..397c248102415 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -1519,9 +1519,12 @@ static inline int keepalive_intvl_when(const struct tcp_sock *tp)
+ static inline int keepalive_time_when(const struct tcp_sock *tp)
+ {
+ struct net *net = sock_net((struct sock *)tp);
++ int val;
+
+- return tp->keepalive_time ? :
+- READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
++ /* Paired with WRITE_ONCE() in tcp_sock_set_keepidle_locked() */
++ val = READ_ONCE(tp->keepalive_time);
++
++ return val ? : READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time);
+ }
+
+ static inline int keepalive_probes(const struct tcp_sock *tp)
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 3edf7a1c5cbd2..c0d7b226bca1a 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3418,7 +3418,8 @@ int tcp_sock_set_keepidle_locked(struct sock *sk, int val)
+ if (val < 1 || val > MAX_TCP_KEEPIDLE)
+ return -EINVAL;
+
+- tp->keepalive_time = val * HZ;
++ /* Paired with WRITE_ONCE() in keepalive_time_when() */
++ WRITE_ONCE(tp->keepalive_time, val * HZ);
+ if (sock_flag(sk, SOCK_KEEPOPEN) &&
+ !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
+ u32 elapsed = keepalive_time_elapsed(tp);
+--
+2.39.2
+
--- /dev/null
+From 3d98c816d1920605a924d0ead6bf2be144e81749 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:53 +0000
+Subject: tcp: annotate data-races around tp->linger2
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 9df5335ca974e688389c875546e5819778a80d59 ]
+
+do_tcp_getsockopt() reads tp->linger2 while another cpu
+might change its value.
+
+Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-8-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index ffa9717293358..363535b6ece83 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3691,11 +3691,11 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+
+ case TCP_LINGER2:
+ if (val < 0)
+- tp->linger2 = -1;
++ WRITE_ONCE(tp->linger2, -1);
+ else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
+- tp->linger2 = TCP_FIN_TIMEOUT_MAX;
++ WRITE_ONCE(tp->linger2, TCP_FIN_TIMEOUT_MAX);
+ else
+- tp->linger2 = val * HZ;
++ WRITE_ONCE(tp->linger2, val * HZ);
+ break;
+
+ case TCP_DEFER_ACCEPT:
+@@ -4099,7 +4099,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
+ READ_ONCE(net->ipv4.sysctl_tcp_syn_retries);
+ break;
+ case TCP_LINGER2:
+- val = tp->linger2;
++ val = READ_ONCE(tp->linger2);
+ if (val >= 0)
+ val = (val ? : READ_ONCE(net->ipv4.sysctl_tcp_fin_timeout)) / HZ;
+ break;
+--
+2.39.2
+
--- /dev/null
+From e13aeaa389758176f64c75eeb7dd1bf6ebee1871 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:55 +0000
+Subject: tcp: annotate data-races around tp->notsent_lowat
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 1aeb87bc1440c5447a7fa2d6e3c2cca52cbd206b ]
+
+tp->notsent_lowat can be read locklessly from do_tcp_getsockopt()
+and tcp_poll().
+
+Fixes: c9bee3b7fdec ("tcp: TCP_NOTSENT_LOWAT socket option")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-10-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/tcp.h | 6 +++++-
+ net/ipv4/tcp.c | 4 ++--
+ 2 files changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index 9733d8e4f10af..e9c8f88f47696 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -2059,7 +2059,11 @@ void __tcp_v4_send_check(struct sk_buff *skb, __be32 saddr, __be32 daddr);
+ static inline u32 tcp_notsent_lowat(const struct tcp_sock *tp)
+ {
+ struct net *net = sock_net((struct sock *)tp);
+- return tp->notsent_lowat ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
++ u32 val;
++
++ val = READ_ONCE(tp->notsent_lowat);
++
++ return val ?: READ_ONCE(net->ipv4.sysctl_tcp_notsent_lowat);
+ }
+
+ bool tcp_stream_memory_free(const struct sock *sk, int wake);
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index bc3ad48f92389..6f3a494b965ae 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3770,7 +3770,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ err = tcp_repair_set_window(tp, optval, optlen);
+ break;
+ case TCP_NOTSENT_LOWAT:
+- tp->notsent_lowat = val;
++ WRITE_ONCE(tp->notsent_lowat, val);
+ sk->sk_write_space(sk);
+ break;
+ case TCP_INQ:
+@@ -4266,7 +4266,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
+ val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset);
+ break;
+ case TCP_NOTSENT_LOWAT:
+- val = tp->notsent_lowat;
++ val = READ_ONCE(tp->notsent_lowat);
+ break;
+ case TCP_INQ:
+ val = tp->recvmsg_inq;
+--
+2.39.2
+
--- /dev/null
+From acc05127977764c50f101313e03fed5dd0b7728e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:47 +0000
+Subject: tcp: annotate data-races around tp->tcp_tx_delay
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit 348b81b68b13ebd489a3e6a46aa1c384c731c919 ]
+
+do_tcp_getsockopt() reads tp->tcp_tx_delay while another cpu
+might change its value.
+
+Fixes: a842fe1425cb ("tcp: add optional per socket transmit delay")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-2-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 0bd0be3c63d22..5e4bc80dc0ae5 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3780,7 +3780,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ case TCP_TX_DELAY:
+ if (val)
+ tcp_enable_tx_delay();
+- tp->tcp_tx_delay = val;
++ WRITE_ONCE(tp->tcp_tx_delay, val);
+ break;
+ default:
+ err = -ENOPROTOOPT;
+@@ -4256,7 +4256,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
+ break;
+
+ case TCP_TX_DELAY:
+- val = tp->tcp_tx_delay;
++ val = READ_ONCE(tp->tcp_tx_delay);
+ break;
+
+ case TCP_TIMESTAMP:
+--
+2.39.2
+
--- /dev/null
+From 5cb5df7c5c218e8bc062747711555eb97a17ceb0 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 19 Jul 2023 21:28:48 +0000
+Subject: tcp: annotate data-races around tp->tsoffset
+
+From: Eric Dumazet <edumazet@google.com>
+
+[ Upstream commit dd23c9f1e8d5c1d2e3d29393412385ccb9c7a948 ]
+
+do_tcp_getsockopt() reads tp->tsoffset while another cpu
+might change its value.
+
+Fixes: 93be6ce0e91b ("tcp: set and get per-socket timestamp")
+Signed-off-by: Eric Dumazet <edumazet@google.com>
+Link: https://lore.kernel.org/r/20230719212857.3943972-3-edumazet@google.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp.c | 4 ++--
+ net/ipv4/tcp_ipv4.c | 5 +++--
+ 2 files changed, 5 insertions(+), 4 deletions(-)
+
+diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+index 5e4bc80dc0ae5..3edf7a1c5cbd2 100644
+--- a/net/ipv4/tcp.c
++++ b/net/ipv4/tcp.c
+@@ -3762,7 +3762,7 @@ int do_tcp_setsockopt(struct sock *sk, int level, int optname,
+ if (!tp->repair)
+ err = -EPERM;
+ else
+- tp->tsoffset = val - tcp_time_stamp_raw();
++ WRITE_ONCE(tp->tsoffset, val - tcp_time_stamp_raw());
+ break;
+ case TCP_REPAIR_WINDOW:
+ err = tcp_repair_set_window(tp, optval, optlen);
+@@ -4260,7 +4260,7 @@ int do_tcp_getsockopt(struct sock *sk, int level,
+ break;
+
+ case TCP_TIMESTAMP:
+- val = tcp_time_stamp_raw() + tp->tsoffset;
++ val = tcp_time_stamp_raw() + READ_ONCE(tp->tsoffset);
+ break;
+ case TCP_NOTSENT_LOWAT:
+ val = tp->notsent_lowat;
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index d49a66b271d52..9a8d59e9303a0 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -307,8 +307,9 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
+ inet->inet_daddr,
+ inet->inet_sport,
+ usin->sin_port));
+- tp->tsoffset = secure_tcp_ts_off(net, inet->inet_saddr,
+- inet->inet_daddr);
++ WRITE_ONCE(tp->tsoffset,
++ secure_tcp_ts_off(net, inet->inet_saddr,
++ inet->inet_daddr));
+ }
+
+ inet->inet_id = get_random_u16();
+--
+2.39.2
+
--- /dev/null
+From 3af33ea1ad72a1fc6ed5074f0ce9e16cc52c818e Mon Sep 17 00:00:00 2001
+From: Jan Kara <jack@suse.cz>
+Date: Wed, 21 Jun 2023 11:32:35 +0200
+Subject: [PATCH AUTOSEL 4.19 07/11] udf: Fix uninitialized array access for
+ some pathnames
+X-stable: review
+X-Patchwork-Hint: Ignore
+X-stable-base: Linux 4.19.288
+
+[ Upstream commit 028f6055c912588e6f72722d89c30b401bbcf013 ]
+
+For filenames that begin with . and are between 2 and 5 characters long,
+UDF charset conversion code would read uninitialized memory in the
+output buffer. The only practical impact is that a "unification hash" may
+be prepended to the name when it is not actually needed, but it is still
+good to fix this.
+
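+The problem is visible in the translate check itself; as a sketch of
+the condition in udf_name_from_CS0() before and after (taken from the
+hunk below), with str_o_len == 0 the first conjunct of the old test is
+still true, so str_o[0] is read before anything was written to the
+output buffer:
+
+  /* before: str_o_len == 0 still evaluates str_o[0] */
+  if (str_o_len <= 2 && str_o[0] == '.' &&
+      (str_o_len == 1 || str_o[1] == '.'))
+
+  /* after: the dereference is guarded */
+  if (str_o_len > 0 && str_o_len <= 2 && str_o[0] == '.' &&
+      (str_o_len == 1 || str_o[1] == '.'))
+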
+Reported-by: syzbot+cd311b1e43cc25f90d18@syzkaller.appspotmail.com
+Link: https://lore.kernel.org/all/000000000000e2638a05fe9dc8f9@google.com
+Signed-off-by: Jan Kara <jack@suse.cz>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/udf/unicode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
+index 5fcfa96463ebb..85521d6b02370 100644
+--- a/fs/udf/unicode.c
++++ b/fs/udf/unicode.c
+@@ -247,7 +247,7 @@ static int udf_name_from_CS0(struct super_block *sb,
+ }
+
+ if (translate) {
+- if (str_o_len <= 2 && str_o[0] == '.' &&
++ if (str_o_len > 0 && str_o_len <= 2 && str_o[0] == '.' &&
+ (str_o_len == 1 || str_o[1] == '.'))
+ needsCRC = 1;
+ if (needsCRC) {
+--
+2.39.2
+
--- /dev/null
+From 1c0a043a5b5d55b841bdb8e72a4e7dbded64e33b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 26 May 2023 12:41:06 +0300
+Subject: wifi: ath11k: add support default regdb while searching board-2.bin
+ for WCN6855
+
+From: Wen Gong <quic_wgong@quicinc.com>
+
+[ Upstream commit 88ca89202f8e8afb5225eb5244d79cd67c15d744 ]
+
+Sometimes board-2.bin does not have regdb data matching parameters such
+as vendor, device, subsystem-vendor, subsystem-device, etc. Add default
+regdb data keyed by 'bus=%s' to board-2.bin for WCN6855, so that ath11k
+can fall back to 'bus=pci' when searching for regdb data in board-2.bin
+for WCN6855.
+
+kernel: [ 122.515808] ath11k_pci 0000:03:00.0: boot using board name 'bus=pci,vendor=17cb,device=1103,subsystem-vendor=17cb,subsystem-device=3374,qmi-chip-id=2,qmi-board-id=262'
+kernel: [ 122.517240] ath11k_pci 0000:03:00.0: boot firmware request ath11k/WCN6855/hw2.0/board-2.bin size 6179564
+kernel: [ 122.517280] ath11k_pci 0000:03:00.0: failed to fetch regdb data for bus=pci,vendor=17cb,device=1103,subsystem-vendor=17cb,subsystem-device=3374,qmi-chip-id=2,qmi-board-id=262 from ath11k/WCN6855/hw2.0/board-2.bin
+kernel: [ 122.517464] ath11k_pci 0000:03:00.0: boot using board name 'bus=pci'
+kernel: [ 122.518901] ath11k_pci 0000:03:00.0: boot firmware request ath11k/WCN6855/hw2.0/board-2.bin size 6179564
+kernel: [ 122.518915] ath11k_pci 0000:03:00.0: board name
+kernel: [ 122.518917] ath11k_pci 0000:03:00.0: 00000000: 62 75 73 3d 70 63 69 bus=pci
+kernel: [ 122.518918] ath11k_pci 0000:03:00.0: boot found match regdb data for name 'bus=pci'
+kernel: [ 122.518920] ath11k_pci 0000:03:00.0: boot found regdb data for 'bus=pci'
+kernel: [ 122.518921] ath11k_pci 0000:03:00.0: fetched regdb
+
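+The resulting lookup order in ath11k_core_fetch_regdb() is, roughly
+(simplified sketch based on the hunks below; arguments abbreviated):
+
+  /* 1. exact board name: bus, vendor/device, subsystem ids, qmi ids */
+  ath11k_core_create_board_name(ab, boardname, BOARD_NAME_SIZE);
+  ret = ath11k_core_fetch_board_data_api_n(ab, bd, boardname, ...);
+
+  /* 2. new fallback: bus type only, e.g. "bus=pci" */
+  ath11k_core_create_bus_type_board_name(ab, default_boardname,
+                                         BOARD_NAME_SIZE);
+  ret = ath11k_core_fetch_board_data_api_n(ab, bd, default_boardname, ...);
+
+  /* 3. last resort: the standalone regdb file */
+  ret = ath11k_core_fetch_board_data_api_1(ab, bd, ATH11K_REGDB_FILE_NAME);
+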
+Tested-on: WCN6855 hw2.0 PCI WLAN.HSP.1.1-03125-QCAHSPSWPL_V1_V2_SILICONZ_LITE-3
+
+Signed-off-by: Wen Gong <quic_wgong@quicinc.com>
+Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
+Link: https://lore.kernel.org/r/20230517133959.8224-1-quic_wgong@quicinc.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/ath/ath11k/core.c | 53 +++++++++++++++++++-------
+ 1 file changed, 40 insertions(+), 13 deletions(-)
+
+diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
+index b99180bc81723..893fefadbba96 100644
+--- a/drivers/net/wireless/ath/ath11k/core.c
++++ b/drivers/net/wireless/ath/ath11k/core.c
+@@ -870,7 +870,8 @@ int ath11k_core_check_dt(struct ath11k_base *ab)
+ }
+
+ static int __ath11k_core_create_board_name(struct ath11k_base *ab, char *name,
+- size_t name_len, bool with_variant)
++ size_t name_len, bool with_variant,
++ bool bus_type_mode)
+ {
+ /* strlen(',variant=') + strlen(ab->qmi.target.bdf_ext) */
+ char variant[9 + ATH11K_QMI_BDF_EXT_STR_LENGTH] = { 0 };
+@@ -881,15 +882,20 @@ static int __ath11k_core_create_board_name(struct ath11k_base *ab, char *name,
+
+ switch (ab->id.bdf_search) {
+ case ATH11K_BDF_SEARCH_BUS_AND_BOARD:
+- scnprintf(name, name_len,
+- "bus=%s,vendor=%04x,device=%04x,subsystem-vendor=%04x,subsystem-device=%04x,qmi-chip-id=%d,qmi-board-id=%d%s",
+- ath11k_bus_str(ab->hif.bus),
+- ab->id.vendor, ab->id.device,
+- ab->id.subsystem_vendor,
+- ab->id.subsystem_device,
+- ab->qmi.target.chip_id,
+- ab->qmi.target.board_id,
+- variant);
++ if (bus_type_mode)
++ scnprintf(name, name_len,
++ "bus=%s",
++ ath11k_bus_str(ab->hif.bus));
++ else
++ scnprintf(name, name_len,
++ "bus=%s,vendor=%04x,device=%04x,subsystem-vendor=%04x,subsystem-device=%04x,qmi-chip-id=%d,qmi-board-id=%d%s",
++ ath11k_bus_str(ab->hif.bus),
++ ab->id.vendor, ab->id.device,
++ ab->id.subsystem_vendor,
++ ab->id.subsystem_device,
++ ab->qmi.target.chip_id,
++ ab->qmi.target.board_id,
++ variant);
+ break;
+ default:
+ scnprintf(name, name_len,
+@@ -908,13 +914,19 @@ static int __ath11k_core_create_board_name(struct ath11k_base *ab, char *name,
+ static int ath11k_core_create_board_name(struct ath11k_base *ab, char *name,
+ size_t name_len)
+ {
+- return __ath11k_core_create_board_name(ab, name, name_len, true);
++ return __ath11k_core_create_board_name(ab, name, name_len, true, false);
+ }
+
+ static int ath11k_core_create_fallback_board_name(struct ath11k_base *ab, char *name,
+ size_t name_len)
+ {
+- return __ath11k_core_create_board_name(ab, name, name_len, false);
++ return __ath11k_core_create_board_name(ab, name, name_len, false, false);
++}
++
++static int ath11k_core_create_bus_type_board_name(struct ath11k_base *ab, char *name,
++ size_t name_len)
++{
++ return __ath11k_core_create_board_name(ab, name, name_len, false, true);
+ }
+
+ const struct firmware *ath11k_core_firmware_request(struct ath11k_base *ab,
+@@ -1218,7 +1230,7 @@ int ath11k_core_fetch_bdf(struct ath11k_base *ab, struct ath11k_board_data *bd)
+
+ int ath11k_core_fetch_regdb(struct ath11k_base *ab, struct ath11k_board_data *bd)
+ {
+- char boardname[BOARD_NAME_SIZE];
++ char boardname[BOARD_NAME_SIZE], default_boardname[BOARD_NAME_SIZE];
+ int ret;
+
+ ret = ath11k_core_create_board_name(ab, boardname, BOARD_NAME_SIZE);
+@@ -1235,6 +1247,21 @@ int ath11k_core_fetch_regdb(struct ath11k_base *ab, struct ath11k_board_data *bd
+ if (!ret)
+ goto exit;
+
++ ret = ath11k_core_create_bus_type_board_name(ab, default_boardname,
++ BOARD_NAME_SIZE);
++ if (ret) {
++ ath11k_dbg(ab, ATH11K_DBG_BOOT,
++ "failed to create default board name for regdb: %d", ret);
++ goto exit;
++ }
++
++ ret = ath11k_core_fetch_board_data_api_n(ab, bd, default_boardname,
++ ATH11K_BD_IE_REGDB,
++ ATH11K_BD_IE_REGDB_NAME,
++ ATH11K_BD_IE_REGDB_DATA);
++ if (!ret)
++ goto exit;
++
+ ret = ath11k_core_fetch_board_data_api_1(ab, bd, ATH11K_REGDB_FILE_NAME);
+ if (ret)
+ ath11k_dbg(ab, ATH11K_DBG_BOOT, "failed to fetch %s from %s\n",
+--
+2.39.2
+
--- /dev/null
+From d4bcf71d3c456ca0656ec111454eda83581a3d2c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 6 Jun 2023 14:41:28 +0530
+Subject: wifi: ath11k: fix memory leak in WMI firmware stats
+
+From: P Praneesh <quic_ppranees@quicinc.com>
+
+[ Upstream commit 6aafa1c2d3e3fea2ebe84c018003f2a91722e607 ]
+
+Memory allocated for firmware pdev, vdev and beacon statistics
+is not released during rmmod.
+
+Fix it by calling ath11k_fw_stats_free() function before hardware
+unregister.
+
+While at it, avoid calling ath11k_fw_stats_free() while processing
+the firmware stats received in the WMI event: the local lists are
+spliced and reinitialised there, so they hold no elements by the time
+the free would run.
+
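+As a hedged illustration of why the free in the event handler is dead
+code (assuming the handler hands the locally built lists over with
+list_splice_tail_init(), which re-initialises the source list):
+
+  /* local stats lists are spliced into the per-radio copy ... */
+  list_splice_tail_init(&stats.pdevs, &ar->fw_stats.pdevs);
+  /* ... so stats.pdevs is empty again here and
+   * ath11k_fw_stats_free(&stats) would free nothing; the entries now
+   * belong to ar->fw_stats and are released in ath11k_mac_destroy().
+   */
+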
+Tested-on: QCN9074 hw1.0 PCI WLAN.HK.2.7.0.1-01744-QCAHKSWPL_SILICONZ-1
+
+Signed-off-by: P Praneesh <quic_ppranees@quicinc.com>
+Signed-off-by: Aditya Kumar Singh <quic_adisi@quicinc.com>
+Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
+Link: https://lore.kernel.org/r/20230606091128.14202-1-quic_adisi@quicinc.com
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/ath/ath11k/mac.c | 1 +
+ drivers/net/wireless/ath/ath11k/wmi.c | 5 +++++
+ 2 files changed, 6 insertions(+)
+
+diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
+index b19d44b3f5dfb..cb77dd6ce9665 100644
+--- a/drivers/net/wireless/ath/ath11k/mac.c
++++ b/drivers/net/wireless/ath/ath11k/mac.c
+@@ -9279,6 +9279,7 @@ void ath11k_mac_destroy(struct ath11k_base *ab)
+ if (!ar)
+ continue;
+
++ ath11k_fw_stats_free(&ar->fw_stats);
+ ieee80211_free_hw(ar->hw);
+ pdev->ar = NULL;
+ }
+diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
+index fad9f8d308a20..3e0a47f4a3ebd 100644
+--- a/drivers/net/wireless/ath/ath11k/wmi.c
++++ b/drivers/net/wireless/ath/ath11k/wmi.c
+@@ -7590,6 +7590,11 @@ static void ath11k_update_stats_event(struct ath11k_base *ab, struct sk_buff *sk
+ rcu_read_unlock();
+ spin_unlock_bh(&ar->data_lock);
+
++ /* Since the stats's pdev, vdev and beacon list are spliced and reinitialised
++ * at this point, no need to free the individual list.
++ */
++ return;
++
+ free:
+ ath11k_fw_stats_free(&stats);
+ }
+--
+2.39.2
+
--- /dev/null
+From 885bcbfa0c9659fa068668223c2f45c63640b4c2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 21 Apr 2023 16:54:45 +0200
+Subject: wifi: ath11k: fix registration of 6Ghz-only phy without the full
+ channel range
+
+From: Maxime Bizon <mbizon@freebox.fr>
+
+[ Upstream commit e2ceb1de2f83aafd8003f0b72dfd4b7441e97d14 ]
+
+Because of what seems to be a typo, a 6 GHz-only phy for which the BDF
+does not allow the 7115 MHz channel will fail to register:
+
+ WARNING: CPU: 2 PID: 106 at net/wireless/core.c:907 wiphy_register+0x914/0x954
+ Modules linked in: ath11k_pci sbsa_gwdt
+ CPU: 2 PID: 106 Comm: kworker/u8:5 Not tainted 6.3.0-rc7-next-20230418-00549-g1e096a17625a-dirty #9
+ Hardware name: Freebox V7R Board (DT)
+ Workqueue: ath11k_qmi_driver_event ath11k_qmi_driver_event_work
+ pstate: 60000005 (nZCv daif -PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+ pc : wiphy_register+0x914/0x954
+ lr : ieee80211_register_hw+0x67c/0xc10
+ sp : ffffff800b123aa0
+ x29: ffffff800b123aa0 x28: 0000000000000000 x27: 0000000000000000
+ x26: 0000000000000000 x25: 0000000000000006 x24: ffffffc008d51418
+ x23: ffffffc008cb0838 x22: ffffff80176c2460 x21: 0000000000000168
+ x20: ffffff80176c0000 x19: ffffff80176c03e0 x18: 0000000000000014
+ x17: 00000000cbef338c x16: 00000000d2a26f21 x15: 00000000ad6bb85f
+ x14: 0000000000000020 x13: 0000000000000020 x12: 00000000ffffffbd
+ x11: 0000000000000208 x10: 00000000fffffdf7 x9 : ffffffc009394718
+ x8 : ffffff80176c0528 x7 : 000000007fffffff x6 : 0000000000000006
+ x5 : 0000000000000005 x4 : ffffff800b304284 x3 : ffffff800b304284
+ x2 : ffffff800b304d98 x1 : 0000000000000000 x0 : 0000000000000000
+ Call trace:
+ wiphy_register+0x914/0x954
+ ieee80211_register_hw+0x67c/0xc10
+ ath11k_mac_register+0x7c4/0xe10
+ ath11k_core_qmi_firmware_ready+0x1f4/0x570
+ ath11k_qmi_driver_event_work+0x198/0x590
+ process_one_work+0x1b8/0x328
+ worker_thread+0x6c/0x414
+ kthread+0x100/0x104
+ ret_from_fork+0x10/0x20
+ ---[ end trace 0000000000000000 ]---
+ ath11k_pci 0002:01:00.0: ieee80211 registration failed: -22
+ ath11k_pci 0002:01:00.0: failed register the radio with mac80211: -22
+ ath11k_pci 0002:01:00.0: failed to create pdev core: -22
+
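+In other words (a sketch of the check in ath11k_mac_setup_channels_rates();
+the two constants are assumed to be the driver's 6 GHz band bounds, with
+ATH11K_MAX_6G_FREQ at 7115 MHz):
+
+  /* old: the 6 GHz channel table is only used when the BDF allows the
+   * very top of the band (7115 MHz)
+   */
+  if (reg_cap->high_5ghz_chan >= ATH11K_MAX_6G_FREQ) { ... }
+
+  /* new: any phy whose upper limit reaches into the 6 GHz band gets
+   * the 6 GHz table
+   */
+  if (reg_cap->high_5ghz_chan >= ATH11K_MIN_6G_FREQ) { ... }
+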
+Signed-off-by: Maxime Bizon <mbizon@freebox.fr>
+Signed-off-by: Kalle Valo <quic_kvalo@quicinc.com>
+Link: https://lore.kernel.org/r/20230421145445.2612280-1-mbizon@freebox.fr
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/ath/ath11k/mac.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
+index ef7617802491e..b19d44b3f5dfb 100644
+--- a/drivers/net/wireless/ath/ath11k/mac.c
++++ b/drivers/net/wireless/ath/ath11k/mac.c
+@@ -8715,7 +8715,7 @@ static int ath11k_mac_setup_channels_rates(struct ath11k *ar,
+ }
+
+ if (supported_bands & WMI_HOST_WLAN_5G_CAP) {
+- if (reg_cap->high_5ghz_chan >= ATH11K_MAX_6G_FREQ) {
++ if (reg_cap->high_5ghz_chan >= ATH11K_MIN_6G_FREQ) {
+ channels = kmemdup(ath11k_6ghz_channels,
+ sizeof(ath11k_6ghz_channels), GFP_KERNEL);
+ if (!channels) {
+--
+2.39.2
+
--- /dev/null
+From 1a37162f09f199864048ac62ae05cc6310aef58f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Jun 2023 13:03:59 +0300
+Subject: wifi: iwlwifi: Add support for new PCI Id
+
+From: Mukesh Sisodiya <mukesh.sisodiya@intel.com>
+
+[ Upstream commit 35bd6f1d043d089fcb60450e1287cc65f0095787 ]
+
+Add support for the PCI Id 51F1 without IMR support.
+
+Signed-off-by: Mukesh Sisodiya <mukesh.sisodiya@intel.com>
+Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
+Link: https://lore.kernel.org/r/20230620125813.9800e652e789.Ic06a085832ac3f988c8ef07d856c8e281563295d@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+index f6872b2a0d9d0..d5bd869086458 100644
+--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
++++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+@@ -495,6 +495,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
+ {IWL_PCI_DEVICE(0x7AF0, PCI_ANY_ID, iwl_so_trans_cfg)},
+ {IWL_PCI_DEVICE(0x51F0, PCI_ANY_ID, iwl_so_long_latency_trans_cfg)},
+ {IWL_PCI_DEVICE(0x51F1, PCI_ANY_ID, iwl_so_long_latency_imr_trans_cfg)},
++ {IWL_PCI_DEVICE(0x51F1, PCI_ANY_ID, iwl_so_long_latency_trans_cfg)},
+ {IWL_PCI_DEVICE(0x54F0, PCI_ANY_ID, iwl_so_long_latency_trans_cfg)},
+ {IWL_PCI_DEVICE(0x7F70, PCI_ANY_ID, iwl_so_trans_cfg)},
+
+@@ -543,6 +544,7 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
+ IWL_DEV_INFO(0x51F0, 0x1551, iwl9560_2ac_cfg_soc, iwl9560_killer_1550i_160_name),
+ IWL_DEV_INFO(0x51F0, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name),
+ IWL_DEV_INFO(0x51F0, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name),
++ IWL_DEV_INFO(0x51F1, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name),
+ IWL_DEV_INFO(0x54F0, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name),
+ IWL_DEV_INFO(0x54F0, 0x1692, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690i_name),
+ IWL_DEV_INFO(0x7A70, 0x1691, iwlax411_2ax_cfg_so_gf4_a0, iwl_ax411_killer_1690s_name),
+--
+2.39.2
+
--- /dev/null
+From dd01d6d149a5c58b8f2f7d9e9211ce28c8befd64 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 20 Jun 2023 13:04:02 +0300
+Subject: wifi: iwlwifi: mvm: avoid baid size integer overflow
+
+From: Johannes Berg <johannes.berg@intel.com>
+
+[ Upstream commit 1a528ab1da324d078ec60283c34c17848580df24 ]
+
+Roee reported various hard-to-debug crashes with pings in
+EHT aggregation scenarios. Enabling KASAN showed that we
+access the BAID allocation out of bounds. A look at the code shows
+that the reorder buffer entry (struct iwl_mvm_reorder_buf_entry) is
+128 bytes when debug options such as lockdep are enabled, so starting
+from an aggregation size of 512 the size calculation overflows and we
+allocate a much smaller structure than we should, causing slab
+corruption once it is initialized.
+
+Fix this by simply using u32 instead of u16.
+
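+The arithmetic, as a worked example (the 128-byte entry size is the
+debug/lockdep case described above; variable names are illustrative):
+
+  u16 bad  = 512 * 128;   /* 65536 truncated to 0 -> undersized alloc */
+  u32 good = 512 * 128;   /* 65536, the intended reorder buffer size  */
+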
+Reported-by: Roee Goldfiner <roee.h.goldfiner@intel.com>
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
+Link: https://lore.kernel.org/r/20230620125813.f428c856030d.I2c2bb808e945adb71bc15f5b2bac2d8957ea90eb@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+index 013aca70c3d3b..6b52afcf02721 100644
+--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
++++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+@@ -2738,7 +2738,7 @@ int iwl_mvm_sta_rx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
+ }
+
+ if (iwl_mvm_has_new_rx_api(mvm) && start) {
+- u16 reorder_buf_size = buf_size * sizeof(baid_data->entries[0]);
++ u32 reorder_buf_size = buf_size * sizeof(baid_data->entries[0]);
+
+ /* sparse doesn't like the __align() so don't check */
+ #ifndef __CHECKER__
+--
+2.39.2
+
--- /dev/null
+From 80c181a4bc2b86eb00ab6e09dcbcdda26aa6fc13 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 21 Jun 2023 13:12:20 +0300
+Subject: wifi: iwlwifi: pcie: add device id 51F1 for killer 1675
+
+From: Yi Kuo <yi@yikuo.dev>
+
+[ Upstream commit f4daceae4087bbb3e9a56044b44601d520d009d2 ]
+
+Intel Killer AX1675i/s with device id 51f1 would show
+"No config found for PCI dev 51f1/1672" in dmesg and refuse to work.
+Add the new device id 51F1 for 1675i/s to fix the issue.
+
+Signed-off-by: Yi Kuo <yi@yikuo.dev>
+Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
+Link: https://lore.kernel.org/r/20230621130444.ee224675380b.I921c905e21e8d041ad808def8f454f27b5ebcd8b@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+index d5bd869086458..4d4db5f6836be 100644
+--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
++++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+@@ -683,6 +683,8 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
+ IWL_DEV_INFO(0x2726, 0x1672, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675i_name),
+ IWL_DEV_INFO(0x51F0, 0x1671, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675s_name),
+ IWL_DEV_INFO(0x51F0, 0x1672, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675i_name),
++ IWL_DEV_INFO(0x51F1, 0x1671, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675s_name),
++ IWL_DEV_INFO(0x51F1, 0x1672, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675i_name),
+ IWL_DEV_INFO(0x54F0, 0x1671, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675s_name),
+ IWL_DEV_INFO(0x54F0, 0x1672, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675i_name),
+ IWL_DEV_INFO(0x7A70, 0x1671, iwlax211_2ax_cfg_so_gf_a0, iwl_ax211_killer_1675s_name),
+--
+2.39.2
+
--- /dev/null
+From a7163d690f5af8b426d97da0807e07b334cb5bdb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 4 Jun 2023 12:11:27 +0300
+Subject: wifi: mac80211_hwsim: Fix possible NULL dereference
+
+From: Ilan Peer <ilan.peer@intel.com>
+
+[ Upstream commit 0cc80943ef518a1c51a1111e9346d1daf11dd545 ]
+
+In a call to mac80211_hwsim_select_tx_link() the sta pointer might
+be NULL, so check that it is not NULL before accessing it.
+
+Signed-off-by: Ilan Peer <ilan.peer@intel.com>
+Signed-off-by: Gregory Greenman <gregory.greenman@intel.com>
+Link: https://lore.kernel.org/r/20230604120651.f4d889fc98c4.Iae85f527ed245a37637a874bb8b8c83d79812512@changeid
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/wireless/mac80211_hwsim.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
+index 0d81098c7b45c..da5c355405f68 100644
+--- a/drivers/net/wireless/mac80211_hwsim.c
++++ b/drivers/net/wireless/mac80211_hwsim.c
+@@ -4,7 +4,7 @@
+ * Copyright (c) 2008, Jouni Malinen <j@w1.fi>
+ * Copyright (c) 2011, Javier Lopez <jlopex@gmail.com>
+ * Copyright (c) 2016 - 2017 Intel Deutschland GmbH
+- * Copyright (C) 2018 - 2022 Intel Corporation
++ * Copyright (C) 2018 - 2023 Intel Corporation
+ */
+
+ /*
+@@ -1753,7 +1753,7 @@ mac80211_hwsim_select_tx_link(struct mac80211_hwsim_data *data,
+
+ WARN_ON(is_multicast_ether_addr(hdr->addr1));
+
+- if (WARN_ON_ONCE(!sta->valid_links))
++ if (WARN_ON_ONCE(!sta || !sta->valid_links))
+ return &vif->bss_conf;
+
+ for (i = 0; i < ARRAY_SIZE(vif->link_conf); i++) {
+--
+2.39.2
+
--- /dev/null
+From 683ebdf526ff6b7d1a58030e79ed32ee6779a0ac Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 15 Jun 2023 12:04:07 -0600
+Subject: wifi: wext-core: Fix -Wstringop-overflow warning in
+ ioctl_standard_iw_point()
+
+From: Gustavo A. R. Silva <gustavoars@kernel.org>
+
+[ Upstream commit 71e7552c90db2a2767f5c17c7ec72296b0d92061 ]
+
+-Wstringop-overflow is legitimately warning us about extra_size
+potentially being zero at some point, hence potentially ending
+up _allocating_ zero bytes of memory for the extra pointer and then
+trying to access that object in a call to copy_from_user().
+
+Fix this by adding a sanity check to ensure we never end up
+trying to allocate zero bytes of data for the extra pointer, before
+continuing to execute the rest of the code in the function.
+
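+A minimal sketch of the guarded path (simplified from
+ioctl_standard_iw_point(); how extra_size is computed is omitted):
+
+  if (extra_size <= 0)
+          return -EFAULT;   /* new sanity check */
+
+  /* kzalloc(0) returns ZERO_SIZE_PTR rather than NULL, so the !extra
+   * check below would not catch a zero-byte request and the later
+   * copy_from_user() would target a zero-sized object.
+   */
+  extra = kzalloc(extra_size, GFP_KERNEL);
+  if (!extra)
+          return -ENOMEM;
+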
+Address the following -Wstringop-overflow warning seen when building
+the m68k architecture with an allyesconfig configuration:
+ from net/wireless/wext-core.c:11:
+In function '_copy_from_user',
+ inlined from 'copy_from_user' at include/linux/uaccess.h:183:7,
+ inlined from 'ioctl_standard_iw_point' at net/wireless/wext-core.c:825:7:
+arch/m68k/include/asm/string.h:48:25: warning: '__builtin_memset' writing 1 or more bytes into a region of size 0 overflows the destination [-Wstringop-overflow=]
+ 48 | #define memset(d, c, n) __builtin_memset(d, c, n)
+ | ^~~~~~~~~~~~~~~~~~~~~~~~~
+include/linux/uaccess.h:153:17: note: in expansion of macro 'memset'
+ 153 | memset(to + (n - res), 0, res);
+ | ^~~~~~
+In function 'kmalloc',
+ inlined from 'kzalloc' at include/linux/slab.h:694:9,
+ inlined from 'ioctl_standard_iw_point' at net/wireless/wext-core.c:819:10:
+include/linux/slab.h:577:16: note: at offset 1 into destination object of size 0 allocated by '__kmalloc'
+ 577 | return __kmalloc(size, flags);
+ | ^~~~~~~~~~~~~~~~~~~~~~
+
+This helps with the ongoing efforts to globally enable
+-Wstringop-overflow.
+
+Link: https://github.com/KSPP/linux/issues/315
+Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
+Reviewed-by: Simon Horman <simon.horman@corigine.com>
+Link: https://lore.kernel.org/r/ZItSlzvIpjdjNfd8@work
+Signed-off-by: Johannes Berg <johannes.berg@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/wireless/wext-core.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
+index fe8765c4075d3..8a4b85f96a13a 100644
+--- a/net/wireless/wext-core.c
++++ b/net/wireless/wext-core.c
+@@ -799,6 +799,12 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd,
+ }
+ }
+
++ /* Sanity-check to ensure we never end up _allocating_ zero
++ * bytes of data for extra.
++ */
++ if (extra_size <= 0)
++ return -EFAULT;
++
+ /* kzalloc() ensures NULL-termination for essid_compat. */
+ extra = kzalloc(extra_size, GFP_KERNEL);
+ if (!extra)
+--
+2.39.2
+