--- /dev/null
+From a2c229d4cd8c1f7b3de3464ced7b0159d1778f6a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Feb 2025 21:07:28 +0000
+Subject: ALSA: hda/cirrus: Correct the full scale volume set logic
+
+From: Vitaly Rodionov <vitalyr@opensource.cirrus.com>
+
+[ Upstream commit 08b613b9e2ba431db3bd15cb68ca72472a50ef5c ]
+
+This patch corrects the full-scale volume setting logic. On certain
+platforms, the full-scale volume bit is required. The current logic
+mistakenly sets this bit and incorrectly clears reserved bit 0, causing
+the headphone output to be muted.
+
+Fixes: 342b6b610ae2 ("ALSA: hda/cs8409: Fix Full Scale Volume setting for all variants")
+Signed-off-by: Vitaly Rodionov <vitalyr@opensource.cirrus.com>
+Link: https://patch.msgid.link/20250214210736.30814-1-vitalyr@opensource.cirrus.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_cs8409-tables.c | 6 +++---
+ sound/pci/hda/patch_cs8409.c | 20 +++++++++++---------
+ sound/pci/hda/patch_cs8409.h | 5 +++--
+ 3 files changed, 17 insertions(+), 14 deletions(-)
+
+diff --git a/sound/pci/hda/patch_cs8409-tables.c b/sound/pci/hda/patch_cs8409-tables.c
+index 759f48038273d..621f947e38174 100644
+--- a/sound/pci/hda/patch_cs8409-tables.c
++++ b/sound/pci/hda/patch_cs8409-tables.c
+@@ -121,7 +121,7 @@ static const struct cs8409_i2c_param cs42l42_init_reg_seq[] = {
+ { CS42L42_MIXER_CHA_VOL, 0x3F },
+ { CS42L42_MIXER_CHB_VOL, 0x3F },
+ { CS42L42_MIXER_ADC_VOL, 0x3f },
+- { CS42L42_HP_CTL, 0x03 },
++ { CS42L42_HP_CTL, 0x0D },
+ { CS42L42_MIC_DET_CTL1, 0xB6 },
+ { CS42L42_TIPSENSE_CTL, 0xC2 },
+ { CS42L42_HS_CLAMP_DISABLE, 0x01 },
+@@ -315,7 +315,7 @@ static const struct cs8409_i2c_param dolphin_c0_init_reg_seq[] = {
+ { CS42L42_ASP_TX_SZ_EN, 0x01 },
+ { CS42L42_PWR_CTL1, 0x0A },
+ { CS42L42_PWR_CTL2, 0x84 },
+- { CS42L42_HP_CTL, 0x03 },
++ { CS42L42_HP_CTL, 0x0D },
+ { CS42L42_MIXER_CHA_VOL, 0x3F },
+ { CS42L42_MIXER_CHB_VOL, 0x3F },
+ { CS42L42_MIXER_ADC_VOL, 0x3f },
+@@ -371,7 +371,7 @@ static const struct cs8409_i2c_param dolphin_c1_init_reg_seq[] = {
+ { CS42L42_ASP_TX_SZ_EN, 0x00 },
+ { CS42L42_PWR_CTL1, 0x0E },
+ { CS42L42_PWR_CTL2, 0x84 },
+- { CS42L42_HP_CTL, 0x01 },
++ { CS42L42_HP_CTL, 0x0D },
+ { CS42L42_MIXER_CHA_VOL, 0x3F },
+ { CS42L42_MIXER_CHB_VOL, 0x3F },
+ { CS42L42_MIXER_ADC_VOL, 0x3f },
+diff --git a/sound/pci/hda/patch_cs8409.c b/sound/pci/hda/patch_cs8409.c
+index 614327218634c..b760332a4e357 100644
+--- a/sound/pci/hda/patch_cs8409.c
++++ b/sound/pci/hda/patch_cs8409.c
+@@ -876,7 +876,7 @@ static void cs42l42_resume(struct sub_codec *cs42l42)
+ { CS42L42_DET_INT_STATUS2, 0x00 },
+ { CS42L42_TSRS_PLUG_STATUS, 0x00 },
+ };
+- int fsv_old, fsv_new;
++ unsigned int fsv;
+
+ /* Bring CS42L42 out of Reset */
+ spec->gpio_data = snd_hda_codec_read(codec, CS8409_PIN_AFG, 0, AC_VERB_GET_GPIO_DATA, 0);
+@@ -893,13 +893,15 @@ static void cs42l42_resume(struct sub_codec *cs42l42)
+ /* Clear interrupts, by reading interrupt status registers */
+ cs8409_i2c_bulk_read(cs42l42, irq_regs, ARRAY_SIZE(irq_regs));
+
+- fsv_old = cs8409_i2c_read(cs42l42, CS42L42_HP_CTL);
+- if (cs42l42->full_scale_vol == CS42L42_FULL_SCALE_VOL_0DB)
+- fsv_new = fsv_old & ~CS42L42_FULL_SCALE_VOL_MASK;
+- else
+- fsv_new = fsv_old & CS42L42_FULL_SCALE_VOL_MASK;
+- if (fsv_new != fsv_old)
+- cs8409_i2c_write(cs42l42, CS42L42_HP_CTL, fsv_new);
++ fsv = cs8409_i2c_read(cs42l42, CS42L42_HP_CTL);
++ if (cs42l42->full_scale_vol) {
++ // Set the full scale volume bit
++ fsv |= CS42L42_FULL_SCALE_VOL_MASK;
++ cs8409_i2c_write(cs42l42, CS42L42_HP_CTL, fsv);
++ }
++ // Unmute analog channels A and B
++ fsv = (fsv & ~CS42L42_ANA_MUTE_AB);
++ cs8409_i2c_write(cs42l42, CS42L42_HP_CTL, fsv);
+
+ /* we have to explicitly allow unsol event handling even during the
+ * resume phase so that the jack event is processed properly
+@@ -920,7 +922,7 @@ static void cs42l42_suspend(struct sub_codec *cs42l42)
+ { CS42L42_MIXER_CHA_VOL, 0x3F },
+ { CS42L42_MIXER_ADC_VOL, 0x3F },
+ { CS42L42_MIXER_CHB_VOL, 0x3F },
+- { CS42L42_HP_CTL, 0x0F },
++ { CS42L42_HP_CTL, 0x0D },
+ { CS42L42_ASP_RX_DAI0_EN, 0x00 },
+ { CS42L42_ASP_CLK_CFG, 0x00 },
+ { CS42L42_PWR_CTL1, 0xFE },
+diff --git a/sound/pci/hda/patch_cs8409.h b/sound/pci/hda/patch_cs8409.h
+index 5e48115caf096..14645d25e70fd 100644
+--- a/sound/pci/hda/patch_cs8409.h
++++ b/sound/pci/hda/patch_cs8409.h
+@@ -230,9 +230,10 @@ enum cs8409_coefficient_index_registers {
+ #define CS42L42_PDN_TIMEOUT_US (250000)
+ #define CS42L42_PDN_SLEEP_US (2000)
+ #define CS42L42_INIT_TIMEOUT_MS (45)
++#define CS42L42_ANA_MUTE_AB (0x0C)
+ #define CS42L42_FULL_SCALE_VOL_MASK (2)
+-#define CS42L42_FULL_SCALE_VOL_0DB (1)
+-#define CS42L42_FULL_SCALE_VOL_MINUS6DB (0)
++#define CS42L42_FULL_SCALE_VOL_0DB (0)
++#define CS42L42_FULL_SCALE_VOL_MINUS6DB (1)
+
+ /* Dell BULLSEYE / WARLOCK / CYBORG Specific Definitions */
+
+--
+2.39.5
+
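+A condensed sketch of the net register update, assuming the bit layout
+implied by the new macros (bit 1 = full-scale volume, bits 2-3 = analog
+channel A/B mutes); the patch itself performs two writes, folded into
+one here for brevity:
+
+	unsigned int fsv = cs8409_i2c_read(cs42l42, CS42L42_HP_CTL);
+
+	if (cs42l42->full_scale_vol)	/* CS42L42_FULL_SCALE_VOL_MINUS6DB */
+		fsv |= CS42L42_FULL_SCALE_VOL_MASK;	/* set bit 1 */
+	fsv &= ~CS42L42_ANA_MUTE_AB;	/* unmute channels A and B */
+	cs8409_i2c_write(cs42l42, CS42L42_HP_CTL, fsv);
+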
--- /dev/null
+From baea8fdd63dc175a2ff1be50198a6f8b091e445f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 12 Feb 2025 14:40:46 +0800
+Subject: ALSA: hda/realtek: Fixup ALC225 depop procedure
+
+From: Kailang Yang <kailang@realtek.com>
+
+[ Upstream commit 174448badb4409491bfba2e6b46f7aa078741c5e ]
+
+Headset MIC will not function when power_save=0.
+
+Fixes: 1fd50509fe14 ("ALSA: hda/realtek: Update ALC225 depop procedure")
+Link: https://bugzilla.kernel.org/show_bug.cgi?id=219743
+Signed-off-by: Kailang Yang <kailang@realtek.com>
+Link: https://lore.kernel.org/0474a095ab0044d0939ec4bf4362423d@realtek.com
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/pci/hda/patch_realtek.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 6c352602987ba..ffe3de617d5dd 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -3790,6 +3790,7 @@ static void alc225_init(struct hda_codec *codec)
+ AC_VERB_SET_AMP_GAIN_MUTE, AMP_OUT_UNMUTE);
+
+ msleep(75);
++ alc_update_coef_idx(codec, 0x4a, 3 << 10, 0);
+ alc_update_coefex_idx(codec, 0x57, 0x04, 0x0007, 0x4); /* Hight power */
+ }
+ }
+--
+2.39.5
+
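+The added call is a read-modify-write of processing coefficient 0x4a:
+alc_update_coef_idx(codec, idx, mask, value) reads the COEF register,
+clears the mask bits and ORs in the value. Roughly (a sketch of the
+helper's semantics, not code added by this patch):
+
+	val = alc_read_coef_idx(codec, 0x4a);
+	/* clear bits 10-11 of COEF 0x4a before the high-power sequence */
+	alc_write_coef_idx(codec, 0x4a, (val & ~(3 << 10)) | 0);
+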
--- /dev/null
+From 07d1a019f3d9e85c26a2d0b2a0a39b4c303bca57 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Feb 2025 18:00:30 +0100
+Subject: ALSA: seq: Drop UMP events when no UMP-conversion is set
+
+From: Takashi Iwai <tiwai@suse.de>
+
+[ Upstream commit e77aa4b2eaa7fb31b2a7a50214ecb946b2a8b0f6 ]
+
+When a destination client is a user client in the legacy MIDI mode and
+it sets the no-UMP-conversion flag, currently all UMP events are still
+passed as-is. But this may confuse user-space, because the event packet
+size differs from the legacy mode.
+
+Since we cannot handle UMP events in user clients unless they run in
+the UMP client mode, we should filter out those events instead of
+accepting them blindly. This patch addresses it by slightly adjusting
+the conditions for UMP event handling at event delivery time.
+
+Fixes: 329ffe11a014 ("ALSA: seq: Allow suppressing UMP conversions")
+Link: https://lore.kernel.org/b77a2cd6-7b59-4eb0-a8db-22d507d3af5f@gmail.com
+Link: https://patch.msgid.link/20250217170034.21930-1-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/core/seq/seq_clientmgr.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/sound/core/seq/seq_clientmgr.c b/sound/core/seq/seq_clientmgr.c
+index 77b6ac9b5c11b..9955c4d54e42a 100644
+--- a/sound/core/seq/seq_clientmgr.c
++++ b/sound/core/seq/seq_clientmgr.c
+@@ -678,12 +678,18 @@ static int snd_seq_deliver_single_event(struct snd_seq_client *client,
+ dest_port->time_real);
+
+ #if IS_ENABLED(CONFIG_SND_SEQ_UMP)
+- if (!(dest->filter & SNDRV_SEQ_FILTER_NO_CONVERT)) {
+- if (snd_seq_ev_is_ump(event)) {
++ if (snd_seq_ev_is_ump(event)) {
++ if (!(dest->filter & SNDRV_SEQ_FILTER_NO_CONVERT)) {
+ result = snd_seq_deliver_from_ump(client, dest, dest_port,
+ event, atomic, hop);
+ goto __skip;
+- } else if (snd_seq_client_is_ump(dest)) {
++ } else if (dest->type == USER_CLIENT &&
++ !snd_seq_client_is_ump(dest)) {
++ result = 0; // drop the event
++ goto __skip;
++ }
++ } else if (snd_seq_client_is_ump(dest)) {
++ if (!(dest->filter & SNDRV_SEQ_FILTER_NO_CONVERT)) {
+ result = snd_seq_deliver_to_ump(client, dest, dest_port,
+ event, atomic, hop);
+ goto __skip;
+--
+2.39.5
+
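+A condensed sketch of the resulting routing decision, mirroring the new
+conditionals in snd_seq_deliver_single_event() (the enum and helper are
+illustrative only, not part of the patch):
+
+	enum action { FROM_UMP, TO_UMP, RAW, DROP };
+
+	static enum action route(const struct snd_seq_event *ev,
+				 const struct snd_seq_client *dest)
+	{
+		bool no_convert = dest->filter & SNDRV_SEQ_FILTER_NO_CONVERT;
+
+		if (snd_seq_ev_is_ump(ev)) {
+			if (!no_convert)
+				return FROM_UMP; /* convert UMP for the dest */
+			if (dest->type == USER_CLIENT &&
+			    !snd_seq_client_is_ump(dest))
+				return DROP; /* legacy user client: drop */
+			return RAW; /* UMP-aware dest takes it as-is */
+		}
+		if (snd_seq_client_is_ump(dest) && !no_convert)
+			return TO_UMP; /* convert legacy event to UMP */
+		return RAW;
+	}
+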
--- /dev/null
+From 6d4631bef2aaeeb488f6d64db2d649ce5cae7a02 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Feb 2025 05:49:31 -0800
+Subject: arp: switch to dev_getbyhwaddr() in arp_req_set_public()
+
+From: Breno Leitao <leitao@debian.org>
+
+[ Upstream commit 4eae0ee0f1e6256d0b0b9dd6e72f1d9cf8f72e08 ]
+
+The arp_req_set_public() function is called with the rtnl lock held,
+which provides enough synchronization protection. This makes the RCU
+variant of dev_getbyhwaddr() unnecessary. Switch to using the simpler
+dev_getbyhwaddr() function since we already have the required rtnl
+locking.
+
+This change helps maintain consistency in the networking code by using
+the appropriate helper function for the existing locking context.
+Since we're not holding the RCU read lock in arp_req_set_public(), the
+existing code could trigger false-positive locking warnings.
+
+Fixes: 941666c2e3e0 ("net: RCU conversion of dev_getbyhwaddr() and arp_ioctl()")
+Suggested-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Signed-off-by: Breno Leitao <leitao@debian.org>
+Link: https://patch.msgid.link/20250218-arm_fix_selftest-v5-2-d3d6892db9e1@debian.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/arp.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c
+index f23a1ec6694cb..814300eee39de 100644
+--- a/net/ipv4/arp.c
++++ b/net/ipv4/arp.c
+@@ -1077,7 +1077,7 @@ static int arp_req_set_public(struct net *net, struct arpreq *r,
+ __be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr;
+
+ if (!dev && (r->arp_flags & ATF_COM)) {
+- dev = dev_getbyhwaddr_rcu(net, r->arp_ha.sa_family,
++ dev = dev_getbyhwaddr(net, r->arp_ha.sa_family,
+ r->arp_ha.sa_data);
+ if (!dev)
+ return -ENODEV;
+--
+2.39.5
+
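+A sketch of the two lookup contracts (the helper bodies live in
+net/core/dev.c; shown here only to illustrate why the plain variant
+fits this rtnl-held call site):
+
+	rtnl_lock();
+	dev = dev_getbyhwaddr(net, type, ha);	/* requires RTNL held */
+	/* ... dev remains valid while RTNL is held ... */
+	rtnl_unlock();
+
+	rcu_read_lock();
+	dev = dev_getbyhwaddr_rcu(net, type, ha); /* requires RCU read lock */
+	/* dev is only safe to use inside the RCU section */
+	rcu_read_unlock();
+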
--- /dev/null
+From b79d9a14b44873728059d21e07cb036bcb684139 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Feb 2025 15:05:18 +0800
+Subject: ASoC: imx-audmix: remove cpu_mclk which is from cpu dai device
+
+From: Shengjiu Wang <shengjiu.wang@nxp.com>
+
+[ Upstream commit 571b69f2f9b1ec7cf7d0e9b79e52115a87a869c4 ]
+
+When probe deferral happens, the following error may appear:
+
+platform 59820000.sai: Resources present before probing
+
+The cpu_mclk clock comes from the CPU DAI device; if it is not
+released, the CPU DAI device probe will fail the second time around.
+
+cpu_mclk is only used to get the rate for a rate constraint. Such a
+constraint may be platform specific and is not the machine driver's
+business, so remove it.
+
+Fixes: b86ef5367761 ("ASoC: fsl: Add Audio Mixer machine driver")
+Signed-off-by: Shengjiu Wang <shengjiu.wang@nxp.com>
+Link: https://patch.msgid.link/20250213070518.547375-1-shengjiu.wang@nxp.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/fsl/imx-audmix.c | 31 -------------------------------
+ 1 file changed, 31 deletions(-)
+
+diff --git a/sound/soc/fsl/imx-audmix.c b/sound/soc/fsl/imx-audmix.c
+index 231400661c906..50ecc5f51100e 100644
+--- a/sound/soc/fsl/imx-audmix.c
++++ b/sound/soc/fsl/imx-audmix.c
+@@ -23,7 +23,6 @@ struct imx_audmix {
+ struct snd_soc_card card;
+ struct platform_device *audmix_pdev;
+ struct platform_device *out_pdev;
+- struct clk *cpu_mclk;
+ int num_dai;
+ struct snd_soc_dai_link *dai;
+ int num_dai_conf;
+@@ -32,34 +31,11 @@ struct imx_audmix {
+ struct snd_soc_dapm_route *dapm_routes;
+ };
+
+-static const u32 imx_audmix_rates[] = {
+- 8000, 12000, 16000, 24000, 32000, 48000, 64000, 96000,
+-};
+-
+-static const struct snd_pcm_hw_constraint_list imx_audmix_rate_constraints = {
+- .count = ARRAY_SIZE(imx_audmix_rates),
+- .list = imx_audmix_rates,
+-};
+-
+ static int imx_audmix_fe_startup(struct snd_pcm_substream *substream)
+ {
+- struct snd_soc_pcm_runtime *rtd = snd_soc_substream_to_rtd(substream);
+- struct imx_audmix *priv = snd_soc_card_get_drvdata(rtd->card);
+ struct snd_pcm_runtime *runtime = substream->runtime;
+- struct device *dev = rtd->card->dev;
+- unsigned long clk_rate = clk_get_rate(priv->cpu_mclk);
+ int ret;
+
+- if (clk_rate % 24576000 == 0) {
+- ret = snd_pcm_hw_constraint_list(runtime, 0,
+- SNDRV_PCM_HW_PARAM_RATE,
+- &imx_audmix_rate_constraints);
+- if (ret < 0)
+- return ret;
+- } else {
+- dev_warn(dev, "mclk may be not supported %lu\n", clk_rate);
+- }
+-
+ ret = snd_pcm_hw_constraint_minmax(runtime, SNDRV_PCM_HW_PARAM_CHANNELS,
+ 1, 8);
+ if (ret < 0)
+@@ -323,13 +299,6 @@ static int imx_audmix_probe(struct platform_device *pdev)
+ }
+ put_device(&cpu_pdev->dev);
+
+- priv->cpu_mclk = devm_clk_get(&cpu_pdev->dev, "mclk1");
+- if (IS_ERR(priv->cpu_mclk)) {
+- ret = PTR_ERR(priv->cpu_mclk);
+- dev_err(&cpu_pdev->dev, "failed to get DAI mclk1: %d\n", ret);
+- return ret;
+- }
+-
+ priv->audmix_pdev = audmix_pdev;
+ priv->out_pdev = cpu_pdev;
+
+--
+2.39.5
+
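+The removed line is the anti-pattern (annotation added for
+illustration): devm_clk_get() ties the clock's lifetime to the device
+passed in, which here is the CPU DAI's device rather than the machine
+driver's own:
+
+	/* devres entry lands on cpu_pdev->dev, outside its own probe */
+	priv->cpu_mclk = devm_clk_get(&cpu_pdev->dev, "mclk1");
+
+When the deferred CPU DAI device probes again, the driver core finds
+that leftover devres entry and reports "Resources present before
+probing".
+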
--- /dev/null
+From cd22a803c0a4a307db012435592fdcd62e3993a2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 4 Feb 2025 16:13:10 +0000
+Subject: ASoC: rockchip: i2s-tdm: fix shift config for SND_SOC_DAIFMT_DSP_[AB]
+
+From: John Keeping <jkeeping@inmusicbrands.com>
+
+[ Upstream commit 6b24e67b4056ba83b1e95e005b7e50fdb1cc6cf4 ]
+
+Commit 2f45a4e289779 ("ASoC: rockchip: i2s_tdm: Fixup config for
+SND_SOC_DAIFMT_DSP_A/B") applied a partial change to fix the
+configuration for DSP A and DSP B formats.
+
+The shift control also needs updating to set the correct offset of the
+frame data relative to LRCK. Set the correct values.
+
+Fixes: 081068fd64140 ("ASoC: rockchip: add support for i2s-tdm controller")
+Signed-off-by: John Keeping <jkeeping@inmusicbrands.com>
+Link: https://patch.msgid.link/20250204161311.2117240-1-jkeeping@inmusicbrands.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/rockchip/rockchip_i2s_tdm.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/sound/soc/rockchip/rockchip_i2s_tdm.c b/sound/soc/rockchip/rockchip_i2s_tdm.c
+index acd75e48851fc..7feefeb6b876d 100644
+--- a/sound/soc/rockchip/rockchip_i2s_tdm.c
++++ b/sound/soc/rockchip/rockchip_i2s_tdm.c
+@@ -451,11 +451,11 @@ static int rockchip_i2s_tdm_set_fmt(struct snd_soc_dai *cpu_dai,
+ break;
+ case SND_SOC_DAIFMT_DSP_A:
+ val = I2S_TXCR_TFS_TDM_PCM;
+- tdm_val = TDM_SHIFT_CTRL(0);
++ tdm_val = TDM_SHIFT_CTRL(2);
+ break;
+ case SND_SOC_DAIFMT_DSP_B:
+ val = I2S_TXCR_TFS_TDM_PCM;
+- tdm_val = TDM_SHIFT_CTRL(2);
++ tdm_val = TDM_SHIFT_CTRL(4);
+ break;
+ default:
+ ret = -EINVAL;
+--
+2.39.5
+
--- /dev/null
+From ea11353b740ca74abe1b533c1e273fc7d6f74c27 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 6 Feb 2025 10:46:42 +0200
+Subject: ASoC: SOF: ipc4-topology: Harden loops for looking up ALH copiers
+
+From: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
+
+[ Upstream commit 6fd60136d256b3b948333ebdb3835f41a95ab7ef ]
+
+Other, non-DAI copier widgets could have the same stream name (sname)
+as the ALH copier; in that case copier->data is NULL and no alh_data is
+attached, which could lead to a NULL pointer dereference.
+We could check for this NULL pointer in sof_ipc4_prepare_copier_module()
+and avoid the crash, but a similar loop in sof_ipc4_widget_setup_comp_dai()
+would miscalculate the ALH device count, causing broken audio.
+
+The correct fix is to harden the matching logic by making sure that:
+1. the widget is a DAI widget - so dai = w->private is valid
+2. the dai (and thus the copier) is an ALH copier
+
+Fixes: a150345aa758 ("ASoC: SOF: ipc4-topology: add SoundWire/ALH aggregation support")
+Reported-by: Seppo Ingalsuo <seppo.ingalsuo@linux.intel.com>
+Link: https://github.com/thesofproject/sof/pull/9652
+Signed-off-by: Peter Ujfalusi <peter.ujfalusi@linux.intel.com>
+Reviewed-by: Liam Girdwood <liam.r.girdwood@intel.com>
+Reviewed-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
+Reviewed-by: Bard Liao <yung-chuan.liao@linux.intel.com>
+Link: https://patch.msgid.link/20250206084642.14988-1-peter.ujfalusi@linux.intel.com
+Signed-off-by: Mark Brown <broonie@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ sound/soc/sof/ipc4-topology.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/sound/soc/sof/ipc4-topology.c b/sound/soc/sof/ipc4-topology.c
+index b55eb977e443d..70b7bfb080f47 100644
+--- a/sound/soc/sof/ipc4-topology.c
++++ b/sound/soc/sof/ipc4-topology.c
+@@ -765,10 +765,16 @@ static int sof_ipc4_widget_setup_comp_dai(struct snd_sof_widget *swidget)
+ }
+
+ list_for_each_entry(w, &sdev->widget_list, list) {
+- if (w->widget->sname &&
++ struct snd_sof_dai *alh_dai;
++
++ if (!WIDGET_IS_DAI(w->id) || !w->widget->sname ||
+ strcmp(w->widget->sname, swidget->widget->sname))
+ continue;
+
++ alh_dai = w->private;
++ if (alh_dai->type != SOF_DAI_INTEL_ALH)
++ continue;
++
+ blob->alh_cfg.device_count++;
+ }
+
+@@ -2061,11 +2067,13 @@ sof_ipc4_prepare_copier_module(struct snd_sof_widget *swidget,
+ list_for_each_entry(w, &sdev->widget_list, list) {
+ u32 node_type;
+
+- if (w->widget->sname &&
++ if (!WIDGET_IS_DAI(w->id) || !w->widget->sname ||
+ strcmp(w->widget->sname, swidget->widget->sname))
+ continue;
+
+ dai = w->private;
++ if (dai->type != SOF_DAI_INTEL_ALH)
++ continue;
+ alh_copier = (struct sof_ipc4_copier *)dai->private;
+ alh_data = &alh_copier->data;
+ node_type = SOF_IPC4_GET_NODE_TYPE(alh_data->gtw_cfg.node_id);
+--
+2.39.5
+
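+Both loops now apply the same hardened match; as a standalone sketch
+(the helper name is illustrative, the patch open-codes the checks in
+each loop):
+
+	static bool is_matching_alh_dai(struct snd_sof_widget *w,
+					const char *sname)
+	{
+		struct snd_sof_dai *dai;
+
+		/* 1. must be a DAI widget with the same stream name */
+		if (!WIDGET_IS_DAI(w->id) || !w->widget->sname ||
+		    strcmp(w->widget->sname, sname))
+			return false;
+
+		/* 2. its DAI (and thus its copier) must be an ALH copier */
+		dai = w->private;
+		return dai->type == SOF_DAI_INTEL_ALH;
+	}
+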
--- /dev/null
+From f772ef4f84fc28203c18c7cc2dac2d17b3110616 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 13 Jan 2025 22:43:23 +0800
+Subject: Bluetooth: qca: Fix poor RF performance for WCN6855
+
+From: Zijun Hu <quic_zijuhu@quicinc.com>
+
+[ Upstream commit a2fad248947d702ed3dcb52b8377c1a3ae201e44 ]
+
+For WCN6855, the board-ID-specific NVM needs to be downloaded once the
+board ID is available, but currently the default NVM is always
+downloaded.
+
+The wrong NVM causes poor RF performance and affects the user
+experience on several types of laptops with WCN6855 on the market.
+
+Fix this by downloading the board-ID-specific NVM if the board ID is
+available.
+
+Fixes: 095327fede00 ("Bluetooth: hci_qca: Add support for QTI Bluetooth chip wcn6855")
+Cc: stable@vger.kernel.org # 6.4
+Signed-off-by: Zijun Hu <quic_zijuhu@quicinc.com>
+Tested-by: Johan Hovold <johan+linaro@kernel.org>
+Reviewed-by: Johan Hovold <johan+linaro@kernel.org>
+Tested-by: Steev Klimaszewski <steev@kali.org> #Thinkpad X13s
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/bluetooth/btqca.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
+index 5cb1fd1a0c7b5..04d02c746ec0f 100644
+--- a/drivers/bluetooth/btqca.c
++++ b/drivers/bluetooth/btqca.c
+@@ -905,8 +905,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
+ "qca/msnv%02x.bin", rom_ver);
+ break;
+ case QCA_WCN6855:
+- snprintf(config.fwname, sizeof(config.fwname),
+- "qca/hpnv%02x.bin", rom_ver);
++ qca_read_fw_board_id(hdev, &boardid);
++ qca_get_nvm_name_by_board(config.fwname, sizeof(config.fwname),
++ "hpnv", soc_type, ver, rom_ver, boardid);
+ break;
+ case QCA_WCN7850:
+ qca_get_nvm_name_by_board(config.fwname, sizeof(config.fwname),
+--
+2.39.5
+
--- /dev/null
+From 1519f63e5e5233783b95043445ebf187f63e9cd6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Jan 2025 17:26:49 +0800
+Subject: Bluetooth: qca: Update firmware-name to support board specific nvm
+
+From: Cheng Jiang <quic_chejiang@quicinc.com>
+
+[ Upstream commit a4c5a468c6329bde7dfd46bacff2cbf5f8a8152e ]
+
+Different connectivity boards may be attached to the same platform. For
+example, QCA6698-based boards can support either a two-antenna or
+three-antenna solution, both of which work on the sa8775p-ride platform.
+Due to differences in connectivity boards and variations in RF
+performance from different foundries, different NVM configurations are
+used based on the board ID.
+
+Therefore, if the NVM file named in the firmware-name property has an
+extension, that file is used as-is. Otherwise, the system will first
+try the .bNN (board ID) file, and if that fails, it will fall back to
+the .bin file.
+
+Possible configurations:
+firmware-name = "QCA6698/hpnv21";
+firmware-name = "QCA6698/hpnv21.bin";
+
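+For example, with firmware-name = "QCA6698/hpnv21" and a board ID of
+0x21 read back from the controller, the driver tries, in order (a
+worked example assuming the GlobalFoundries variant suffix does not
+apply):
+
+qca/QCA6698/hpnv21.b21   (board-specific NVM)
+qca/QCA6698/hpnv21.bin   (default NVM fallback)
+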
+Signed-off-by: Cheng Jiang <quic_chejiang@quicinc.com>
+Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
+Stable-dep-of: a2fad248947d ("Bluetooth: qca: Fix poor RF performance for WCN6855")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/bluetooth/btqca.c | 113 ++++++++++++++++++++++++++++----------
+ 1 file changed, 85 insertions(+), 28 deletions(-)
+
+diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
+index dfbbac92242a8..5cb1fd1a0c7b5 100644
+--- a/drivers/bluetooth/btqca.c
++++ b/drivers/bluetooth/btqca.c
+@@ -272,6 +272,39 @@ int qca_send_pre_shutdown_cmd(struct hci_dev *hdev)
+ }
+ EXPORT_SYMBOL_GPL(qca_send_pre_shutdown_cmd);
+
++static bool qca_filename_has_extension(const char *filename)
++{
++ const char *suffix = strrchr(filename, '.');
++
++ /* File extensions require a dot, but not as the first or last character */
++ if (!suffix || suffix == filename || *(suffix + 1) == '\0')
++ return 0;
++
++ /* Avoid matching directories with names that look like files with extensions */
++ return !strchr(suffix, '/');
++}
++
++static bool qca_get_alt_nvm_file(char *filename, size_t max_size)
++{
++ char fwname[64];
++ const char *suffix;
++
++ /* nvm file name has an extension, replace with .bin */
++ if (qca_filename_has_extension(filename)) {
++ suffix = strrchr(filename, '.');
++ strscpy(fwname, filename, suffix - filename + 1);
++ snprintf(fwname + (suffix - filename),
++ sizeof(fwname) - (suffix - filename), ".bin");
++ /* If nvm file is already the default one, return false to skip the retry. */
++ if (strcmp(fwname, filename) == 0)
++ return false;
++
++ snprintf(filename, max_size, "%s", fwname);
++ return true;
++ }
++ return false;
++}
++
+ static int qca_tlv_check_data(struct hci_dev *hdev,
+ struct qca_fw_config *config,
+ u8 *fw_data, size_t fw_size,
+@@ -564,6 +597,19 @@ static int qca_download_firmware(struct hci_dev *hdev,
+ config->fwname, ret);
+ return ret;
+ }
++ }
++ /* If the board-specific file is missing, try loading the default
++ * one, unless that was attempted already.
++ */
++ else if (config->type == TLV_TYPE_NVM &&
++ qca_get_alt_nvm_file(config->fwname, sizeof(config->fwname))) {
++ bt_dev_info(hdev, "QCA Downloading %s", config->fwname);
++ ret = request_firmware(&fw, config->fwname, &hdev->dev);
++ if (ret) {
++ bt_dev_err(hdev, "QCA Failed to request file: %s (%d)",
++ config->fwname, ret);
++ return ret;
++ }
+ } else {
+ bt_dev_err(hdev, "QCA Failed to request file: %s (%d)",
+ config->fwname, ret);
+@@ -700,34 +746,38 @@ static int qca_check_bdaddr(struct hci_dev *hdev, const struct qca_fw_config *co
+ return 0;
+ }
+
+-static void qca_generate_hsp_nvm_name(char *fwname, size_t max_size,
++static void qca_get_nvm_name_by_board(char *fwname, size_t max_size,
++ const char *stem, enum qca_btsoc_type soc_type,
+ struct qca_btsoc_version ver, u8 rom_ver, u16 bid)
+ {
+ const char *variant;
++ const char *prefix;
+
+- /* hsp gf chip */
+- if ((le32_to_cpu(ver.soc_id) & QCA_HSP_GF_SOC_MASK) == QCA_HSP_GF_SOC_ID)
+- variant = "g";
+- else
+- variant = "";
++ /* Set the default value to variant and prefix */
++ variant = "";
++ prefix = "b";
+
+- if (bid == 0x0)
+- snprintf(fwname, max_size, "qca/hpnv%02x%s.bin", rom_ver, variant);
+- else
+- snprintf(fwname, max_size, "qca/hpnv%02x%s.%x", rom_ver, variant, bid);
+-}
++ if (soc_type == QCA_QCA2066)
++ prefix = "";
+
+-static inline void qca_get_nvm_name_generic(struct qca_fw_config *cfg,
+- const char *stem, u8 rom_ver, u16 bid)
+-{
+- if (bid == 0x0)
+- snprintf(cfg->fwname, sizeof(cfg->fwname), "qca/%snv%02x.bin", stem, rom_ver);
+- else if (bid & 0xff00)
+- snprintf(cfg->fwname, sizeof(cfg->fwname),
+- "qca/%snv%02x.b%x", stem, rom_ver, bid);
+- else
+- snprintf(cfg->fwname, sizeof(cfg->fwname),
+- "qca/%snv%02x.b%02x", stem, rom_ver, bid);
++ if (soc_type == QCA_WCN6855 || soc_type == QCA_QCA2066) {
++ /* If the chip is manufactured by GlobalFoundries */
++ if ((le32_to_cpu(ver.soc_id) & QCA_HSP_GF_SOC_MASK) == QCA_HSP_GF_SOC_ID)
++ variant = "g";
++ }
++
++ if (rom_ver != 0) {
++ if (bid == 0x0 || bid == 0xffff)
++ snprintf(fwname, max_size, "qca/%s%02x%s.bin", stem, rom_ver, variant);
++ else
++ snprintf(fwname, max_size, "qca/%s%02x%s.%s%02x", stem, rom_ver,
++ variant, prefix, bid);
++ } else {
++ if (bid == 0x0 || bid == 0xffff)
++ snprintf(fwname, max_size, "qca/%s%s.bin", stem, variant);
++ else
++ snprintf(fwname, max_size, "qca/%s%s.%s%02x", stem, variant, prefix, bid);
++ }
+ }
+
+ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
+@@ -816,8 +866,14 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
+ /* Download NVM configuration */
+ config.type = TLV_TYPE_NVM;
+ if (firmware_name) {
+- snprintf(config.fwname, sizeof(config.fwname),
+- "qca/%s", firmware_name);
++ /* The firmware name has an extension, use it directly */
++ if (qca_filename_has_extension(firmware_name)) {
++ snprintf(config.fwname, sizeof(config.fwname), "qca/%s", firmware_name);
++ } else {
++ qca_read_fw_board_id(hdev, &boardid);
++ qca_get_nvm_name_by_board(config.fwname, sizeof(config.fwname),
++ firmware_name, soc_type, ver, 0, boardid);
++ }
+ } else {
+ switch (soc_type) {
+ case QCA_WCN3990:
+@@ -836,8 +892,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
+ "qca/apnv%02x.bin", rom_ver);
+ break;
+ case QCA_QCA2066:
+- qca_generate_hsp_nvm_name(config.fwname,
+- sizeof(config.fwname), ver, rom_ver, boardid);
++ qca_get_nvm_name_by_board(config.fwname,
++ sizeof(config.fwname), "hpnv", soc_type, ver,
++ rom_ver, boardid);
+ break;
+ case QCA_QCA6390:
+ snprintf(config.fwname, sizeof(config.fwname),
+@@ -852,9 +909,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
+ "qca/hpnv%02x.bin", rom_ver);
+ break;
+ case QCA_WCN7850:
+- qca_get_nvm_name_generic(&config, "hmt", rom_ver, boardid);
++ qca_get_nvm_name_by_board(config.fwname, sizeof(config.fwname),
++ "hmtnv", soc_type, ver, rom_ver, boardid);
+ break;
+-
+ default:
+ snprintf(config.fwname, sizeof(config.fwname),
+ "qca/nvm_%08x.bin", soc_ver);
+--
+2.39.5
+
--- /dev/null
+From c1d0ddb56151e3bea755997ca0652c9ebb080aef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Dec 2024 16:43:55 +1030
+Subject: btrfs: fix double accounting race when btrfs_run_delalloc_range()
+ failed
+
+From: Qu Wenruo <wqu@suse.com>
+
+[ Upstream commit 72dad8e377afa50435940adfb697e070d3556670 ]
+
+[BUG]
+When running btrfs with block size (4K) smaller than page size (64K,
+aarch64), there is a very high chance to crash the kernel at
+generic/750, with the following messages:
+(before the call traces, there are 3 extra debug messages added)
+
+ BTRFS warning (device dm-3): read-write for sector size 4096 with page size 65536 is experimental
+ BTRFS info (device dm-3): checking UUID tree
+ hrtimer: interrupt took 5451385 ns
+ BTRFS error (device dm-3): cow_file_range failed, root=4957 inode=257 start=1605632 len=69632: -28
+ BTRFS error (device dm-3): run_delalloc_nocow failed, root=4957 inode=257 start=1605632 len=69632: -28
+ BTRFS error (device dm-3): failed to run delalloc range, root=4957 ino=257 folio=1572864 submit_bitmap=8-15 start=1605632 len=69632: -28
+ ------------[ cut here ]------------
+ WARNING: CPU: 2 PID: 3020984 at ordered-data.c:360 can_finish_ordered_extent+0x370/0x3b8 [btrfs]
+ CPU: 2 UID: 0 PID: 3020984 Comm: kworker/u24:1 Tainted: G OE 6.13.0-rc1-custom+ #89
+ Tainted: [O]=OOT_MODULE, [E]=UNSIGNED_MODULE
+ Hardware name: QEMU KVM Virtual Machine, BIOS unknown 2/2/2022
+ Workqueue: events_unbound btrfs_async_reclaim_data_space [btrfs]
+ pc : can_finish_ordered_extent+0x370/0x3b8 [btrfs]
+ lr : can_finish_ordered_extent+0x1ec/0x3b8 [btrfs]
+ Call trace:
+ can_finish_ordered_extent+0x370/0x3b8 [btrfs] (P)
+ can_finish_ordered_extent+0x1ec/0x3b8 [btrfs] (L)
+ btrfs_mark_ordered_io_finished+0x130/0x2b8 [btrfs]
+ extent_writepage+0x10c/0x3b8 [btrfs]
+ extent_write_cache_pages+0x21c/0x4e8 [btrfs]
+ btrfs_writepages+0x94/0x160 [btrfs]
+ do_writepages+0x74/0x190
+ filemap_fdatawrite_wbc+0x74/0xa0
+ start_delalloc_inodes+0x17c/0x3b0 [btrfs]
+ btrfs_start_delalloc_roots+0x17c/0x288 [btrfs]
+ shrink_delalloc+0x11c/0x280 [btrfs]
+ flush_space+0x288/0x328 [btrfs]
+ btrfs_async_reclaim_data_space+0x180/0x228 [btrfs]
+ process_one_work+0x228/0x680
+ worker_thread+0x1bc/0x360
+ kthread+0x100/0x118
+ ret_from_fork+0x10/0x20
+ ---[ end trace 0000000000000000 ]---
+ BTRFS critical (device dm-3): bad ordered extent accounting, root=4957 ino=257 OE offset=1605632 OE len=16384 to_dec=16384 left=0
+ BTRFS critical (device dm-3): bad ordered extent accounting, root=4957 ino=257 OE offset=1622016 OE len=12288 to_dec=12288 left=0
+ Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008
+ BTRFS critical (device dm-3): bad ordered extent accounting, root=4957 ino=257 OE offset=1634304 OE len=8192 to_dec=4096 left=0
+ CPU: 1 UID: 0 PID: 3286940 Comm: kworker/u24:3 Tainted: G W OE 6.13.0-rc1-custom+ #89
+ Hardware name: QEMU KVM Virtual Machine, BIOS unknown 2/2/2022
+ Workqueue: btrfs_work_helper [btrfs] (btrfs-endio-write)
+ pstate: 404000c5 (nZcv daIF +PAN -UAO -TCO -DIT -SSBS BTYPE=--)
+ pc : process_one_work+0x110/0x680
+ lr : worker_thread+0x1bc/0x360
+ Call trace:
+ process_one_work+0x110/0x680 (P)
+ worker_thread+0x1bc/0x360 (L)
+ worker_thread+0x1bc/0x360
+ kthread+0x100/0x118
+ ret_from_fork+0x10/0x20
+ Code: f84086a1 f9000fe1 53041c21 b9003361 (f9400661)
+ ---[ end trace 0000000000000000 ]---
+ Kernel panic - not syncing: Oops: Fatal exception
+ SMP: stopping secondary CPUs
+ SMP: failed to stop secondary CPUs 2-3
+ Dumping ftrace buffer:
+ (ftrace buffer empty)
+ Kernel Offset: 0x275bb9540000 from 0xffff800080000000
+ PHYS_OFFSET: 0xffff8fbba0000000
+ CPU features: 0x100,00000070,00801250,8201720b
+
+[CAUSE]
+The above warning is triggered immediately after the delalloc range
+failure; this happens in the following sequence:
+
+- Range [1568K, 1636K) is dirty
+
+ 1536K 1568K 1600K 1636K 1664K
+ | |/////////|////////| |
+
+ Where 1536K, 1600K and 1664K are page boundaries (64K page size)
+
+- Enter extent_writepage() for page 1536K
+
+- Enter run_delalloc_nocow() with locked page 1536K and range
+ [1568K, 1636K)
+ This is due to the inode having preallocated extents.
+
+- Enter cow_file_range() with locked page 1536K and range
+ [1568K, 1636K)
+
+- btrfs_reserve_extent() only reserved two extents
+ The main loop of cow_file_range() only reserved two data extents,
+
+ Now we have:
+
+ 1536K 1568K 1600K 1636K 1664K
+ | |<-->|<--->|/|///////| |
+ 1584K 1596K
+ Range [1568K, 1596K) has an ordered extent reserved.
+
+- btrfs_reserve_extent() failed inside cow_file_range() for file offset
+ 1596K
+ This is already a bug in our space reservation code, but for now let's
+ focus on the error handling path.
+
+ Now cow_file_range() returned -ENOSPC.
+
+- btrfs_run_delalloc_range() does error cleanup <<< ROOT CAUSE
+ Call btrfs_cleanup_ordered_extents() with locked folio 1536K and range
+ [1568K, 1636K)
+
+ Function btrfs_cleanup_ordered_extents() normally needs to skip the
+ ranges inside the folio, as they will normally be cleaned up by
+ extent_writepage().
+
+ Such split error handling is already problematic in the first place.
+
+ What's worse is the folio range skipping itself, which is not taking
+ subpage cases into consideration at all, it will only skip the range
+ if the page start >= the range start.
+ In our case, the page start < the range start, since for subpage cases
+ we can have delalloc ranges inside the folio but not covering the
+ folio.
+
+ So it doesn't skip the page range at all.
+ This means all the ordered extents, both [1568K, 1584K) and
+ [1584K, 1596K), will be marked as IOERR.
+
+ And these two ordered extents have no more pending ios, they are marked
+ finished, and *QUEUED* to be deleted from the io tree.
+
+- extent_writepage() does error cleanup
+ Call btrfs_mark_ordered_io_finished() for the range [1536K, 1600K).
+
+ Although ranges [1568K, 1584K) and [1584K, 1596K) are finished, the
+ deletion from the io tree is async; it may or may not have happened
+ at this time.
+
+ If the ranges have not yet been removed, we will do double cleaning on
+ those ranges, triggering the above ordered extent warnings.
+
+In theory there are other bugs, like the cleanup in extent_writepage()
+can cause double accounting on ranges that are submitted asynchronously
+(compression for example).
+
+But that's much harder to trigger because normally we do not mix regular
+and compression delalloc ranges.
+
+[FIX]
+The folio range split is already buggy and not subpage compatible; it
+was introduced a long time ago, when subpage support was not even considered.
+
+So instead of splitting the ordered extents cleanup into the folio range
+and out of folio range, do all the cleanup inside writepage_delalloc().
+
+- Pass @NULL as locked_folio for btrfs_cleanup_ordered_extents() in
+ btrfs_run_delalloc_range()
+
+- Skip the btrfs_cleanup_ordered_extents() if writepage_delalloc()
+ failed
+
+ So all ordered extents are only cleaned up by
+ btrfs_run_delalloc_range().
+
+- Handle the ranges that already have ordered extents allocated
+ If part of the folio already has ordered extent allocated, and
+ btrfs_run_delalloc_range() failed, we also need to cleanup that range.
+
+Now we have a concentrated error handling for ordered extents during
+btrfs_run_delalloc_range().
+
+Fixes: d1051d6ebf8e ("btrfs: Fix error handling in btrfs_cleanup_ordered_extents")
+CC: stable@vger.kernel.org # 5.15+
+Reviewed-by: Boris Burkov <boris@bur.io>
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 8bf334beb349 ("btrfs: fix double accounting race when extent_writepage_io() failed")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent_io.c | 59 +++++++++++++++++++++++++++++++++++---------
+ fs/btrfs/inode.c | 3 +--
+ 2 files changed, 49 insertions(+), 13 deletions(-)
+
+diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
+index 299507c0008d9..3d138bfd59e18 100644
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -1145,14 +1145,19 @@ static bool find_next_delalloc_bitmap(struct folio *folio,
+ }
+
+ /*
+- * helper for extent_writepage(), doing all of the delayed allocation setup.
++ * Do all of the delayed allocation setup.
+ *
+- * This returns 1 if btrfs_run_delalloc_range function did all the work required
+- * to write the page (copy into inline extent). In this case the IO has
+- * been started and the page is already unlocked.
++ * Return >0 if all the dirty blocks are submitted async (compression) or inlined.
++ * The @folio should no longer be touched (treat it as already unlocked).
+ *
+- * This returns 0 if all went well (page still locked)
+- * This returns < 0 if there were errors (page still locked)
++ * Return 0 if there is still dirty block that needs to be submitted through
++ * extent_writepage_io().
++ * bio_ctrl->submit_bitmap will indicate which blocks of the folio should be
++ * submitted, and @folio is still kept locked.
++ *
++ * Return <0 if there is any error hit.
++ * Any allocated ordered extent range covering this folio will be marked
++ * finished (IOERR), and @folio is still kept locked.
+ */
+ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
+ struct folio *folio,
+@@ -1170,6 +1175,16 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
+ * last delalloc end.
+ */
+ u64 last_delalloc_end = 0;
++ /*
++ * The range end (exclusive) of the last successfully finished delalloc
++ * range.
++ * Any range covered by ordered extent must either be manually marked
++ * finished (error handling), or has IO submitted (and finish the
++ * ordered extent normally).
++ *
++ * This records the end of ordered extent cleanup if we hit an error.
++ */
++ u64 last_finished_delalloc_end = page_start;
+ u64 delalloc_start = page_start;
+ u64 delalloc_end = page_end;
+ u64 delalloc_to_write = 0;
+@@ -1238,11 +1253,19 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
+ found_len = last_delalloc_end + 1 - found_start;
+
+ if (ret >= 0) {
++ /*
++ * Some delalloc range may be created by previous folios.
++ * Thus we still need to clean up this range during error
++ * handling.
++ */
++ last_finished_delalloc_end = found_start;
+ /* No errors hit so far, run the current delalloc range. */
+ ret = btrfs_run_delalloc_range(inode, folio,
+ found_start,
+ found_start + found_len - 1,
+ wbc);
++ if (ret >= 0)
++ last_finished_delalloc_end = found_start + found_len;
+ } else {
+ /*
+ * We've hit an error during previous delalloc range,
+@@ -1277,8 +1300,22 @@ static noinline_for_stack int writepage_delalloc(struct btrfs_inode *inode,
+
+ delalloc_start = found_start + found_len;
+ }
+- if (ret < 0)
++ /*
++ * It's possible we had some ordered extents created before we hit
++ * an error, cleanup non-async successfully created delalloc ranges.
++ */
++ if (unlikely(ret < 0)) {
++ unsigned int bitmap_size = min(
++ (last_finished_delalloc_end - page_start) >>
++ fs_info->sectorsize_bits,
++ fs_info->sectors_per_page);
++
++ for_each_set_bit(bit, &bio_ctrl->submit_bitmap, bitmap_size)
++ btrfs_mark_ordered_io_finished(inode, folio,
++ page_start + (bit << fs_info->sectorsize_bits),
++ fs_info->sectorsize, false);
+ return ret;
++ }
+ out:
+ if (last_delalloc_end)
+ delalloc_end = last_delalloc_end;
+@@ -1512,13 +1549,13 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
+
+ bio_ctrl->wbc->nr_to_write--;
+
+-done:
+- if (ret) {
++ if (ret)
+ btrfs_mark_ordered_io_finished(inode, folio,
+ page_start, PAGE_SIZE, !ret);
+- mapping_set_error(folio->mapping, ret);
+- }
+
++done:
++ if (ret < 0)
++ mapping_set_error(folio->mapping, ret);
+ /*
+ * Only unlock ranges that are submitted. As there can be some async
+ * submitted ranges inside the folio.
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index d1c8f6730a568..b4160b1c77573 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -2385,8 +2385,7 @@ int btrfs_run_delalloc_range(struct btrfs_inode *inode, struct folio *locked_fol
+
+ out:
+ if (ret < 0)
+- btrfs_cleanup_ordered_extents(inode, locked_folio, start,
+- end - start + 1);
++ btrfs_cleanup_ordered_extents(inode, NULL, start, end - start + 1);
+ return ret;
+ }
+
+--
+2.39.5
+
--- /dev/null
+From 421936f76d83c8719c06552c97bc70a1ef5dd4d2 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 12 Dec 2024 16:43:56 +1030
+Subject: btrfs: fix double accounting race when extent_writepage_io() failed
+
+From: Qu Wenruo <wqu@suse.com>
+
+[ Upstream commit 8bf334beb3496da3c3fbf3daf3856f7eec70dacc ]
+
+[BUG]
+If submit_one_sector() failed inside extent_writepage_io() for sector
+size < page size cases (e.g. 4K sector size and 64K page size), then
+we can hit a double ordered extent accounting error.
+
+This should be very rare, as submit_one_sector() only fails when we
+failed to grab the extent map, and such extent map should exist inside
+the memory and has been pinned.
+
+[CAUSE]
+For example we have the following folio layout:
+
+ 0 4K 32K 48K 60K 64K
+ |//| |//////| |///|
+
+Where |///| is the dirty range we need to writeback. The 3 different
+dirty ranges are submitted for regular COW.
+
+Now we hit the following sequence:
+
+- submit_one_sector() returned 0 for [0, 4K)
+
+- submit_one_sector() returned 0 for [32K, 48K)
+
+- submit_one_sector() returned error for [60K, 64K)
+
+- btrfs_mark_ordered_io_finished() called for the whole folio
+ This will mark the following ranges as finished:
+ * [0, 4K)
+ * [32K, 48K)
+ Both ranges have their IO already submitted, this cleanup will
+ lead to double accounting.
+
+ * [60K, 64K)
+ That's the correct cleanup.
+
+The only good news is that this error is only theoretical: the target
+extent map is always pinned, so we should grab it directly from memory
+rather than reading it from disk.
+
+[FIX]
+Instead of calling btrfs_mark_ordered_io_finished() for the whole folio
+range, which can touch ranges we should not touch, move the error
+handling inside extent_writepage_io().
+
+So that we can cleanup exact sectors that ought to be submitted but failed.
+
+This provides much more accurate cleanup, avoiding the double accounting.
+
+CC: stable@vger.kernel.org # 5.15+
+Signed-off-by: Qu Wenruo <wqu@suse.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent_io.c | 37 ++++++++++++++++++++++++-------------
+ 1 file changed, 24 insertions(+), 13 deletions(-)
+
+diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
+index 3d138bfd59e18..0dd24d1289863 100644
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -1431,6 +1431,7 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ unsigned long range_bitmap = 0;
+ bool submitted_io = false;
++ bool error = false;
+ const u64 folio_start = folio_pos(folio);
+ u64 cur;
+ int bit;
+@@ -1473,11 +1474,26 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
+ break;
+ }
+ ret = submit_one_sector(inode, folio, cur, bio_ctrl, i_size);
+- if (ret < 0)
+- goto out;
++ if (unlikely(ret < 0)) {
++ /*
++ * bio_ctrl may contain a bio crossing several folios.
++ * Submit it immediately so that the bio has a chance
++ * to finish normally, other than marked as error.
++ */
++ submit_one_bio(bio_ctrl);
++ /*
++ * Failed to grab the extent map which should be very rare.
++ * Since there is no bio submitted to finish the ordered
++ * extent, we have to manually finish this sector.
++ */
++ btrfs_mark_ordered_io_finished(inode, folio, cur,
++ fs_info->sectorsize, false);
++ error = true;
++ continue;
++ }
+ submitted_io = true;
+ }
+-out:
++
+ /*
+ * If we didn't submitted any sector (>= i_size), folio dirty get
+ * cleared but PAGECACHE_TAG_DIRTY is not cleared (only cleared
+@@ -1485,8 +1501,11 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
+ *
+ * Here we set writeback and clear for the range. If the full folio
+ * is no longer dirty then we clear the PAGECACHE_TAG_DIRTY tag.
++ *
++ * If we hit any error, the corresponding sector will still be dirty
++ * thus no need to clear PAGECACHE_TAG_DIRTY.
+ */
+- if (!submitted_io) {
++ if (!submitted_io && !error) {
+ btrfs_folio_set_writeback(fs_info, folio, start, len);
+ btrfs_folio_clear_writeback(fs_info, folio, start, len);
+ }
+@@ -1506,7 +1525,6 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
+ {
+ struct btrfs_inode *inode = BTRFS_I(folio->mapping->host);
+ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+- const u64 page_start = folio_pos(folio);
+ int ret;
+ size_t pg_offset;
+ loff_t i_size = i_size_read(&inode->vfs_inode);
+@@ -1549,10 +1567,6 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
+
+ bio_ctrl->wbc->nr_to_write--;
+
+- if (ret)
+- btrfs_mark_ordered_io_finished(inode, folio,
+- page_start, PAGE_SIZE, !ret);
+-
+ done:
+ if (ret < 0)
+ mapping_set_error(folio->mapping, ret);
+@@ -2332,11 +2346,8 @@ void extent_write_locked_range(struct inode *inode, const struct folio *locked_f
+ if (ret == 1)
+ goto next_page;
+
+- if (ret) {
+- btrfs_mark_ordered_io_finished(BTRFS_I(inode), folio,
+- cur, cur_len, !ret);
++ if (ret)
+ mapping_set_error(mapping, ret);
+- }
+ btrfs_folio_end_lock(fs_info, folio, cur, cur_len);
+ if (ret < 0)
+ found_error = true;
+--
+2.39.5
+
--- /dev/null
+From c462636b72fbaaad266dee409191bbb1c21e4a81 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 9 Jan 2025 11:24:17 +0100
+Subject: btrfs: use btrfs_inode in extent_writepage()
+
+From: David Sterba <dsterba@suse.com>
+
+[ Upstream commit 011a9a1f244656cc3cbde47edba2b250f794d440 ]
+
+As extent_writepage() is an internal helper we should use our own inode
+type, so change it from struct inode to struct btrfs_inode.
+
+Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
+Reviewed-by: Anand Jain <anand.jain@oracle.com>
+Signed-off-by: David Sterba <dsterba@suse.com>
+Stable-dep-of: 8bf334beb349 ("btrfs: fix double accounting race when extent_writepage_io() failed")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ fs/btrfs/extent_io.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
+index d14ecbe24d775..299507c0008d9 100644
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -1467,15 +1467,15 @@ static noinline_for_stack int extent_writepage_io(struct btrfs_inode *inode,
+ */
+ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl)
+ {
+- struct inode *inode = folio->mapping->host;
+- struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
++ struct btrfs_inode *inode = BTRFS_I(folio->mapping->host);
++ struct btrfs_fs_info *fs_info = inode->root->fs_info;
+ const u64 page_start = folio_pos(folio);
+ int ret;
+ size_t pg_offset;
+- loff_t i_size = i_size_read(inode);
++ loff_t i_size = i_size_read(&inode->vfs_inode);
+ unsigned long end_index = i_size >> PAGE_SHIFT;
+
+- trace_extent_writepage(folio, inode, bio_ctrl->wbc);
++ trace_extent_writepage(folio, &inode->vfs_inode, bio_ctrl->wbc);
+
+ WARN_ON(!folio_test_locked(folio));
+
+@@ -1499,13 +1499,13 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
+ if (ret < 0)
+ goto done;
+
+- ret = writepage_delalloc(BTRFS_I(inode), folio, bio_ctrl);
++ ret = writepage_delalloc(inode, folio, bio_ctrl);
+ if (ret == 1)
+ return 0;
+ if (ret)
+ goto done;
+
+- ret = extent_writepage_io(BTRFS_I(inode), folio, folio_pos(folio),
++ ret = extent_writepage_io(inode, folio, folio_pos(folio),
+ PAGE_SIZE, bio_ctrl, i_size);
+ if (ret == 1)
+ return 0;
+@@ -1514,7 +1514,7 @@ static int extent_writepage(struct folio *folio, struct btrfs_bio_ctrl *bio_ctrl
+
+ done:
+ if (ret) {
+- btrfs_mark_ordered_io_finished(BTRFS_I(inode), folio,
++ btrfs_mark_ordered_io_finished(inode, folio,
+ page_start, PAGE_SIZE, !ret);
+ mapping_set_error(folio->mapping, ret);
+ }
+--
+2.39.5
+
--- /dev/null
+From 3e24ac9c451c2237e10dfeb354c563feecc403f8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 15 Jan 2025 17:43:29 +0800
+Subject: drm/amd/display: Correct register address in dcn35
+
+From: loanchen <lo-an.chen@amd.com>
+
+[ Upstream commit f88192d2335b5a911fcfa09338cc00624571ec5e ]
+
+[Why]
+The offset address of mmCLK5_spll_field_8 was incorrect for dcn35,
+which caused SSC not to be enabled.
+
+Reviewed-by: Charlene Liu <charlene.liu@amd.com>
+Signed-off-by: Lo-An Chen <lo-an.chen@amd.com>
+Signed-off-by: Zaeem Mohamed <zaeem.mohamed@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+index 6d6cb8ef59db0..2e435ee363fed 100644
+--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+@@ -89,7 +89,7 @@
+ #define mmCLK1_CLK4_ALLOW_DS 0x16EA8
+ #define mmCLK1_CLK5_ALLOW_DS 0x16EB1
+
+-#define mmCLK5_spll_field_8 0x1B04B
++#define mmCLK5_spll_field_8 0x1B24B
+ #define mmDENTIST_DISPCLK_CNTL 0x0124
+ #define regDENTIST_DISPCLK_CNTL 0x0064
+ #define regDENTIST_DISPCLK_CNTL_BASE_IDX 1
+--
+2.39.5
+
--- /dev/null
+From c4f593d99d8e578ce9db588cd41b199c8a54c973 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 29 Nov 2024 17:18:50 -0500
+Subject: drm/amd/display: update dcn351 used clock offset
+
+From: Charlene Liu <Charlene.Liu@amd.com>
+
+[ Upstream commit a1fc2837f4960e84e9375e12292584ad2ae472da ]
+
+[why]
+The hw register offsets differ on dcn351, so it needs its own clock
+register list.
+
+Reviewed-by: Martin Leung <martin.leung@amd.com>
+Signed-off-by: Charlene Liu <Charlene.Liu@amd.com>
+Signed-off-by: Aurabindo Pillai <aurabindo.pillai@amd.com>
+Tested-by: Daniel Wheeler <daniel.wheeler@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Stable-dep-of: f88192d2335b ("drm/amd/display: Correct register address in dcn35")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../gpu/drm/amd/display/dc/clk_mgr/Makefile | 2 +-
+ .../gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c | 5 +-
+ .../display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c | 140 ++++++++++++++++++
+ .../display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c | 132 +++++++++++++----
+ .../display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h | 4 +
+ .../amd/display/dc/inc/hw/clk_mgr_internal.h | 59 ++++++++
+ 6 files changed, 308 insertions(+), 34 deletions(-)
+ create mode 100644 drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c
+
+diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
+index ab1132bc896a3..d9955c5d2e5ed 100644
+--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/Makefile
+@@ -174,7 +174,7 @@ AMD_DISPLAY_FILES += $(AMD_DAL_CLK_MGR_DCN32)
+ ###############################################################################
+ # DCN35
+ ###############################################################################
+-CLK_MGR_DCN35 = dcn35_smu.o dcn35_clk_mgr.o
++CLK_MGR_DCN35 = dcn35_smu.o dcn351_clk_mgr.o dcn35_clk_mgr.o
+
+ AMD_DAL_CLK_MGR_DCN35 = $(addprefix $(AMDDALPATH)/dc/clk_mgr/dcn35/,$(CLK_MGR_DCN35))
+
+diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
+index 0e243f4344d05..4c3e58c730b11 100644
+--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/clk_mgr.c
+@@ -355,8 +355,11 @@ struct clk_mgr *dc_clk_mgr_create(struct dc_context *ctx, struct pp_smu_funcs *p
+ BREAK_TO_DEBUGGER();
+ return NULL;
+ }
++ if (ctx->dce_version == DCN_VERSION_3_51)
++ dcn351_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
++ else
++ dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
+
+- dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
+ return &clk_mgr->base.base;
+ }
+ break;
+diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c
+new file mode 100644
+index 0000000000000..6a6ae618650b6
+--- /dev/null
++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn351_clk_mgr.c
+@@ -0,0 +1,140 @@
++/*
++ * Copyright 2024 Advanced Micro Devices, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ *
++ * Authors: AMD
++ *
++ */
++
++#include "core_types.h"
++#include "dcn35_clk_mgr.h"
++
++#define DCN_BASE__INST0_SEG1 0x000000C0
++#define mmCLK1_CLK_PLL_REQ 0x16E37
++
++#define mmCLK1_CLK0_DFS_CNTL 0x16E69
++#define mmCLK1_CLK1_DFS_CNTL 0x16E6C
++#define mmCLK1_CLK2_DFS_CNTL 0x16E6F
++#define mmCLK1_CLK3_DFS_CNTL 0x16E72
++#define mmCLK1_CLK4_DFS_CNTL 0x16E75
++#define mmCLK1_CLK5_DFS_CNTL 0x16E78
++
++#define mmCLK1_CLK0_CURRENT_CNT 0x16EFC
++#define mmCLK1_CLK1_CURRENT_CNT 0x16EFD
++#define mmCLK1_CLK2_CURRENT_CNT 0x16EFE
++#define mmCLK1_CLK3_CURRENT_CNT 0x16EFF
++#define mmCLK1_CLK4_CURRENT_CNT 0x16F00
++#define mmCLK1_CLK5_CURRENT_CNT 0x16F01
++
++#define mmCLK1_CLK0_BYPASS_CNTL 0x16E8A
++#define mmCLK1_CLK1_BYPASS_CNTL 0x16E93
++#define mmCLK1_CLK2_BYPASS_CNTL 0x16E9C
++#define mmCLK1_CLK3_BYPASS_CNTL 0x16EA5
++#define mmCLK1_CLK4_BYPASS_CNTL 0x16EAE
++#define mmCLK1_CLK5_BYPASS_CNTL 0x16EB7
++
++#define mmCLK1_CLK0_DS_CNTL 0x16E83
++#define mmCLK1_CLK1_DS_CNTL 0x16E8C
++#define mmCLK1_CLK2_DS_CNTL 0x16E95
++#define mmCLK1_CLK3_DS_CNTL 0x16E9E
++#define mmCLK1_CLK4_DS_CNTL 0x16EA7
++#define mmCLK1_CLK5_DS_CNTL 0x16EB0
++
++#define mmCLK1_CLK0_ALLOW_DS 0x16E84
++#define mmCLK1_CLK1_ALLOW_DS 0x16E8D
++#define mmCLK1_CLK2_ALLOW_DS 0x16E96
++#define mmCLK1_CLK3_ALLOW_DS 0x16E9F
++#define mmCLK1_CLK4_ALLOW_DS 0x16EA8
++#define mmCLK1_CLK5_ALLOW_DS 0x16EB1
++
++#define mmCLK5_spll_field_8 0x1B04B
++#define mmDENTIST_DISPCLK_CNTL 0x0124
++#define regDENTIST_DISPCLK_CNTL 0x0064
++#define regDENTIST_DISPCLK_CNTL_BASE_IDX 1
++
++#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
++#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
++#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
++#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
++#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
++#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
++
++#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L
++
++// DENTIST_DISPCLK_CNTL
++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER__SHIFT 0x0
++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER__SHIFT 0x8
++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE__SHIFT 0x13
++#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE__SHIFT 0x14
++#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER__SHIFT 0x18
++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER_MASK 0x0000007FL
++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER_MASK 0x00007F00L
++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE_MASK 0x00080000L
++#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE_MASK 0x00100000L
++#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER_MASK 0x7F000000L
++
++#define CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L
++
++#define REG(reg) \
++ (clk_mgr->regs->reg)
++
++#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
++
++#define BASE(seg) BASE_INNER(seg)
++
++#define SR(reg_name)\
++ .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
++ reg ## reg_name
++
++#define CLK_SR_DCN35(reg_name)\
++ .reg_name = mm ## reg_name
++
++static const struct clk_mgr_registers clk_mgr_regs_dcn351 = {
++ CLK_REG_LIST_DCN35()
++};
++
++static const struct clk_mgr_shift clk_mgr_shift_dcn351 = {
++ CLK_COMMON_MASK_SH_LIST_DCN32(__SHIFT)
++};
++
++static const struct clk_mgr_mask clk_mgr_mask_dcn351 = {
++ CLK_COMMON_MASK_SH_LIST_DCN32(_MASK)
++};
++
++#define TO_CLK_MGR_DCN35(clk_mgr)\
++ container_of(clk_mgr, struct clk_mgr_dcn35, base)
++
++
++void dcn351_clk_mgr_construct(
++ struct dc_context *ctx,
++ struct clk_mgr_dcn35 *clk_mgr,
++ struct pp_smu_funcs *pp_smu,
++ struct dccg *dccg)
++{
++ /*register offset changed*/
++ clk_mgr->base.regs = &clk_mgr_regs_dcn351;
++ clk_mgr->base.clk_mgr_shift = &clk_mgr_shift_dcn351;
++ clk_mgr->base.clk_mgr_mask = &clk_mgr_mask_dcn351;
++
++ dcn35_clk_mgr_construct(ctx, clk_mgr, pp_smu, dccg);
++
++}
++
++
+diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+index b77333817f189..6d6cb8ef59db0 100644
+--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.c
+@@ -36,15 +36,11 @@
+ #include "dcn20/dcn20_clk_mgr.h"
+
+
+-
+-
+ #include "reg_helper.h"
+ #include "core_types.h"
+ #include "dcn35_smu.h"
+ #include "dm_helpers.h"
+
+-/* TODO: remove this include once we ported over remaining clk mgr functions*/
+-#include "dcn30/dcn30_clk_mgr.h"
+ #include "dcn31/dcn31_clk_mgr.h"
+
+ #include "dc_dmub_srv.h"
+@@ -55,35 +51,102 @@
+ #define DC_LOGGER \
+ clk_mgr->base.base.ctx->logger
+
++#define DCN_BASE__INST0_SEG1 0x000000C0
++#define mmCLK1_CLK_PLL_REQ 0x16E37
++
++#define mmCLK1_CLK0_DFS_CNTL 0x16E69
++#define mmCLK1_CLK1_DFS_CNTL 0x16E6C
++#define mmCLK1_CLK2_DFS_CNTL 0x16E6F
++#define mmCLK1_CLK3_DFS_CNTL 0x16E72
++#define mmCLK1_CLK4_DFS_CNTL 0x16E75
++#define mmCLK1_CLK5_DFS_CNTL 0x16E78
++
++#define mmCLK1_CLK0_CURRENT_CNT 0x16EFB
++#define mmCLK1_CLK1_CURRENT_CNT 0x16EFC
++#define mmCLK1_CLK2_CURRENT_CNT 0x16EFD
++#define mmCLK1_CLK3_CURRENT_CNT 0x16EFE
++#define mmCLK1_CLK4_CURRENT_CNT 0x16EFF
++#define mmCLK1_CLK5_CURRENT_CNT 0x16F00
++
++#define mmCLK1_CLK0_BYPASS_CNTL 0x16E8A
++#define mmCLK1_CLK1_BYPASS_CNTL 0x16E93
++#define mmCLK1_CLK2_BYPASS_CNTL 0x16E9C
++#define mmCLK1_CLK3_BYPASS_CNTL 0x16EA5
++#define mmCLK1_CLK4_BYPASS_CNTL 0x16EAE
++#define mmCLK1_CLK5_BYPASS_CNTL 0x16EB7
++
++#define mmCLK1_CLK0_DS_CNTL 0x16E83
++#define mmCLK1_CLK1_DS_CNTL 0x16E8C
++#define mmCLK1_CLK2_DS_CNTL 0x16E95
++#define mmCLK1_CLK3_DS_CNTL 0x16E9E
++#define mmCLK1_CLK4_DS_CNTL 0x16EA7
++#define mmCLK1_CLK5_DS_CNTL 0x16EB0
++
++#define mmCLK1_CLK0_ALLOW_DS 0x16E84
++#define mmCLK1_CLK1_ALLOW_DS 0x16E8D
++#define mmCLK1_CLK2_ALLOW_DS 0x16E96
++#define mmCLK1_CLK3_ALLOW_DS 0x16E9F
++#define mmCLK1_CLK4_ALLOW_DS 0x16EA8
++#define mmCLK1_CLK5_ALLOW_DS 0x16EB1
++
++#define mmCLK5_spll_field_8 0x1B04B
++#define mmDENTIST_DISPCLK_CNTL 0x0124
++#define regDENTIST_DISPCLK_CNTL 0x0064
++#define regDENTIST_DISPCLK_CNTL_BASE_IDX 1
++
++#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
++#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
++#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
++#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
++#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
++#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
++
++#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L
++#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L
++// DENTIST_DISPCLK_CNTL
++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER__SHIFT 0x0
++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER__SHIFT 0x8
++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE__SHIFT 0x13
++#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE__SHIFT 0x14
++#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER__SHIFT 0x18
++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_WDIVIDER_MASK 0x0000007FL
++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_RDIVIDER_MASK 0x00007F00L
++#define DENTIST_DISPCLK_CNTL__DENTIST_DISPCLK_CHG_DONE_MASK 0x00080000L
++#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_CHG_DONE_MASK 0x00100000L
++#define DENTIST_DISPCLK_CNTL__DENTIST_DPPCLK_WDIVIDER_MASK 0x7F000000L
++
++#define CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L
++
++#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0
++#undef FN
++#define FN(reg_name, field_name) \
++ clk_mgr->clk_mgr_shift->field_name, clk_mgr->clk_mgr_mask->field_name
+
+-#define regCLK1_CLK_PLL_REQ 0x0237
+-#define regCLK1_CLK_PLL_REQ_BASE_IDX 0
++#define REG(reg) \
++ (clk_mgr->regs->reg)
+
+-#define CLK1_CLK_PLL_REQ__FbMult_int__SHIFT 0x0
+-#define CLK1_CLK_PLL_REQ__PllSpineDiv__SHIFT 0xc
+-#define CLK1_CLK_PLL_REQ__FbMult_frac__SHIFT 0x10
+-#define CLK1_CLK_PLL_REQ__FbMult_int_MASK 0x000001FFL
+-#define CLK1_CLK_PLL_REQ__PllSpineDiv_MASK 0x0000F000L
+-#define CLK1_CLK_PLL_REQ__FbMult_frac_MASK 0xFFFF0000L
++#define BASE_INNER(seg) DCN_BASE__INST0_SEG ## seg
+
+-#define regCLK1_CLK2_BYPASS_CNTL 0x029c
+-#define regCLK1_CLK2_BYPASS_CNTL_BASE_IDX 0
++#define BASE(seg) BASE_INNER(seg)
+
+-#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL__SHIFT 0x0
+-#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV__SHIFT 0x10
+-#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK 0x00000007L
+-#define CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_DIV_MASK 0x000F0000L
++#define SR(reg_name)\
++ .reg_name = BASE(reg ## reg_name ## _BASE_IDX) + \
++ reg ## reg_name
+
+-#define regCLK5_0_CLK5_spll_field_8 0x464b
+-#define regCLK5_0_CLK5_spll_field_8_BASE_IDX 0
++#define CLK_SR_DCN35(reg_name)\
++ .reg_name = mm ## reg_name
+
+-#define CLK5_0_CLK5_spll_field_8__spll_ssc_en__SHIFT 0xd
+-#define CLK5_0_CLK5_spll_field_8__spll_ssc_en_MASK 0x00002000L
++static const struct clk_mgr_registers clk_mgr_regs_dcn35 = {
++ CLK_REG_LIST_DCN35()
++};
+
+-#define SMU_VER_THRESHOLD 0x5D4A00 //93.74.0
++static const struct clk_mgr_shift clk_mgr_shift_dcn35 = {
++ CLK_COMMON_MASK_SH_LIST_DCN32(__SHIFT)
++};
+
+-#define REG(reg_name) \
+- (ctx->clk_reg_offsets[reg ## reg_name ## _BASE_IDX] + reg ## reg_name)
++static const struct clk_mgr_mask clk_mgr_mask_dcn35 = {
++ CLK_COMMON_MASK_SH_LIST_DCN32(_MASK)
++};
+
+ #define TO_CLK_MGR_DCN35(clk_mgr)\
+ container_of(clk_mgr, struct clk_mgr_dcn35, base)
+@@ -452,7 +515,6 @@ static int get_vco_frequency_from_reg(struct clk_mgr_internal *clk_mgr)
+ struct fixed31_32 pll_req;
+ unsigned int fbmult_frac_val = 0;
+ unsigned int fbmult_int_val = 0;
+- struct dc_context *ctx = clk_mgr->base.ctx;
+
+ /*
+ * Register value of fbmult is in 8.16 format, we are converting to 314.32
+@@ -512,12 +574,12 @@ static void dcn35_dump_clk_registers(struct clk_state_registers_and_bypass *regs
+ static bool dcn35_is_spll_ssc_enabled(struct clk_mgr *clk_mgr_base)
+ {
+ struct clk_mgr_internal *clk_mgr = TO_CLK_MGR_INTERNAL(clk_mgr_base);
+- struct dc_context *ctx = clk_mgr->base.ctx;
++
+ uint32_t ssc_enable;
+
+- REG_GET(CLK5_0_CLK5_spll_field_8, spll_ssc_en, &ssc_enable);
++ ssc_enable = REG_READ(CLK5_spll_field_8) & CLK5_spll_field_8__spll_ssc_en_MASK;
+
+- return ssc_enable == 1;
++ return ssc_enable != 0;
+ }
+
+ static void init_clk_states(struct clk_mgr *clk_mgr)
+@@ -642,10 +704,10 @@ static struct dcn35_ss_info_table ss_info_table = {
+
+ static void dcn35_read_ss_info_from_lut(struct clk_mgr_internal *clk_mgr)
+ {
+- struct dc_context *ctx = clk_mgr->base.ctx;
+- uint32_t clock_source;
++ uint32_t clock_source = 0;
++
++ clock_source = REG_READ(CLK1_CLK2_BYPASS_CNTL) & CLK1_CLK2_BYPASS_CNTL__CLK2_BYPASS_SEL_MASK;
+
+- REG_GET(CLK1_CLK2_BYPASS_CNTL, CLK2_BYPASS_SEL, &clock_source);
+ // If it's DFS mode, clock_source is 0.
+ if (dcn35_is_spll_ssc_enabled(&clk_mgr->base) && (clock_source < ARRAY_SIZE(ss_info_table.ss_percentage))) {
+ clk_mgr->dprefclk_ss_percentage = ss_info_table.ss_percentage[clock_source];
+@@ -1112,6 +1174,12 @@ void dcn35_clk_mgr_construct(
+ clk_mgr->base.dprefclk_ss_divider = 1000;
+ clk_mgr->base.ss_on_dprefclk = false;
+ clk_mgr->base.dfs_ref_freq_khz = 48000;
++ if (ctx->dce_version == DCN_VERSION_3_5) {
++ clk_mgr->base.regs = &clk_mgr_regs_dcn35;
++ clk_mgr->base.clk_mgr_shift = &clk_mgr_shift_dcn35;
++ clk_mgr->base.clk_mgr_mask = &clk_mgr_mask_dcn35;
++ }
++
+
+ clk_mgr->smu_wm_set.wm_set = (struct dcn35_watermarks *)dm_helpers_allocate_gpu_mem(
+ clk_mgr->base.base.ctx,
+diff --git a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h
+index 1203dc605b12c..a12a9bf90806e 100644
+--- a/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h
++++ b/drivers/gpu/drm/amd/display/dc/clk_mgr/dcn35/dcn35_clk_mgr.h
+@@ -60,4 +60,8 @@ void dcn35_clk_mgr_construct(struct dc_context *ctx,
+
+ void dcn35_clk_mgr_destroy(struct clk_mgr_internal *clk_mgr_int);
+
++void dcn351_clk_mgr_construct(struct dc_context *ctx,
++ struct clk_mgr_dcn35 *clk_mgr,
++ struct pp_smu_funcs *pp_smu,
++ struct dccg *dccg);
+ #endif //__DCN35_CLK_MGR_H__
+diff --git a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h
+index c2dd061892f4d..7a1ca1e98059b 100644
+--- a/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h
++++ b/drivers/gpu/drm/amd/display/dc/inc/hw/clk_mgr_internal.h
+@@ -166,6 +166,41 @@ enum dentist_divider_range {
+ CLK_SR_DCN32(CLK1_CLK4_CURRENT_CNT), \
+ CLK_SR_DCN32(CLK4_CLK0_CURRENT_CNT)
+
++#define CLK_REG_LIST_DCN35() \
++ CLK_SR_DCN35(CLK1_CLK_PLL_REQ), \
++ CLK_SR_DCN35(CLK1_CLK0_DFS_CNTL), \
++ CLK_SR_DCN35(CLK1_CLK1_DFS_CNTL), \
++ CLK_SR_DCN35(CLK1_CLK2_DFS_CNTL), \
++ CLK_SR_DCN35(CLK1_CLK3_DFS_CNTL), \
++ CLK_SR_DCN35(CLK1_CLK4_DFS_CNTL), \
++ CLK_SR_DCN35(CLK1_CLK5_DFS_CNTL), \
++ CLK_SR_DCN35(CLK1_CLK0_CURRENT_CNT), \
++ CLK_SR_DCN35(CLK1_CLK1_CURRENT_CNT), \
++ CLK_SR_DCN35(CLK1_CLK2_CURRENT_CNT), \
++ CLK_SR_DCN35(CLK1_CLK3_CURRENT_CNT), \
++ CLK_SR_DCN35(CLK1_CLK4_CURRENT_CNT), \
++ CLK_SR_DCN35(CLK1_CLK5_CURRENT_CNT), \
++ CLK_SR_DCN35(CLK1_CLK0_BYPASS_CNTL), \
++ CLK_SR_DCN35(CLK1_CLK1_BYPASS_CNTL), \
++ CLK_SR_DCN35(CLK1_CLK2_BYPASS_CNTL), \
++ CLK_SR_DCN35(CLK1_CLK3_BYPASS_CNTL), \
++ CLK_SR_DCN35(CLK1_CLK4_BYPASS_CNTL),\
++ CLK_SR_DCN35(CLK1_CLK5_BYPASS_CNTL), \
++ CLK_SR_DCN35(CLK1_CLK0_DS_CNTL), \
++ CLK_SR_DCN35(CLK1_CLK1_DS_CNTL), \
++ CLK_SR_DCN35(CLK1_CLK2_DS_CNTL), \
++ CLK_SR_DCN35(CLK1_CLK3_DS_CNTL), \
++ CLK_SR_DCN35(CLK1_CLK4_DS_CNTL), \
++ CLK_SR_DCN35(CLK1_CLK5_DS_CNTL), \
++ CLK_SR_DCN35(CLK1_CLK0_ALLOW_DS), \
++ CLK_SR_DCN35(CLK1_CLK1_ALLOW_DS), \
++ CLK_SR_DCN35(CLK1_CLK2_ALLOW_DS), \
++ CLK_SR_DCN35(CLK1_CLK3_ALLOW_DS), \
++ CLK_SR_DCN35(CLK1_CLK4_ALLOW_DS), \
++ CLK_SR_DCN35(CLK1_CLK5_ALLOW_DS), \
++ CLK_SR_DCN35(CLK5_spll_field_8), \
++ SR(DENTIST_DISPCLK_CNTL), \
++
+ #define CLK_COMMON_MASK_SH_LIST_DCN32(mask_sh) \
+ CLK_COMMON_MASK_SH_LIST_DCN20_BASE(mask_sh),\
+ CLK_SF(CLK1_CLK_PLL_REQ, FbMult_int, mask_sh),\
+@@ -236,6 +271,7 @@ struct clk_mgr_registers {
+ uint32_t CLK1_CLK2_DFS_CNTL;
+ uint32_t CLK1_CLK3_DFS_CNTL;
+ uint32_t CLK1_CLK4_DFS_CNTL;
++ uint32_t CLK1_CLK5_DFS_CNTL;
+ uint32_t CLK2_CLK2_DFS_CNTL;
+
+ uint32_t CLK1_CLK0_CURRENT_CNT;
+@@ -243,11 +279,34 @@ struct clk_mgr_registers {
+ uint32_t CLK1_CLK2_CURRENT_CNT;
+ uint32_t CLK1_CLK3_CURRENT_CNT;
+ uint32_t CLK1_CLK4_CURRENT_CNT;
++ uint32_t CLK1_CLK5_CURRENT_CNT;
+
+ uint32_t CLK0_CLK0_DFS_CNTL;
+ uint32_t CLK0_CLK1_DFS_CNTL;
+ uint32_t CLK0_CLK3_DFS_CNTL;
+ uint32_t CLK0_CLK4_DFS_CNTL;
++ uint32_t CLK1_CLK0_BYPASS_CNTL;
++ uint32_t CLK1_CLK1_BYPASS_CNTL;
++ uint32_t CLK1_CLK2_BYPASS_CNTL;
++ uint32_t CLK1_CLK3_BYPASS_CNTL;
++ uint32_t CLK1_CLK4_BYPASS_CNTL;
++ uint32_t CLK1_CLK5_BYPASS_CNTL;
++
++ uint32_t CLK1_CLK0_DS_CNTL;
++ uint32_t CLK1_CLK1_DS_CNTL;
++ uint32_t CLK1_CLK2_DS_CNTL;
++ uint32_t CLK1_CLK3_DS_CNTL;
++ uint32_t CLK1_CLK4_DS_CNTL;
++ uint32_t CLK1_CLK5_DS_CNTL;
++
++ uint32_t CLK1_CLK0_ALLOW_DS;
++ uint32_t CLK1_CLK1_ALLOW_DS;
++ uint32_t CLK1_CLK2_ALLOW_DS;
++ uint32_t CLK1_CLK3_ALLOW_DS;
++ uint32_t CLK1_CLK4_ALLOW_DS;
++ uint32_t CLK1_CLK5_ALLOW_DS;
++ uint32_t CLK5_spll_field_8;
++
+ };
+
+ struct clk_mgr_shift {
+--
+2.39.5
+
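The patch above moves the DCN 3.5/3.5.1 clock registers out of per-ASIC REG_GET/base-index arithmetic and into const register tables that the constructor installs once; after that, every read is a plain table lookup plus an explicit mask (see dcn35_is_spll_ssc_enabled and dcn35_read_ss_info_from_lut). A minimal standalone C sketch of that pattern, using purely hypothetical register names and a faked MMIO read, might look like this:

	#include <stdint.h>
	#include <stdio.h>

	/* hypothetical absolute register offsets, one per hardware variant */
	#define mmDEMO_CLK2_BYPASS_CNTL_DCN35  0x16E9C
	#define mmDEMO_CLK2_BYPASS_CNTL_DCN351 0x16EA0
	#define DEMO_CLK2_BYPASS_SEL_MASK      0x00000007u

	struct demo_clk_regs {
		uint32_t CLK2_BYPASS_CNTL;
	};

	static const struct demo_clk_regs demo_regs_dcn35 = {
		.CLK2_BYPASS_CNTL = mmDEMO_CLK2_BYPASS_CNTL_DCN35,
	};

	static const struct demo_clk_regs demo_regs_dcn351 = {
		.CLK2_BYPASS_CNTL = mmDEMO_CLK2_BYPASS_CNTL_DCN351,
	};

	struct demo_clk_mgr {
		const struct demo_clk_regs *regs;
	};

	/* stand-in for the MMIO accessor; the real code reads hardware */
	static uint32_t demo_mmio_read(uint32_t offset)
	{
		return offset & 0x7; /* fake value, enough for the sketch */
	}

	#define DEMO_REG_READ(mgr, reg) demo_mmio_read((mgr)->regs->reg)

	int main(void)
	{
		/* variant chosen once, at construct time */
		struct demo_clk_mgr mgr = { .regs = &demo_regs_dcn351 };

		uint32_t sel = DEMO_REG_READ(&mgr, CLK2_BYPASS_CNTL) &
			       DEMO_CLK2_BYPASS_SEL_MASK;

		printf("bypass select: %u\n", sel);
		return 0;
	}

Keeping the offsets in const tables lets DCN 3.5 and DCN 3.5.1 share every accessor and differ only in which table dcn35_clk_mgr_construct or dcn351_clk_mgr_construct installs.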
--- /dev/null
+From 51b92ccce675db0c6d7d11c876ace5999c7a8c6d Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 28 Jan 2025 19:16:49 +0000
+Subject: drm/amdkfd: Ensure consistent barrier state saved in gfx12 trap
+ handler
+
+From: Lancelot SIX <lancelot.six@amd.com>
+
+[ Upstream commit d584198a6fe4c51f4aa88ad72f258f8961a0f11c ]
+
+It is possible for some waves in a workgroup to finish their save
+sequence before the group leader has had time to capture the workgroup
+barrier state. When this happens, the exit of those waves does impact the
+barrier state. As a consequence, the state captured by the group leader
+is invalid, and is eventually incorrectly restored.
+
+This patch proposes to have all waves in a workgroup wait for each other
+at the end of their save sequence (just before calling s_endpgm_saved).
+
+Signed-off-by: Lancelot SIX <lancelot.six@amd.com>
+Reviewed-by: Jay Cornwall <jay.cornwall@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Cc: stable@vger.kernel.org # 6.12.x
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 3 ++-
+ drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm | 4 ++++
+ 2 files changed, 6 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+index 02f7ba8c93cd4..7062f12b5b751 100644
+--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+@@ -4117,7 +4117,8 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
+ 0x0000ffff, 0x8bfe7e7e,
+ 0x8bea6a6a, 0xb97af804,
+ 0xbe804ec2, 0xbf94fffe,
+- 0xbe804a6c, 0xbfb10000,
++ 0xbe804a6c, 0xbe804ec2,
++ 0xbf94fffe, 0xbfb10000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0xbf9f0000,
+ 0xbf9f0000, 0x00000000,
+diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
+index 1740e98c6719d..7b9d36e5fa437 100644
+--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
+@@ -1049,6 +1049,10 @@ L_SKIP_BARRIER_RESTORE:
+ s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
+
+ L_END_PGM:
++ // Make sure that no wave of the workgroup can exit the trap handler
++ // before the workgroup barrier state is saved.
++ s_barrier_signal -2
++ s_barrier_wait -2
+ s_endpgm_saved
+ end
+
+--
+2.39.5
+
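The race described above is easiest to see by analogy: the group leader captures shared barrier state while other waves may already be exiting, and every exit mutates that state, so the capture must be ordered before any exit. Below is a minimal userspace sketch of that argument (pthreads, hypothetical names, not the actual GPU mechanism; the real fix is the s_barrier_signal/s_barrier_wait pair added before s_endpgm_saved):

	#include <pthread.h>
	#include <stdatomic.h>
	#include <stdio.h>

	#define NWAVES 4	/* stand-in for the waves of one workgroup */

	static pthread_barrier_t exit_barrier;	/* models s_barrier_signal/_wait -2 */
	static atomic_int group_state = NWAVES;	/* stand-in for HW barrier state */
	static atomic_int leader_snapshot;

	static void *wave(void *arg)
	{
		long id = (long)arg;

		/* ...per-wave save sequence would run here... */

		if (id == 0)	/* the group leader captures the shared state */
			atomic_store(&leader_snapshot, atomic_load(&group_state));

		/*
		 * The fix: all waves rendezvous here before exiting, so no
		 * exit (which mutates group_state) can precede the capture.
		 */
		pthread_barrier_wait(&exit_barrier);

		atomic_fetch_sub(&group_state, 1);	/* exiting perturbs the state */
		return NULL;
	}

	int main(void)	/* build with: cc -pthread sketch.c */
	{
		pthread_t t[NWAVES];

		pthread_barrier_init(&exit_barrier, NULL, NWAVES);
		for (long i = 0; i < NWAVES; i++)
			pthread_create(&t[i], NULL, wave, (void *)i);
		for (int i = 0; i < NWAVES; i++)
			pthread_join(t[i], NULL);
		pthread_barrier_destroy(&exit_barrier);

		printf("leader snapshot: %d (always %d with the barrier)\n",
		       atomic_load(&leader_snapshot), NWAVES);
		return 0;
	}

With the rendezvous in place the leader's load is ordered before every decrement, so the snapshot always reads NWAVES; deleting the pthread_barrier_wait call reintroduces exactly the inconsistency the commit message describes.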
--- /dev/null
+From 028ee9808566a4aed6209071340acf7dfa455cef Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 2 Oct 2024 16:12:35 -0500
+Subject: drm/amdkfd: Move gfx12 trap handler to separate file
+
+From: Jay Cornwall <jay.cornwall@amd.com>
+
+[ Upstream commit 62498e797aeb2bfa92a823ee1a8253f96d1cbe3f ]
+
+gfx12 derivatives will have substantially different trap handler
+implementations from gfx10/gfx11. Add a separate source file for
+gfx12+ and remove unneeded conditional code.
+
+No functional change.
+
+v2: Revert copyright date to 2018, minor comment fixes
+
+Signed-off-by: Jay Cornwall <jay.cornwall@amd.com>
+Reviewed-by: Lancelot Six <lancelot.six@amd.com>
+Cc: Jonathan Kim <jonathan.kim@amd.com>
+Acked-by: Alex Deucher <alexander.deucher@amd.com>
+Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
+Stable-dep-of: d584198a6fe4 ("drm/amdkfd: Ensure consistent barrier state saved in gfx12 trap handler")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ .../amd/amdkfd/cwsr_trap_handler_gfx10.asm | 202 +--
+ .../amd/amdkfd/cwsr_trap_handler_gfx12.asm | 1126 +++++++++++++++++
+ 2 files changed, 1127 insertions(+), 201 deletions(-)
+ create mode 100644 drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+index 44772eec9ef4d..96fbb16ceb216 100644
+--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx10.asm
+@@ -34,41 +34,24 @@
+ * cpp -DASIC_FAMILY=CHIP_PLUM_BONITO cwsr_trap_handler_gfx10.asm -P -o gfx11.sp3
+ * sp3 gfx11.sp3 -hex gfx11.hex
+ *
+- * gfx12:
+- * cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx10.asm -P -o gfx12.sp3
+- * sp3 gfx12.sp3 -hex gfx12.hex
+ */
+
+ #define CHIP_NAVI10 26
+ #define CHIP_SIENNA_CICHLID 30
+ #define CHIP_PLUM_BONITO 36
+-#define CHIP_GFX12 37
+
+ #define NO_SQC_STORE (ASIC_FAMILY >= CHIP_SIENNA_CICHLID)
+ #define HAVE_XNACK (ASIC_FAMILY < CHIP_SIENNA_CICHLID)
+ #define HAVE_SENDMSG_RTN (ASIC_FAMILY >= CHIP_PLUM_BONITO)
+ #define HAVE_BUFFER_LDS_LOAD (ASIC_FAMILY < CHIP_PLUM_BONITO)
+-#define SW_SA_TRAP (ASIC_FAMILY >= CHIP_PLUM_BONITO && ASIC_FAMILY < CHIP_GFX12)
++#define SW_SA_TRAP (ASIC_FAMILY == CHIP_PLUM_BONITO)
+ #define SAVE_AFTER_XNACK_ERROR (HAVE_XNACK && !NO_SQC_STORE) // workaround for TCP store failure after XNACK error when ALLOW_REPLAY=0, for debugger
+ #define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost MODE.DEBUG_EN exception when SAVECTX raised
+
+-#if ASIC_FAMILY < CHIP_GFX12
+ #define S_COHERENCE glc:1
+ #define V_COHERENCE slc:1 glc:1
+ #define S_WAITCNT_0 s_waitcnt 0
+-#else
+-#define S_COHERENCE scope:SCOPE_SYS
+-#define V_COHERENCE scope:SCOPE_SYS
+-#define S_WAITCNT_0 s_wait_idle
+-
+-#define HW_REG_SHADER_FLAT_SCRATCH_LO HW_REG_WAVE_SCRATCH_BASE_LO
+-#define HW_REG_SHADER_FLAT_SCRATCH_HI HW_REG_WAVE_SCRATCH_BASE_HI
+-#define HW_REG_GPR_ALLOC HW_REG_WAVE_GPR_ALLOC
+-#define HW_REG_LDS_ALLOC HW_REG_WAVE_LDS_ALLOC
+-#define HW_REG_MODE HW_REG_WAVE_MODE
+-#endif
+
+-#if ASIC_FAMILY < CHIP_GFX12
+ var SQ_WAVE_STATUS_SPI_PRIO_MASK = 0x00000006
+ var SQ_WAVE_STATUS_HALT_MASK = 0x2000
+ var SQ_WAVE_STATUS_ECC_ERR_MASK = 0x20000
+@@ -81,21 +64,6 @@ var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATUS_SPI_PRIO_MASK|SQ_WAVE_STATUS_E
+ var S_STATUS_HALT_MASK = SQ_WAVE_STATUS_HALT_MASK
+ var S_SAVE_PC_HI_TRAP_ID_MASK = 0x00FF0000
+ var S_SAVE_PC_HI_HT_MASK = 0x01000000
+-#else
+-var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4
+-var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9
+-var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00
+-var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000
+-var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000
+-var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15
+-var SQ_WAVE_STATUS_WAVE64_SHIFT = 29
+-var SQ_WAVE_STATUS_WAVE64_SIZE = 1
+-var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9
+-var S_STATUS_HWREG = HW_REG_WAVE_STATE_PRIV
+-var S_STATUS_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK
+-var S_STATUS_HALT_MASK = SQ_WAVE_STATE_PRIV_HALT_MASK
+-var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000
+-#endif
+
+ var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24
+ var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
+@@ -110,7 +78,6 @@ var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 8
+ var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12
+ #endif
+
+-#if ASIC_FAMILY < CHIP_GFX12
+ var SQ_WAVE_TRAPSTS_SAVECTX_MASK = 0x400
+ var SQ_WAVE_TRAPSTS_EXCP_MASK = 0x1FF
+ var SQ_WAVE_TRAPSTS_SAVECTX_SHIFT = 10
+@@ -161,39 +128,6 @@ var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT
+ var S_TRAPSTS_HWREG = HW_REG_TRAPSTS
+ var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_TRAPSTS_SAVECTX_MASK
+ var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_TRAPSTS_SAVECTX_SHIFT
+-#else
+-var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF
+-var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10
+-var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5
+-var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20
+-var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40
+-var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6
+-var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80
+-var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7
+-var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100
+-var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8
+-var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200
+-var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800
+-var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80
+-var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200
+-
+-var S_TRAPSTS_HWREG = HW_REG_WAVE_EXCP_FLAG_PRIV
+-var S_TRAPSTS_SAVE_CONTEXT_MASK = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
+-var S_TRAPSTS_SAVE_CONTEXT_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
+-var S_TRAPSTS_NON_MASKABLE_EXCP_MASK = SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\
+- SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\
+- SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\
+- SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\
+- SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\
+- SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK
+-var S_TRAPSTS_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
+-var S_TRAPSTS_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
+-var S_TRAPSTS_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
+-var S_TRAPSTS_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT
+-var S_TRAPSTS_RESTORE_PART_3_SIZE = 32 - S_TRAPSTS_RESTORE_PART_3_SHIFT
+-var BARRIER_STATE_SIGNAL_OFFSET = 16
+-var BARRIER_STATE_VALID_OFFSET = 0
+-#endif
+
+ // bits [31:24] unused by SPI debug data
+ var TTMP11_SAVE_REPLAY_W64H_SHIFT = 31
+@@ -305,11 +239,7 @@ L_TRAP_NO_BARRIER:
+
+ L_HALTED:
+ // Host trap may occur while wave is halted.
+-#if ASIC_FAMILY < CHIP_GFX12
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
+-#else
+- s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
+-#endif
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+
+ L_CHECK_SAVE:
+@@ -336,7 +266,6 @@ L_NOT_HALTED:
+ // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
+ // Maskable exceptions only cause the wave to enter the trap handler if
+ // their respective bit in mode.excp_en is set.
+-#if ASIC_FAMILY < CHIP_GFX12
+ s_and_b32 ttmp2, s_save_trapsts, SQ_WAVE_TRAPSTS_EXCP_MASK|SQ_WAVE_TRAPSTS_EXCP_HI_MASK
+ s_cbranch_scc0 L_CHECK_TRAP_ID
+
+@@ -349,17 +278,6 @@ L_NOT_ADDR_WATCH:
+ s_lshl_b32 ttmp2, ttmp2, SQ_WAVE_MODE_EXCP_EN_SHIFT
+ s_and_b32 ttmp2, ttmp2, ttmp3
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+-#else
+- s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
+- s_and_b32 ttmp3, s_save_trapsts, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK
+- s_cbranch_scc0 L_NOT_ADDR_WATCH
+- s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK
+-
+-L_NOT_ADDR_WATCH:
+- s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL)
+- s_and_b32 ttmp2, ttmp3, ttmp2
+- s_cbranch_scc1 L_FETCH_2ND_TRAP
+-#endif
+
+ L_CHECK_TRAP_ID:
+ // Check trap_id != 0
+@@ -369,13 +287,8 @@ L_CHECK_TRAP_ID:
+ #if SINGLE_STEP_MISSED_WORKAROUND
+ // Prioritize single step exception over context save.
+ // Second-level trap will halt wave and RFE, re-entering for SAVECTX.
+-#if ASIC_FAMILY < CHIP_GFX12
+ s_getreg_b32 ttmp2, hwreg(HW_REG_MODE)
+ s_and_b32 ttmp2, ttmp2, SQ_WAVE_MODE_DEBUG_EN_MASK
+-#else
+- // WAVE_TRAP_CTRL is already in ttmp3.
+- s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK
+-#endif
+ s_cbranch_scc1 L_FETCH_2ND_TRAP
+ #endif
+
+@@ -425,12 +338,7 @@ L_NO_NEXT_TRAP:
+ s_cbranch_scc1 L_TRAP_CASE
+
+ // Host trap will not cause trap re-entry.
+-#if ASIC_FAMILY < CHIP_GFX12
+ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_HT_MASK
+-#else
+- s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
+- s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
+-#endif
+ s_cbranch_scc1 L_EXIT_TRAP
+ s_or_b32 s_save_status, s_save_status, S_STATUS_HALT_MASK
+
+@@ -457,16 +365,7 @@ L_EXIT_TRAP:
+ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
+ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
+
+-#if ASIC_FAMILY < CHIP_GFX12
+ s_setreg_b32 hwreg(S_STATUS_HWREG), s_save_status
+-#else
+- // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it.
+- // Only restore fields which the trap handler changes.
+- s_lshr_b32 s_save_status, s_save_status, SQ_WAVE_STATE_PRIV_SCC_SHIFT
+- s_setreg_b32 hwreg(S_STATUS_HWREG, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \
+- SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_status
+-#endif
+-
+ s_rfe_b64 [ttmp0, ttmp1]
+
+ L_SAVE:
+@@ -478,14 +377,6 @@ L_SAVE:
+ s_endpgm
+ L_HAVE_VGPRS:
+ #endif
+-#if ASIC_FAMILY >= CHIP_GFX12
+- s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
+- s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT
+- s_cbranch_scc0 L_HAVE_VGPRS
+- s_endpgm
+-L_HAVE_VGPRS:
+-#endif
+-
+ s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
+ s_mov_b32 s_save_tmp, 0
+ s_setreg_b32 hwreg(S_TRAPSTS_HWREG, S_TRAPSTS_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit
+@@ -671,19 +562,6 @@ L_SAVE_HWREG:
+ s_mov_b32 m0, 0x0 //Next lane of v2 to write to
+ #endif
+
+-#if ASIC_FAMILY >= CHIP_GFX12
+- // Ensure no further changes to barrier or LDS state.
+- // STATE_PRIV.BARRIER_COMPLETE may change up to this point.
+- s_barrier_signal -2
+- s_barrier_wait -2
+-
+- // Re-read final state of BARRIER_COMPLETE field for save.
+- s_getreg_b32 s_save_tmp, hwreg(S_STATUS_HWREG)
+- s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
+- s_andn2_b32 s_save_status, s_save_status, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
+- s_or_b32 s_save_status, s_save_status, s_save_tmp
+-#endif
+-
+ write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+ write_hwreg_to_mem(s_save_pc_lo, s_save_buf_rsrc0, s_save_mem_offset)
+ s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+@@ -707,21 +585,6 @@ L_SAVE_HWREG:
+ s_getreg_b32 s_save_m0, hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI)
+ write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+
+-#if ASIC_FAMILY >= CHIP_GFX12
+- s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
+- write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+-
+- s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_TRAP_CTRL)
+- write_hwreg_to_mem(s_save_m0, s_save_buf_rsrc0, s_save_mem_offset)
+-
+- s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
+- write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
+-
+- s_get_barrier_state s_save_tmp, -1
+- s_wait_kmcnt (0)
+- write_hwreg_to_mem(s_save_tmp, s_save_buf_rsrc0, s_save_mem_offset)
+-#endif
+-
+ #if NO_SQC_STORE
+ // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
+ s_mov_b32 exec_lo, 0xFFFF
+@@ -814,9 +677,7 @@ L_SAVE_LDS_NORMAL:
+ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero?
+ s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_DONE
+
+-#if ASIC_FAMILY < CHIP_GFX12
+ s_barrier //LDS is used? wait for other waves in the same TG
+-#endif
+ s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
+ s_cbranch_scc0 L_SAVE_LDS_DONE
+
+@@ -1081,11 +942,6 @@ L_RESTORE:
+ s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
+ s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
+
+-#if ASIC_FAMILY >= CHIP_GFX12
+- // Save s_restore_spi_init_hi for later use.
+- s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi
+-#endif
+-
+ //determine it is wave32 or wave64
+ get_wave_size2(s_restore_size)
+
+@@ -1320,9 +1176,7 @@ L_RESTORE_SGPR:
+ // s_barrier with MODE.DEBUG_EN=1, STATUS.PRIV=1 incorrectly asserts debug exception.
+ // Clear DEBUG_EN before and restore MODE after the barrier.
+ s_setreg_imm32_b32 hwreg(HW_REG_MODE), 0
+-#if ASIC_FAMILY < CHIP_GFX12
+ s_barrier //barrier to ensure the readiness of LDS before access attemps from any other wave in the same TG
+-#endif
+
+ /* restore HW registers */
+ L_RESTORE_HWREG:
+@@ -1334,11 +1188,6 @@ L_RESTORE_HWREG:
+
+ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
+
+-#if ASIC_FAMILY >= CHIP_GFX12
+- // Restore s_restore_spi_init_hi before the saved value gets clobbered.
+- s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save
+-#endif
+-
+ read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
+ read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
+@@ -1358,44 +1207,6 @@ L_RESTORE_HWREG:
+
+ s_setreg_b32 hwreg(HW_REG_SHADER_FLAT_SCRATCH_HI), s_restore_flat_scratch
+
+-#if ASIC_FAMILY >= CHIP_GFX12
+- read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
+- S_WAITCNT_0
+- s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp
+-
+- read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
+- S_WAITCNT_0
+- s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp
+-
+- // Only the first wave needs to restore the workgroup barrier.
+- s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
+- s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
+-
+- // Skip over WAVE_STATUS, since there is no state to restore from it
+- s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4
+-
+- read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
+- S_WAITCNT_0
+-
+- s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET
+- s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
+-
+- // extract the saved signal count from s_restore_tmp
+- s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET
+-
+- // We need to call s_barrier_signal repeatedly to restore the signal
+- // count of the work group barrier. The member count is already
+- // initialized with the number of waves in the work group.
+-L_BARRIER_RESTORE_LOOP:
+- s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp
+- s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
+- s_barrier_signal -1
+- s_add_i32 s_restore_tmp, s_restore_tmp, -1
+- s_branch L_BARRIER_RESTORE_LOOP
+-
+-L_SKIP_BARRIER_RESTORE:
+-#endif
+-
+ s_mov_b32 m0, s_restore_m0
+ s_mov_b32 exec_lo, s_restore_exec_lo
+ s_mov_b32 exec_hi, s_restore_exec_hi
+@@ -1453,13 +1264,6 @@ L_RETURN_WITHOUT_PRIV:
+
+ s_setreg_b32 hwreg(S_STATUS_HWREG), s_restore_status // SCC is included, which is changed by previous salu
+
+-#if ASIC_FAMILY >= CHIP_GFX12
+- // Make barrier and LDS state visible to all waves in the group.
+- // STATE_PRIV.BARRIER_COMPLETE may change after this point.
+- s_barrier_signal -2
+- s_barrier_wait -2
+-#endif
+-
+ s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
+
+ L_END_PGM:
+@@ -1598,11 +1402,7 @@ function get_hwreg_size_bytes
+ end
+
+ function get_wave_size2(s_reg)
+-#if ASIC_FAMILY < CHIP_GFX12
+ s_getreg_b32 s_reg, hwreg(HW_REG_IB_STS2,SQ_WAVE_IB_STS2_WAVE64_SHIFT,SQ_WAVE_IB_STS2_WAVE64_SIZE)
+-#else
+- s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE)
+-#endif
+ s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE
+ end
+
+diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
+new file mode 100644
+index 0000000000000..1740e98c6719d
+--- /dev/null
++++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
+@@ -0,0 +1,1126 @@
++/*
++ * Copyright 2018 Advanced Micro Devices, Inc.
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the "Software"),
++ * to deal in the Software without restriction, including without limitation
++ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
++ * and/or sell copies of the Software, and to permit persons to whom the
++ * Software is furnished to do so, subject to the following conditions:
++ *
++ * The above copyright notice and this permission notice shall be included in
++ * all copies or substantial portions of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
++ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
++ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
++ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
++ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
++ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
++ * OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++/* To compile this assembly code:
++ *
++ * gfx12:
++ * cpp -DASIC_FAMILY=CHIP_GFX12 cwsr_trap_handler_gfx12.asm -P -o gfx12.sp3
++ * sp3 gfx12.sp3 -hex gfx12.hex
++ */
++
++#define CHIP_GFX12 37
++
++#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised
++
++var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4
++var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9
++var SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK = 0xC00
++var SQ_WAVE_STATE_PRIV_HALT_MASK = 0x4000
++var SQ_WAVE_STATE_PRIV_POISON_ERR_MASK = 0x8000
++var SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT = 15
++var SQ_WAVE_STATUS_WAVE64_SHIFT = 29
++var SQ_WAVE_STATUS_WAVE64_SIZE = 1
++var SQ_WAVE_STATUS_NO_VGPRS_SHIFT = 24
++var SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK = SQ_WAVE_STATE_PRIV_SYS_PRIO_MASK|SQ_WAVE_STATE_PRIV_POISON_ERR_MASK
++var S_SAVE_PC_HI_TRAP_ID_MASK = 0xF0000000
++
++var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT = 12
++var SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE = 9
++var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE = 8
++var SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT = 12
++var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT = 24
++var SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE = 4
++var SQ_WAVE_LDS_ALLOC_GRANULARITY = 9
++
++var SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK = 0xF
++var SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK = 0x10
++var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT = 5
++var SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK = 0x20
++var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK = 0x40
++var SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT = 6
++var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK = 0x80
++var SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT = 7
++var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK = 0x100
++var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT = 8
++var SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK = 0x200
++var SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK = 0x800
++var SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK = 0x80
++var SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK = 0x200
++
++var SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK= SQ_WAVE_EXCP_FLAG_PRIV_MEM_VIOL_MASK |\
++ SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_MASK |\
++ SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK |\
++ SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_MASK |\
++ SQ_WAVE_EXCP_FLAG_PRIV_WAVE_END_MASK |\
++ SQ_WAVE_EXCP_FLAG_PRIV_TRAP_AFTER_INST_MASK
++var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT
++var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
++var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE = SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_ILLEGAL_INST_SHIFT
++var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT = SQ_WAVE_EXCP_FLAG_PRIV_WAVE_START_SHIFT
++var SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE = 32 - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT
++var BARRIER_STATE_SIGNAL_OFFSET = 16
++var BARRIER_STATE_VALID_OFFSET = 0
++
++var TTMP11_DEBUG_TRAP_ENABLED_SHIFT = 23
++var TTMP11_DEBUG_TRAP_ENABLED_MASK = 0x800000
++
++// SQ_SEL_X/Y/Z/W, BUF_NUM_FORMAT_FLOAT, (0 for MUBUF stride[17:14]
++// when ADD_TID_ENABLE and BUF_DATA_FORMAT_32 for MTBUF), ADD_TID_ENABLE
++var S_SAVE_BUF_RSRC_WORD1_STRIDE = 0x00040000
++var S_SAVE_BUF_RSRC_WORD3_MISC = 0x10807FAC
++var S_SAVE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
++var S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT = 26
++
++var S_SAVE_PC_HI_FIRST_WAVE_MASK = 0x80000000
++var S_SAVE_PC_HI_FIRST_WAVE_SHIFT = 31
++
++var s_sgpr_save_num = 108
++
++var s_save_spi_init_lo = exec_lo
++var s_save_spi_init_hi = exec_hi
++var s_save_pc_lo = ttmp0
++var s_save_pc_hi = ttmp1
++var s_save_exec_lo = ttmp2
++var s_save_exec_hi = ttmp3
++var s_save_state_priv = ttmp12
++var s_save_excp_flag_priv = ttmp15
++var s_save_xnack_mask = s_save_excp_flag_priv
++var s_wave_size = ttmp7
++var s_save_buf_rsrc0 = ttmp8
++var s_save_buf_rsrc1 = ttmp9
++var s_save_buf_rsrc2 = ttmp10
++var s_save_buf_rsrc3 = ttmp11
++var s_save_mem_offset = ttmp4
++var s_save_alloc_size = s_save_excp_flag_priv
++var s_save_tmp = ttmp14
++var s_save_m0 = ttmp5
++var s_save_ttmps_lo = s_save_tmp
++var s_save_ttmps_hi = s_save_excp_flag_priv
++
++var S_RESTORE_BUF_RSRC_WORD1_STRIDE = S_SAVE_BUF_RSRC_WORD1_STRIDE
++var S_RESTORE_BUF_RSRC_WORD3_MISC = S_SAVE_BUF_RSRC_WORD3_MISC
++
++var S_RESTORE_SPI_INIT_FIRST_WAVE_MASK = 0x04000000
++var S_RESTORE_SPI_INIT_FIRST_WAVE_SHIFT = 26
++var S_WAVE_SIZE = 25
++
++var s_restore_spi_init_lo = exec_lo
++var s_restore_spi_init_hi = exec_hi
++var s_restore_mem_offset = ttmp12
++var s_restore_alloc_size = ttmp3
++var s_restore_tmp = ttmp2
++var s_restore_mem_offset_save = s_restore_tmp
++var s_restore_m0 = s_restore_alloc_size
++var s_restore_mode = ttmp7
++var s_restore_flat_scratch = s_restore_tmp
++var s_restore_pc_lo = ttmp0
++var s_restore_pc_hi = ttmp1
++var s_restore_exec_lo = ttmp4
++var s_restore_exec_hi = ttmp5
++var s_restore_state_priv = ttmp14
++var s_restore_excp_flag_priv = ttmp15
++var s_restore_xnack_mask = ttmp13
++var s_restore_buf_rsrc0 = ttmp8
++var s_restore_buf_rsrc1 = ttmp9
++var s_restore_buf_rsrc2 = ttmp10
++var s_restore_buf_rsrc3 = ttmp11
++var s_restore_size = ttmp6
++var s_restore_ttmps_lo = s_restore_tmp
++var s_restore_ttmps_hi = s_restore_alloc_size
++var s_restore_spi_init_hi_save = s_restore_exec_hi
++
++shader main
++ asic(DEFAULT)
++ type(CS)
++ wave_size(32)
++
++ s_branch L_SKIP_RESTORE //NOT restore. might be a regular trap or save
++
++L_JUMP_TO_RESTORE:
++ s_branch L_RESTORE
++
++L_SKIP_RESTORE:
++ s_getreg_b32 s_save_state_priv, hwreg(HW_REG_WAVE_STATE_PRIV) //save STATUS since we will change SCC
++
++ // Clear SPI_PRIO: do not save with elevated priority.
++ // Clear ECC_ERR: prevents SQC store and triggers FATAL_HALT if setreg'd.
++ s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_ALWAYS_CLEAR_MASK
++
++ s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
++
++ s_and_b32 ttmp2, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK
++ s_cbranch_scc0 L_NOT_HALTED
++
++L_HALTED:
++ // Host trap may occur while wave is halted.
++ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
++ s_cbranch_scc1 L_FETCH_2ND_TRAP
++
++L_CHECK_SAVE:
++ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
++ s_cbranch_scc1 L_SAVE
++
++ // Wave is halted but neither host trap nor SAVECTX is raised.
++ // Caused by instruction fetch memory violation.
++ // Spin wait until context saved to prevent interrupt storm.
++ s_sleep 0x10
++ s_getreg_b32 s_save_excp_flag_priv, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
++ s_branch L_CHECK_SAVE
++
++L_NOT_HALTED:
++ // Let second-level handle non-SAVECTX exception or trap.
++ // Any concurrent SAVECTX will be handled upon re-entry once halted.
++
++ // Check non-maskable exceptions. memory_violation, illegal_instruction
++ // and xnack_error exceptions always cause the wave to enter the trap
++ // handler.
++ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_NON_MASKABLE_EXCP_MASK
++ s_cbranch_scc1 L_FETCH_2ND_TRAP
++
++ // Check for maskable exceptions in trapsts.excp and trapsts.excp_hi.
++ // Maskable exceptions only cause the wave to enter the trap handler if
++ // their respective bit in mode.excp_en is set.
++ s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
++ s_and_b32 ttmp3, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_ADDR_WATCH_MASK
++ s_cbranch_scc0 L_NOT_ADDR_WATCH
++ s_or_b32 ttmp2, ttmp2, SQ_WAVE_TRAP_CTRL_ADDR_WATCH_MASK
++
++L_NOT_ADDR_WATCH:
++ s_getreg_b32 ttmp3, hwreg(HW_REG_WAVE_TRAP_CTRL)
++ s_and_b32 ttmp2, ttmp3, ttmp2
++ s_cbranch_scc1 L_FETCH_2ND_TRAP
++
++L_CHECK_TRAP_ID:
++ // Check trap_id != 0
++ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
++ s_cbranch_scc1 L_FETCH_2ND_TRAP
++
++#if SINGLE_STEP_MISSED_WORKAROUND
++ // Prioritize single step exception over context save.
++ // Second-level trap will halt wave and RFE, re-entering for SAVECTX.
++ // WAVE_TRAP_CTRL is already in ttmp3.
++ s_and_b32 ttmp3, ttmp3, SQ_WAVE_TRAP_CTRL_TRAP_AFTER_INST_MASK
++ s_cbranch_scc1 L_FETCH_2ND_TRAP
++#endif
++
++ s_and_b32 ttmp2, s_save_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_MASK
++ s_cbranch_scc1 L_SAVE
++
++L_FETCH_2ND_TRAP:
++ // Read second-level TBA/TMA from first-level TMA and jump if available.
++ // ttmp[2:5] and ttmp12 can be used (others hold SPI-initialized debug data)
++ // ttmp12 holds SQ_WAVE_STATUS
++ s_sendmsg_rtn_b64 [ttmp14, ttmp15], sendmsg(MSG_RTN_GET_TMA)
++ s_wait_idle
++ s_lshl_b64 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8
++
++ s_bitcmp1_b32 ttmp15, 0xF
++ s_cbranch_scc0 L_NO_SIGN_EXTEND_TMA
++ s_or_b32 ttmp15, ttmp15, 0xFFFF0000
++L_NO_SIGN_EXTEND_TMA:
++
++ s_load_dword ttmp2, [ttmp14, ttmp15], 0x10 scope:SCOPE_SYS // debug trap enabled flag
++ s_wait_idle
++ s_lshl_b32 ttmp2, ttmp2, TTMP11_DEBUG_TRAP_ENABLED_SHIFT
++ s_andn2_b32 ttmp11, ttmp11, TTMP11_DEBUG_TRAP_ENABLED_MASK
++ s_or_b32 ttmp11, ttmp11, ttmp2
++
++ s_load_dwordx2 [ttmp2, ttmp3], [ttmp14, ttmp15], 0x0 scope:SCOPE_SYS // second-level TBA
++ s_wait_idle
++ s_load_dwordx2 [ttmp14, ttmp15], [ttmp14, ttmp15], 0x8 scope:SCOPE_SYS // second-level TMA
++ s_wait_idle
++
++ s_and_b64 [ttmp2, ttmp3], [ttmp2, ttmp3], [ttmp2, ttmp3]
++ s_cbranch_scc0 L_NO_NEXT_TRAP // second-level trap handler has not been set
++ s_setpc_b64 [ttmp2, ttmp3] // jump to second-level trap handler
++
++L_NO_NEXT_TRAP:
++ // If not caused by trap then halt wave to prevent re-entry.
++ s_and_b32 ttmp2, s_save_pc_hi, S_SAVE_PC_HI_TRAP_ID_MASK
++ s_cbranch_scc1 L_TRAP_CASE
++
++ // Host trap will not cause trap re-entry.
++ s_getreg_b32 ttmp2, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
++ s_and_b32 ttmp2, ttmp2, SQ_WAVE_EXCP_FLAG_PRIV_HOST_TRAP_MASK
++ s_cbranch_scc1 L_EXIT_TRAP
++ s_or_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_HALT_MASK
++
++ // If the PC points to S_ENDPGM then context save will fail if STATE_PRIV.HALT is set.
++ // Rewind the PC to prevent this from occurring.
++ s_sub_u32 ttmp0, ttmp0, 0x8
++ s_subb_u32 ttmp1, ttmp1, 0x0
++
++ s_branch L_EXIT_TRAP
++
++L_TRAP_CASE:
++ // Advance past trap instruction to prevent re-entry.
++ s_add_u32 ttmp0, ttmp0, 0x4
++ s_addc_u32 ttmp1, ttmp1, 0x0
++
++L_EXIT_TRAP:
++ s_and_b32 ttmp1, ttmp1, 0xFFFF
++
++ // Restore SQ_WAVE_STATUS.
++ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
++ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
++
++ // STATE_PRIV.BARRIER_COMPLETE may have changed since we read it.
++ // Only restore fields which the trap handler changes.
++ s_lshr_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_SCC_SHIFT
++ s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV, SQ_WAVE_STATE_PRIV_SCC_SHIFT, \
++ SQ_WAVE_STATE_PRIV_POISON_ERR_SHIFT - SQ_WAVE_STATE_PRIV_SCC_SHIFT + 1), s_save_state_priv
++
++ s_rfe_b64 [ttmp0, ttmp1]
++
++L_SAVE:
++ // If VGPRs have been deallocated then terminate the wavefront.
++ // It has no remaining program to run and cannot save without VGPRs.
++ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
++ s_bitcmp1_b32 s_save_tmp, SQ_WAVE_STATUS_NO_VGPRS_SHIFT
++ s_cbranch_scc0 L_HAVE_VGPRS
++ s_endpgm
++L_HAVE_VGPRS:
++
++ s_and_b32 s_save_pc_hi, s_save_pc_hi, 0x0000ffff //pc[47:32]
++ s_mov_b32 s_save_tmp, 0
++ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_SAVE_CONTEXT_SHIFT, 1), s_save_tmp //clear saveCtx bit
++
++ /* inform SPI the readiness and wait for SPI's go signal */
++ s_mov_b32 s_save_exec_lo, exec_lo //save EXEC and use EXEC for the go signal from SPI
++ s_mov_b32 s_save_exec_hi, exec_hi
++ s_mov_b64 exec, 0x0 //clear EXEC to get ready to receive
++
++ s_sendmsg_rtn_b64 [exec_lo, exec_hi], sendmsg(MSG_RTN_SAVE_WAVE)
++ s_wait_idle
++
++ // Save first_wave flag so we can clear high bits of save address.
++ s_and_b32 s_save_tmp, s_save_spi_init_hi, S_SAVE_SPI_INIT_FIRST_WAVE_MASK
++ s_lshl_b32 s_save_tmp, s_save_tmp, (S_SAVE_PC_HI_FIRST_WAVE_SHIFT - S_SAVE_SPI_INIT_FIRST_WAVE_SHIFT)
++ s_or_b32 s_save_pc_hi, s_save_pc_hi, s_save_tmp
++
++ // Trap temporaries must be saved via VGPR but all VGPRs are in use.
++ // There is no ttmp space to hold the resource constant for VGPR save.
++ // Save v0 by itself since it requires only two SGPRs.
++ s_mov_b32 s_save_ttmps_lo, exec_lo
++ s_and_b32 s_save_ttmps_hi, exec_hi, 0xFFFF
++ s_mov_b32 exec_lo, 0xFFFFFFFF
++ s_mov_b32 exec_hi, 0xFFFFFFFF
++ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] scope:SCOPE_SYS
++ v_mov_b32 v0, 0x0
++ s_mov_b32 exec_lo, s_save_ttmps_lo
++ s_mov_b32 exec_hi, s_save_ttmps_hi
++
++ // Save trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
++ // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
++ get_wave_size2(s_save_ttmps_hi)
++ get_vgpr_size_bytes(s_save_ttmps_lo, s_save_ttmps_hi)
++ get_svgpr_size_bytes(s_save_ttmps_hi)
++ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_ttmps_hi
++ s_and_b32 s_save_ttmps_hi, s_save_spi_init_hi, 0xFFFF
++ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, get_sgpr_size_bytes()
++ s_add_u32 s_save_ttmps_lo, s_save_ttmps_lo, s_save_spi_init_lo
++ s_addc_u32 s_save_ttmps_hi, s_save_ttmps_hi, 0x0
++
++ v_writelane_b32 v0, ttmp4, 0x4
++ v_writelane_b32 v0, ttmp5, 0x5
++ v_writelane_b32 v0, ttmp6, 0x6
++ v_writelane_b32 v0, ttmp7, 0x7
++ v_writelane_b32 v0, ttmp8, 0x8
++ v_writelane_b32 v0, ttmp9, 0x9
++ v_writelane_b32 v0, ttmp10, 0xA
++ v_writelane_b32 v0, ttmp11, 0xB
++ v_writelane_b32 v0, ttmp13, 0xD
++ v_writelane_b32 v0, exec_lo, 0xE
++ v_writelane_b32 v0, exec_hi, 0xF
++
++ s_mov_b32 exec_lo, 0x3FFF
++ s_mov_b32 exec_hi, 0x0
++ global_store_dword_addtid v0, [s_save_ttmps_lo, s_save_ttmps_hi] offset:0x40 scope:SCOPE_SYS
++ v_readlane_b32 ttmp14, v0, 0xE
++ v_readlane_b32 ttmp15, v0, 0xF
++ s_mov_b32 exec_lo, ttmp14
++ s_mov_b32 exec_hi, ttmp15
++
++ /* setup Resource Constants */
++ s_mov_b32 s_save_buf_rsrc0, s_save_spi_init_lo //base_addr_lo
++ s_and_b32 s_save_buf_rsrc1, s_save_spi_init_hi, 0x0000FFFF //base_addr_hi
++ s_or_b32 s_save_buf_rsrc1, s_save_buf_rsrc1, S_SAVE_BUF_RSRC_WORD1_STRIDE
++ s_mov_b32 s_save_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes) although not necessarily initialized
++ s_mov_b32 s_save_buf_rsrc3, S_SAVE_BUF_RSRC_WORD3_MISC
++
++ s_mov_b32 s_save_m0, m0
++
++ /* global mem offset */
++ s_mov_b32 s_save_mem_offset, 0x0
++ get_wave_size2(s_wave_size)
++
++ /* save first 4 VGPRs, needed for SGPR save */
++ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
++ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
++ s_and_b32 m0, m0, 1
++ s_cmp_eq_u32 m0, 1
++ s_cbranch_scc1 L_ENABLE_SAVE_4VGPR_EXEC_HI
++ s_mov_b32 exec_hi, 0x00000000
++ s_branch L_SAVE_4VGPR_WAVE32
++L_ENABLE_SAVE_4VGPR_EXEC_HI:
++ s_mov_b32 exec_hi, 0xFFFFFFFF
++ s_branch L_SAVE_4VGPR_WAVE64
++L_SAVE_4VGPR_WAVE32:
++ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
++
++ // VGPR Allocated in 4-GPR granularity
++
++ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128
++ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2
++ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3
++ s_branch L_SAVE_HWREG
++
++L_SAVE_4VGPR_WAVE64:
++ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
++
++ // VGPR Allocated in 4-GPR granularity
++
++ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256
++ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2
++ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3
++
++ /* save HW registers */
++
++L_SAVE_HWREG:
++ // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
++ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
++ get_svgpr_size_bytes(s_save_tmp)
++ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
++ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
++
++ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
++
++ v_mov_b32 v0, 0x0 //Offset[31:0] from buffer resource
++ v_mov_b32 v1, 0x0 //Offset[63:32] from buffer resource
++ v_mov_b32 v2, 0x0 //Set of SGPRs for TCP store
++ s_mov_b32 m0, 0x0 //Next lane of v2 to write to
++
++ // Ensure no further changes to barrier or LDS state.
++ // STATE_PRIV.BARRIER_COMPLETE may change up to this point.
++ s_barrier_signal -2
++ s_barrier_wait -2
++
++ // Re-read final state of BARRIER_COMPLETE field for save.
++ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATE_PRIV)
++ s_and_b32 s_save_tmp, s_save_tmp, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
++ s_andn2_b32 s_save_state_priv, s_save_state_priv, SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK
++ s_or_b32 s_save_state_priv, s_save_state_priv, s_save_tmp
++
++ write_hwreg_to_v2(s_save_m0)
++ write_hwreg_to_v2(s_save_pc_lo)
++ s_andn2_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
++ write_hwreg_to_v2(s_save_tmp)
++ write_hwreg_to_v2(s_save_exec_lo)
++ write_hwreg_to_v2(s_save_exec_hi)
++ write_hwreg_to_v2(s_save_state_priv)
++
++ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV)
++ write_hwreg_to_v2(s_save_tmp)
++
++ write_hwreg_to_v2(s_save_xnack_mask)
++
++ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_MODE)
++ write_hwreg_to_v2(s_save_m0)
++
++ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_SCRATCH_BASE_LO)
++ write_hwreg_to_v2(s_save_m0)
++
++ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_SCRATCH_BASE_HI)
++ write_hwreg_to_v2(s_save_m0)
++
++ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_EXCP_FLAG_USER)
++ write_hwreg_to_v2(s_save_m0)
++
++ s_getreg_b32 s_save_m0, hwreg(HW_REG_WAVE_TRAP_CTRL)
++ write_hwreg_to_v2(s_save_m0)
++
++ s_getreg_b32 s_save_tmp, hwreg(HW_REG_WAVE_STATUS)
++ write_hwreg_to_v2(s_save_tmp)
++
++ s_get_barrier_state s_save_tmp, -1
++ s_wait_kmcnt (0)
++ write_hwreg_to_v2(s_save_tmp)
++
++ // Write HWREGs with 16 VGPR lanes. TTMPs occupy space after this.
++ s_mov_b32 exec_lo, 0xFFFF
++ s_mov_b32 exec_hi, 0x0
++ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
++
++ // Write SGPRs with 32 VGPR lanes. This works in wave32 and wave64 mode.
++ s_mov_b32 exec_lo, 0xFFFFFFFF
++
++ /* save SGPRs */
++ // Save SGPRs before LDS save, so that s0 to s4 can be used during LDS save...
++
++ // SGPR SR memory offset : size(VGPR)+size(SVGPR)
++ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
++ get_svgpr_size_bytes(s_save_tmp)
++ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
++ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
++
++ s_mov_b32 ttmp13, 0x0 //next VGPR lane to copy SGPR into
++
++ s_mov_b32 m0, 0x0 //SGPR initial index value =0
++ s_nop 0x0 //Manually inserted wait states
++L_SAVE_SGPR_LOOP:
++ // SGPR is allocated in 16 SGPR granularity
++ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
++ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
++ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
++ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
++ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
++ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
++ s_movrels_b64 s12, s12 //s12 = s[12+m0], s13 = s[13+m0]
++ s_movrels_b64 s14, s14 //s14 = s[14+m0], s15 = s[15+m0]
++
++ write_16sgpr_to_v2(s0)
++
++ s_cmp_eq_u32 ttmp13, 0x20 //have 32 VGPR lanes filled?
++ s_cbranch_scc0 L_SAVE_SGPR_SKIP_TCP_STORE
++
++ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
++ s_add_u32 s_save_mem_offset, s_save_mem_offset, 0x80
++ s_mov_b32 ttmp13, 0x0
++ v_mov_b32 v2, 0x0
++L_SAVE_SGPR_SKIP_TCP_STORE:
++
++ s_add_u32 m0, m0, 16 //next sgpr index
++ s_cmp_lt_u32 m0, 96 //scc = (m0 < first 96 SGPR) ? 1 : 0
++ s_cbranch_scc1 L_SAVE_SGPR_LOOP //first 96 SGPR save is complete?
++
++ //save the remaining 12 SGPRs
++ s_movrels_b64 s0, s0 //s0 = s[0+m0], s1 = s[1+m0]
++ s_movrels_b64 s2, s2 //s2 = s[2+m0], s3 = s[3+m0]
++ s_movrels_b64 s4, s4 //s4 = s[4+m0], s5 = s[5+m0]
++ s_movrels_b64 s6, s6 //s6 = s[6+m0], s7 = s[7+m0]
++ s_movrels_b64 s8, s8 //s8 = s[8+m0], s9 = s[9+m0]
++ s_movrels_b64 s10, s10 //s10 = s[10+m0], s11 = s[11+m0]
++ write_12sgpr_to_v2(s0)
++
++ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
++
++ /* save LDS */
++
++L_SAVE_LDS:
++ // Change EXEC to all threads...
++ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
++ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
++ s_and_b32 m0, m0, 1
++ s_cmp_eq_u32 m0, 1
++ s_cbranch_scc1 L_ENABLE_SAVE_LDS_EXEC_HI
++ s_mov_b32 exec_hi, 0x00000000
++ s_branch L_SAVE_LDS_NORMAL
++L_ENABLE_SAVE_LDS_EXEC_HI:
++ s_mov_b32 exec_hi, 0xFFFFFFFF
++L_SAVE_LDS_NORMAL:
++ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
++ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //lds_size is zero?
++ s_cbranch_scc0 L_SAVE_LDS_DONE //no lds used? jump to L_SAVE_LDS_DONE
++
++ s_and_b32 s_save_tmp, s_save_pc_hi, S_SAVE_PC_HI_FIRST_WAVE_MASK
++ s_cbranch_scc0 L_SAVE_LDS_DONE
++
++ // first wave do LDS save;
++
++ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
++ s_mov_b32 s_save_buf_rsrc2, s_save_alloc_size //NUM_RECORDS in bytes
++
++ // LDS at offset: size(VGPR)+size(SVGPR)+SIZE(SGPR)+SIZE(HWREG)
++ //
++ get_vgpr_size_bytes(s_save_mem_offset, s_wave_size)
++ get_svgpr_size_bytes(s_save_tmp)
++ s_add_u32 s_save_mem_offset, s_save_mem_offset, s_save_tmp
++ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_sgpr_size_bytes()
++ s_add_u32 s_save_mem_offset, s_save_mem_offset, get_hwreg_size_bytes()
++
++ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
++
++ //load 0~63*4(byte address) to vgpr v0
++ v_mbcnt_lo_u32_b32 v0, -1, 0
++ v_mbcnt_hi_u32_b32 v0, -1, v0
++ v_mul_u32_u24 v0, 4, v0
++
++ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
++ s_and_b32 m0, m0, 1
++ s_cmp_eq_u32 m0, 1
++ s_mov_b32 m0, 0x0
++ s_cbranch_scc1 L_SAVE_LDS_W64
++
++L_SAVE_LDS_W32:
++ s_mov_b32 s3, 128
++ s_nop 0
++ s_nop 0
++ s_nop 0
++L_SAVE_LDS_LOOP_W32:
++ ds_read_b32 v1, v0
++ s_wait_idle
++ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
++
++ s_add_u32 m0, m0, s3 //every buffer_store_dword does 128 bytes
++ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
++ v_add_nc_u32 v0, v0, 128 //mem offset increased by 128 bytes
++ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
++ s_cbranch_scc1 L_SAVE_LDS_LOOP_W32 //LDS save is complete?
++
++ s_branch L_SAVE_LDS_DONE
++
++L_SAVE_LDS_W64:
++ s_mov_b32 s3, 256
++ s_nop 0
++ s_nop 0
++ s_nop 0
++L_SAVE_LDS_LOOP_W64:
++ ds_read_b32 v1, v0
++ s_wait_idle
++ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
++
++ s_add_u32 m0, m0, s3 //every buffer_store_dword does 256 bytes
++ s_add_u32 s_save_mem_offset, s_save_mem_offset, s3
++ v_add_nc_u32 v0, v0, 256 //mem offset increased by 256 bytes
++ s_cmp_lt_u32 m0, s_save_alloc_size //scc=(m0 < s_save_alloc_size) ? 1 : 0
++ s_cbranch_scc1 L_SAVE_LDS_LOOP_W64 //LDS save is complete?
++
++L_SAVE_LDS_DONE:
++ /* save VGPRs - save the rest of the VGPRs */
++L_SAVE_VGPR:
++ // VGPR SR memory offset: 0
++ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
++ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
++ s_and_b32 m0, m0, 1
++ s_cmp_eq_u32 m0, 1
++ s_cbranch_scc1 L_ENABLE_SAVE_VGPR_EXEC_HI
++ s_mov_b32 s_save_mem_offset, (0+128*4) // for the rest VGPRs
++ s_mov_b32 exec_hi, 0x00000000
++ s_branch L_SAVE_VGPR_NORMAL
++L_ENABLE_SAVE_VGPR_EXEC_HI:
++ s_mov_b32 s_save_mem_offset, (0+256*4) // for the rest VGPRs
++ s_mov_b32 exec_hi, 0xFFFFFFFF
++L_SAVE_VGPR_NORMAL:
++ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
++ s_add_u32 s_save_alloc_size, s_save_alloc_size, 1
++ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
++ //determine it is wave32 or wave64
++ s_lshr_b32 m0, s_wave_size, S_WAVE_SIZE
++ s_and_b32 m0, m0, 1
++ s_cmp_eq_u32 m0, 1
++ s_cbranch_scc1 L_SAVE_VGPR_WAVE64
++
++ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
++
++ // VGPR Allocated in 4-GPR granularity
++
++ // VGPR store using dw burst
++ s_mov_b32 m0, 0x4 //VGPR initial index value =4
++ s_cmp_lt_u32 m0, s_save_alloc_size
++ s_cbranch_scc0 L_SAVE_VGPR_END
++
++L_SAVE_VGPR_W32_LOOP:
++ v_movrels_b32 v0, v0 //v0 = v[0+m0]
++ v_movrels_b32 v1, v1 //v1 = v[1+m0]
++ v_movrels_b32 v2, v2 //v2 = v[2+m0]
++ v_movrels_b32 v3, v3 //v3 = v[3+m0]
++
++ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
++ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128
++ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*2
++ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:128*3
++
++ s_add_u32 m0, m0, 4 //next vgpr index
++ s_add_u32 s_save_mem_offset, s_save_mem_offset, 128*4 //every buffer_store_dword does 128 bytes
++ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
++ s_cbranch_scc1 L_SAVE_VGPR_W32_LOOP //VGPR save is complete?
++
++ s_branch L_SAVE_VGPR_END
++
++L_SAVE_VGPR_WAVE64:
++ s_mov_b32 s_save_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
++
++ // VGPR store using dw burst
++ s_mov_b32 m0, 0x4 //VGPR initial index value =4
++ s_cmp_lt_u32 m0, s_save_alloc_size
++ s_cbranch_scc0 L_SAVE_SHARED_VGPR
++
++L_SAVE_VGPR_W64_LOOP:
++ v_movrels_b32 v0, v0 //v0 = v[0+m0]
++ v_movrels_b32 v1, v1 //v1 = v[1+m0]
++ v_movrels_b32 v2, v2 //v2 = v[2+m0]
++ v_movrels_b32 v3, v3 //v3 = v[3+m0]
++
++ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
++ buffer_store_dword v1, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256
++ buffer_store_dword v2, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*2
++ buffer_store_dword v3, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS offset:256*3
++
++ s_add_u32 m0, m0, 4 //next vgpr index
++ s_add_u32 s_save_mem_offset, s_save_mem_offset, 256*4 //every buffer_store_dword does 256 bytes
++ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
++ s_cbranch_scc1 L_SAVE_VGPR_W64_LOOP //VGPR save is complete?
++
++L_SAVE_SHARED_VGPR:
++ s_getreg_b32 s_save_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
++ s_and_b32 s_save_alloc_size, s_save_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
++ s_cbranch_scc0 L_SAVE_VGPR_END //no shared_vgpr used? jump to L_SAVE_VGPR_END
++ s_lshl_b32 s_save_alloc_size, s_save_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value)
++ //m0 now holds the normal vgpr count; add the shared_vgpr count to it to get the total count.
++ //the shared_vgpr save will start from index m0
++ s_add_u32 s_save_alloc_size, s_save_alloc_size, m0
++ s_mov_b32 exec_lo, 0xFFFFFFFF
++ s_mov_b32 exec_hi, 0x00000000
++
++L_SAVE_SHARED_VGPR_WAVE64_LOOP:
++ v_movrels_b32 v0, v0 //v0 = v[0+m0]
++ buffer_store_dword v0, v0, s_save_buf_rsrc0, s_save_mem_offset scope:SCOPE_SYS
++ s_add_u32 m0, m0, 1 //next vgpr index
++ s_add_u32 s_save_mem_offset, s_save_mem_offset, 128
++ s_cmp_lt_u32 m0, s_save_alloc_size //scc = (m0 < s_save_alloc_size) ? 1 : 0
++ s_cbranch_scc1 L_SAVE_SHARED_VGPR_WAVE64_LOOP //SHARED_VGPR save is complete?
++
++L_SAVE_VGPR_END:
++ s_branch L_END_PGM
++
++L_RESTORE:
++ /* Setup Resource Constants */
++ s_mov_b32 s_restore_buf_rsrc0, s_restore_spi_init_lo //base_addr_lo
++ s_and_b32 s_restore_buf_rsrc1, s_restore_spi_init_hi, 0x0000FFFF //base_addr_hi
++ s_or_b32 s_restore_buf_rsrc1, s_restore_buf_rsrc1, S_RESTORE_BUF_RSRC_WORD1_STRIDE
++ s_mov_b32 s_restore_buf_rsrc2, 0 //NUM_RECORDS initial value = 0 (in bytes)
++ s_mov_b32 s_restore_buf_rsrc3, S_RESTORE_BUF_RSRC_WORD3_MISC
++
++ // Save s_restore_spi_init_hi for later use.
++ s_mov_b32 s_restore_spi_init_hi_save, s_restore_spi_init_hi
++
++ //determine whether it is wave32 or wave64
++ get_wave_size2(s_restore_size)
++
++ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
++ s_cbranch_scc0 L_RESTORE_VGPR
++
++ /* restore LDS */
++L_RESTORE_LDS:
++ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
++ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
++ s_and_b32 m0, m0, 1
++ s_cmp_eq_u32 m0, 1
++ s_cbranch_scc1 L_ENABLE_RESTORE_LDS_EXEC_HI
++ s_mov_b32 exec_hi, 0x00000000
++ s_branch L_RESTORE_LDS_NORMAL
++L_ENABLE_RESTORE_LDS_EXEC_HI:
++ s_mov_b32 exec_hi, 0xFFFFFFFF
++L_RESTORE_LDS_NORMAL:
++ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_LDS_SIZE_SIZE)
++ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //lds_size is zero?
++ s_cbranch_scc0 L_RESTORE_VGPR //no lds used? jump to L_RESTORE_VGPR
++ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, SQ_WAVE_LDS_ALLOC_GRANULARITY
++ s_mov_b32 s_restore_buf_rsrc2, s_restore_alloc_size //NUM_RECORDS in bytes
++
++ // LDS at offset: size(VGPR)+size(SVGPR)+size(SGPR)+size(HWREG)
++ //
++ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
++ get_svgpr_size_bytes(s_restore_tmp)
++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_hwreg_size_bytes()
++
++ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
++
++ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
++ s_and_b32 m0, m0, 1
++ s_cmp_eq_u32 m0, 1
++ s_mov_b32 m0, 0x0 //LDS byte offset; s_mov does not clobber SCC
++ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64
++
++L_RESTORE_LDS_LOOP_W32:
++ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
++ s_wait_idle
++ ds_store_addtid_b32 v0
++ s_add_u32 m0, m0, 128 // 128 bytes
++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128 //mem offset increased by 128 bytes
++ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
++ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W32 //LDS restore is complete?
++ s_branch L_RESTORE_VGPR
++
++L_RESTORE_LDS_LOOP_W64:
++ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset
++ s_wait_idle
++ ds_store_addtid_b32 v0
++ s_add_u32 m0, m0, 256 // 256 bytes
++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256 //mem offset increased by 256 bytes
++ s_cmp_lt_u32 m0, s_restore_alloc_size //scc=(m0 < s_restore_alloc_size) ? 1 : 0
++ s_cbranch_scc1 L_RESTORE_LDS_LOOP_W64 //LDS restore is complete?
++
++ /* restore VGPRs */
++L_RESTORE_VGPR:
++ // VGPR SR memory offset : 0
++ s_mov_b32 s_restore_mem_offset, 0x0
++ s_mov_b32 exec_lo, 0xFFFFFFFF //need every thread from now on
++ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
++ s_and_b32 m0, m0, 1
++ s_cmp_eq_u32 m0, 1
++ s_cbranch_scc1 L_ENABLE_RESTORE_VGPR_EXEC_HI
++ s_mov_b32 exec_hi, 0x00000000
++ s_branch L_RESTORE_VGPR_NORMAL
++L_ENABLE_RESTORE_VGPR_EXEC_HI:
++ s_mov_b32 exec_hi, 0xFFFFFFFF
++L_RESTORE_VGPR_NORMAL:
++ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
++ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, 1
++ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 2 //Number of VGPRs = (vgpr_size + 1) * 4 (non-zero value)
++ //determine whether it is wave32 or wave64
++ s_lshr_b32 m0, s_restore_size, S_WAVE_SIZE
++ s_and_b32 m0, m0, 1
++ s_cmp_eq_u32 m0, 1
++ s_cbranch_scc1 L_RESTORE_VGPR_WAVE64
++
++ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
++
++ // VGPR load using dw burst
++ s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be the last
++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4
++ s_mov_b32 m0, 4 //VGPR initial index value = 4
++ s_cmp_lt_u32 m0, s_restore_alloc_size
++ s_cbranch_scc0 L_RESTORE_SGPR
++
++L_RESTORE_VGPR_WAVE32_LOOP:
++ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
++ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128
++ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*2
++ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:128*3
++ s_wait_idle
++ v_movreld_b32 v0, v0 //v[0+m0] = v0
++ v_movreld_b32 v1, v1
++ v_movreld_b32 v2, v2
++ v_movreld_b32 v3, v3
++ s_add_u32 m0, m0, 4 //next vgpr index
++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128*4 //every buffer_load_dword does 128 bytes
++ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
++ s_cbranch_scc1 L_RESTORE_VGPR_WAVE32_LOOP //VGPR restore (except v0) is complete?
++
++ /* VGPR restore on v0 */
++ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS
++ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128
++ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*2
++ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:128*3
++ s_wait_idle
++
++ s_branch L_RESTORE_SGPR
++
++L_RESTORE_VGPR_WAVE64:
++ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
++
++ // VGPR load using dw burst
++ s_mov_b32 s_restore_mem_offset_save, s_restore_mem_offset // restore start with v4, v0 will be the last
++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4
++ s_mov_b32 m0, 4 //VGPR initial index value = 4
++ s_cmp_lt_u32 m0, s_restore_alloc_size
++ s_cbranch_scc0 L_RESTORE_SHARED_VGPR
++
++L_RESTORE_VGPR_WAVE64_LOOP:
++ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
++ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256
++ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*2
++ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS offset:256*3
++ s_wait_idle
++ v_movreld_b32 v0, v0 //v[0+m0] = v0
++ v_movreld_b32 v1, v1
++ v_movreld_b32 v2, v2
++ v_movreld_b32 v3, v3
++ s_add_u32 m0, m0, 4 //next vgpr index
++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 256*4 //every buffer_load_dword does 256 bytes
++ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
++ s_cbranch_scc1 L_RESTORE_VGPR_WAVE64_LOOP //VGPR restore (except v0) is complete?
++
++L_RESTORE_SHARED_VGPR:
++ s_getreg_b32 s_restore_alloc_size, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE) //shared_vgpr_size
++ s_and_b32 s_restore_alloc_size, s_restore_alloc_size, 0xFFFFFFFF //shared_vgpr_size is zero?
++ s_cbranch_scc0 L_RESTORE_V0 //no shared_vgpr used?
++ s_lshl_b32 s_restore_alloc_size, s_restore_alloc_size, 3 //Number of SHARED_VGPRs = shared_vgpr_size * 8 (non-zero value)
++ //m0 now holds the normal vgpr count; add the shared_vgpr count to it to get the total count.
++ //the shared_vgpr restore will start from index m0
++ s_add_u32 s_restore_alloc_size, s_restore_alloc_size, m0
++ s_mov_b32 exec_lo, 0xFFFFFFFF
++ s_mov_b32 exec_hi, 0x00000000
++L_RESTORE_SHARED_VGPR_WAVE64_LOOP:
++ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset scope:SCOPE_SYS
++ s_wait_idle
++ v_movreld_b32 v0, v0 //v[0+m0] = v0
++ s_add_u32 m0, m0, 1 //next vgpr index
++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 128
++ s_cmp_lt_u32 m0, s_restore_alloc_size //scc = (m0 < s_restore_alloc_size) ? 1 : 0
++ s_cbranch_scc1 L_RESTORE_SHARED_VGPR_WAVE64_LOOP //SHARED_VGPR restore is complete?
++
++ s_mov_b32 exec_hi, 0xFFFFFFFF //restore back exec_hi before restoring V0!!
++
++ /* VGPR restore on v0 */
++L_RESTORE_V0:
++ buffer_load_dword v0, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS
++ buffer_load_dword v1, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256
++ buffer_load_dword v2, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*2
++ buffer_load_dword v3, v0, s_restore_buf_rsrc0, s_restore_mem_offset_save scope:SCOPE_SYS offset:256*3
++ s_wait_idle
++
++ /* restore SGPRs */
++ //will be 4+8+16*6 = 108 SGPRs
++ // SGPR SR memory offset : size(VGPR)+size(SVGPR)
++L_RESTORE_SGPR:
++ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
++ get_svgpr_size_bytes(s_restore_tmp)
++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
++ s_sub_u32 s_restore_mem_offset, s_restore_mem_offset, 20*4 //s108~s127 is not saved
++
++ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
++
++ s_mov_b32 m0, s_sgpr_save_num
++
++ read_4sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
++ s_wait_idle
++
++ s_sub_u32 m0, m0, 4 // Restore from S[0] to S[104]
++ s_nop 0 // hazard SALU M0=> S_MOVREL
++
++ s_movreld_b64 s0, s0 //s[0+m0] = s0
++ s_movreld_b64 s2, s2
++
++ read_8sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
++ s_wait_idle
++
++ s_sub_u32 m0, m0, 8 // Restore from S[0] to S[96]
++ s_nop 0 // hazard SALU M0=> S_MOVREL
++
++ s_movreld_b64 s0, s0 //s[0+m0] = s0
++ s_movreld_b64 s2, s2
++ s_movreld_b64 s4, s4
++ s_movreld_b64 s6, s6
++
++L_RESTORE_SGPR_LOOP:
++ read_16sgpr_from_mem(s0, s_restore_buf_rsrc0, s_restore_mem_offset)
++ s_wait_idle
++
++ s_sub_u32 m0, m0, 16 // Restore from S[n] to S[0]
++ s_nop 0 // hazard SALU M0=> S_MOVREL
++
++ s_movreld_b64 s0, s0 //s[0+m0] = s0
++ s_movreld_b64 s2, s2
++ s_movreld_b64 s4, s4
++ s_movreld_b64 s6, s6
++ s_movreld_b64 s8, s8
++ s_movreld_b64 s10, s10
++ s_movreld_b64 s12, s12
++ s_movreld_b64 s14, s14
++
++ s_cmp_eq_u32 m0, 0 //scc = (m0 == 0) ? 1 : 0
++ s_cbranch_scc0 L_RESTORE_SGPR_LOOP
++
++ // s_barrier with STATE_PRIV.TRAP_AFTER_INST=1, STATUS.PRIV=1 incorrectly asserts debug exception.
++ // Clear DEBUG_EN before and restore MODE after the barrier.
++ s_setreg_imm32_b32 hwreg(HW_REG_WAVE_MODE), 0
++
++ /* restore HW registers */
++L_RESTORE_HWREG:
++ // HWREG SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)
++ get_vgpr_size_bytes(s_restore_mem_offset, s_restore_size)
++ get_svgpr_size_bytes(s_restore_tmp)
++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, s_restore_tmp
++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, get_sgpr_size_bytes()
++
++ s_mov_b32 s_restore_buf_rsrc2, 0x1000000 //NUM_RECORDS in bytes
++
++ // Restore s_restore_spi_init_hi before the saved value gets clobbered.
++ s_mov_b32 s_restore_spi_init_hi, s_restore_spi_init_hi_save
++
++ read_hwreg_from_mem(s_restore_m0, s_restore_buf_rsrc0, s_restore_mem_offset)
++ read_hwreg_from_mem(s_restore_pc_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
++ read_hwreg_from_mem(s_restore_pc_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
++ read_hwreg_from_mem(s_restore_exec_lo, s_restore_buf_rsrc0, s_restore_mem_offset)
++ read_hwreg_from_mem(s_restore_exec_hi, s_restore_buf_rsrc0, s_restore_mem_offset)
++ read_hwreg_from_mem(s_restore_state_priv, s_restore_buf_rsrc0, s_restore_mem_offset)
++ read_hwreg_from_mem(s_restore_excp_flag_priv, s_restore_buf_rsrc0, s_restore_mem_offset)
++ read_hwreg_from_mem(s_restore_xnack_mask, s_restore_buf_rsrc0, s_restore_mem_offset)
++ read_hwreg_from_mem(s_restore_mode, s_restore_buf_rsrc0, s_restore_mem_offset)
++ read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
++ s_wait_idle
++
++ s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_LO), s_restore_flat_scratch
++
++ read_hwreg_from_mem(s_restore_flat_scratch, s_restore_buf_rsrc0, s_restore_mem_offset)
++ s_wait_idle
++
++ s_setreg_b32 hwreg(HW_REG_WAVE_SCRATCH_BASE_HI), s_restore_flat_scratch
++
++ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
++ s_wait_idle
++ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_USER), s_restore_tmp
++
++ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
++ s_wait_idle
++ s_setreg_b32 hwreg(HW_REG_WAVE_TRAP_CTRL), s_restore_tmp
++
++ // Only the first wave needs to restore the workgroup barrier.
++ s_and_b32 s_restore_tmp, s_restore_spi_init_hi, S_RESTORE_SPI_INIT_FIRST_WAVE_MASK
++ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
++
++ // Skip over WAVE_STATUS, since there is no state to restore from it
++ s_add_u32 s_restore_mem_offset, s_restore_mem_offset, 4
++
++ read_hwreg_from_mem(s_restore_tmp, s_restore_buf_rsrc0, s_restore_mem_offset)
++ s_wait_idle
++
++ s_bitcmp1_b32 s_restore_tmp, BARRIER_STATE_VALID_OFFSET
++ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
++
++ // extract the saved signal count from s_restore_tmp
++ s_lshr_b32 s_restore_tmp, s_restore_tmp, BARRIER_STATE_SIGNAL_OFFSET
++
++ // We need to call s_barrier_signal repeatedly to restore the signal
++ // count of the work group barrier. The member count is already
++ // initialized with the number of waves in the work group.
++L_BARRIER_RESTORE_LOOP:
++ s_and_b32 s_restore_tmp, s_restore_tmp, s_restore_tmp //scc = (s_restore_tmp != 0), signals left to replay?
++ s_cbranch_scc0 L_SKIP_BARRIER_RESTORE
++ s_barrier_signal -1
++ s_add_i32 s_restore_tmp, s_restore_tmp, -1
++ s_branch L_BARRIER_RESTORE_LOOP
++
++L_SKIP_BARRIER_RESTORE:
++
++ s_mov_b32 m0, s_restore_m0
++ s_mov_b32 exec_lo, s_restore_exec_lo
++ s_mov_b32 exec_hi, s_restore_exec_hi
++
++ // EXCP_FLAG_PRIV.SAVE_CONTEXT and HOST_TRAP may have changed.
++ // Only restore the other fields to avoid clobbering them.
++ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, 0, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_1_SIZE), s_restore_excp_flag_priv
++ s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT
++ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SIZE), s_restore_excp_flag_priv
++ s_lshr_b32 s_restore_excp_flag_priv, s_restore_excp_flag_priv, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT - SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_2_SHIFT
++ s_setreg_b32 hwreg(HW_REG_WAVE_EXCP_FLAG_PRIV, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SHIFT, SQ_WAVE_EXCP_FLAG_PRIV_RESTORE_PART_3_SIZE), s_restore_excp_flag_priv
++
++ s_setreg_b32 hwreg(HW_REG_WAVE_MODE), s_restore_mode
++
++ // Restore trap temporaries 4-11, 13 initialized by SPI debug dispatch logic
++ // ttmp SR memory offset : size(VGPR)+size(SVGPR)+size(SGPR)+0x40
++ get_vgpr_size_bytes(s_restore_ttmps_lo, s_restore_size)
++ get_svgpr_size_bytes(s_restore_ttmps_hi)
++ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_ttmps_hi
++ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, get_sgpr_size_bytes()
++ s_add_u32 s_restore_ttmps_lo, s_restore_ttmps_lo, s_restore_buf_rsrc0
++ s_addc_u32 s_restore_ttmps_hi, s_restore_buf_rsrc1, 0x0
++ s_and_b32 s_restore_ttmps_hi, s_restore_ttmps_hi, 0xFFFF
++ s_load_dwordx4 [ttmp4, ttmp5, ttmp6, ttmp7], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x50 scope:SCOPE_SYS
++ s_load_dwordx4 [ttmp8, ttmp9, ttmp10, ttmp11], [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x60 scope:SCOPE_SYS
++ s_load_dword ttmp13, [s_restore_ttmps_lo, s_restore_ttmps_hi], 0x74 scope:SCOPE_SYS
++ s_wait_idle
++
++ s_and_b32 s_restore_pc_hi, s_restore_pc_hi, 0x0000ffff //pc[47:32] //Do it here in order not to affect STATUS
++ s_and_b64 exec, exec, exec // Restore STATUS.EXECZ, not writable by s_setreg_b32
++ s_and_b64 vcc, vcc, vcc // Restore STATUS.VCCZ, not writable by s_setreg_b32
++
++ s_setreg_b32 hwreg(HW_REG_WAVE_STATE_PRIV), s_restore_state_priv // SCC is included, which is changed by previous salu
++
++ // Make barrier and LDS state visible to all waves in the group.
++ // STATE_PRIV.BARRIER_COMPLETE may change after this point.
++ s_barrier_signal -2
++ s_barrier_wait -2
++
++ s_rfe_b64 s_restore_pc_lo //Return to the main shader program and resume execution
++
++L_END_PGM:
++ s_endpgm_saved
++end
++
++function write_hwreg_to_v2(s)
++ // Copy into VGPR for later TCP store.
++ v_writelane_b32 v2, s, m0
++ s_add_u32 m0, m0, 0x1
++end
++
++
++function write_16sgpr_to_v2(s)
++ // Copy into VGPR for later TCP store.
++ for var sgpr_idx = 0; sgpr_idx < 16; sgpr_idx ++
++ v_writelane_b32 v2, s[sgpr_idx], ttmp13
++ s_add_u32 ttmp13, ttmp13, 0x1
++ end
++end
++
++function write_12sgpr_to_v2(s)
++ // Copy into VGPR for later TCP store.
++ for var sgpr_idx = 0; sgpr_idx < 12; sgpr_idx ++
++ v_writelane_b32 v2, s[sgpr_idx], ttmp13
++ s_add_u32 ttmp13, ttmp13, 0x1
++ end
++end
++
++function read_hwreg_from_mem(s, s_rsrc, s_mem_offset)
++ s_buffer_load_dword s, s_rsrc, s_mem_offset scope:SCOPE_SYS
++ s_add_u32 s_mem_offset, s_mem_offset, 4
++end
++
++function read_16sgpr_from_mem(s, s_rsrc, s_mem_offset)
++ s_sub_u32 s_mem_offset, s_mem_offset, 4*16
++ s_buffer_load_dwordx16 s, s_rsrc, s_mem_offset scope:SCOPE_SYS
++end
++
++function read_8sgpr_from_mem(s, s_rsrc, s_mem_offset)
++ s_sub_u32 s_mem_offset, s_mem_offset, 4*8
++ s_buffer_load_dwordx8 s, s_rsrc, s_mem_offset scope:SCOPE_SYS
++end
++
++function read_4sgpr_from_mem(s, s_rsrc, s_mem_offset)
++ s_sub_u32 s_mem_offset, s_mem_offset, 4*4
++ s_buffer_load_dwordx4 s, s_rsrc, s_mem_offset scope:SCOPE_SYS
++end
++
++function get_vgpr_size_bytes(s_vgpr_size_byte, s_size)
++ s_getreg_b32 s_vgpr_size_byte, hwreg(HW_REG_WAVE_GPR_ALLOC,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SHIFT,SQ_WAVE_GPR_ALLOC_VGPR_SIZE_SIZE)
++ s_add_u32 s_vgpr_size_byte, s_vgpr_size_byte, 1
++ s_bitcmp1_b32 s_size, S_WAVE_SIZE
++ s_cbranch_scc1 L_ENABLE_SHIFT_W64
++ s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+7) //VGPR size in bytes = (vgpr_size + 1) * 4 * 32 * 4 (non-zero value)
++ s_branch L_SHIFT_DONE
++L_ENABLE_SHIFT_W64:
++ s_lshl_b32 s_vgpr_size_byte, s_vgpr_size_byte, (2+8) //VGPR size in bytes = (vgpr_size + 1) * 4 * 64 * 4 (non-zero value)
++L_SHIFT_DONE:
++end
++
++function get_svgpr_size_bytes(s_svgpr_size_byte)
++ s_getreg_b32 s_svgpr_size_byte, hwreg(HW_REG_WAVE_LDS_ALLOC,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SHIFT,SQ_WAVE_LDS_ALLOC_VGPR_SHARED_SIZE_SIZE)
++ s_lshl_b32 s_svgpr_size_byte, s_svgpr_size_byte, (3+7)
++end
++
++function get_sgpr_size_bytes
++ return 512
++end
++
++function get_hwreg_size_bytes
++ return 128
++end
++
++function get_wave_size2(s_reg)
++ s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE)
++ s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE
++end
+--
+2.39.5
+
--- /dev/null
+From 5415d916549bb61e3ad0ca9cab93982e1aaa578f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Jan 2025 23:13:39 -0800
+Subject: eth: iavf: extend the netdev_lock usage
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit afc664987ab318c227ebc0f639f5afc921aaf674 ]
+
+iavf uses the netdev->lock already to protect shapers.
+In an upcoming series we'll try to protect NAPI instances
+with netdev->lock.
+
+We need to modify the protection a bit. All NAPI related
+calls in the driver need to be consistently under the lock.
+This will allow us to easily switch to a "we already hold
+the lock" NAPI API later.
+
+register_netdevice(), OTOH, must not be called under
+the netdev_lock() as we do not intend to have an
+"already locked" version of this call.
+
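+In pattern form, the locking change amounts to this sketch (simplified
+from the diff below; error handling and the other locks omitted):
+
+	mutex_lock(&netdev->lock);
+	...
+	if (netdev->reg_state != NETREG_REGISTERED) {
+		/* register_netdevice() must not run under netdev->lock */
+		mutex_unlock(&netdev->lock);
+		netdev_released = true;
+		err = register_netdevice(netdev);
+	}
+	...
+	if (!netdev_released)
+		mutex_unlock(&netdev->lock);
+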
+Link: https://patch.msgid.link/20250111071339.3709071-1-kuba@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 6bc7e4eb0499 ("Revert "net: skb: introduce and use a single page frag cache"")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 53 +++++++++++++++++----
+ 1 file changed, 45 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index 2b8700abe56bb..7c427003184d5 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -1983,6 +1983,7 @@ static int iavf_reinit_interrupt_scheme(struct iavf_adapter *adapter, bool runni
+ static void iavf_finish_config(struct work_struct *work)
+ {
+ struct iavf_adapter *adapter;
++ bool netdev_released = false;
+ int pairs, err;
+
+ adapter = container_of(work, struct iavf_adapter, finish_config);
+@@ -2003,7 +2004,16 @@ static void iavf_finish_config(struct work_struct *work)
+
+ switch (adapter->state) {
+ case __IAVF_DOWN:
++ /* Set the real number of queues when reset occurs while
++ * state == __IAVF_DOWN
++ */
++ pairs = adapter->num_active_queues;
++ netif_set_real_num_rx_queues(adapter->netdev, pairs);
++ netif_set_real_num_tx_queues(adapter->netdev, pairs);
++
+ if (adapter->netdev->reg_state != NETREG_REGISTERED) {
++ mutex_unlock(&adapter->netdev->lock);
++ netdev_released = true;
+ err = register_netdevice(adapter->netdev);
+ if (err) {
+ dev_err(&adapter->pdev->dev, "Unable to register netdev (%d)\n",
+@@ -2018,11 +2028,7 @@ static void iavf_finish_config(struct work_struct *work)
+ goto out;
+ }
+ }
+-
+- /* Set the real number of queues when reset occurs while
+- * state == __IAVF_DOWN
+- */
+- fallthrough;
++ break;
+ case __IAVF_RUNNING:
+ pairs = adapter->num_active_queues;
+ netif_set_real_num_rx_queues(adapter->netdev, pairs);
+@@ -2035,7 +2041,8 @@ static void iavf_finish_config(struct work_struct *work)
+
+ out:
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&adapter->netdev->lock);
++ if (!netdev_released)
++ mutex_unlock(&adapter->netdev->lock);
+ rtnl_unlock();
+ }
+
+@@ -2728,12 +2735,16 @@ static void iavf_watchdog_task(struct work_struct *work)
+ struct iavf_adapter *adapter = container_of(work,
+ struct iavf_adapter,
+ watchdog_task.work);
++ struct net_device *netdev = adapter->netdev;
+ struct iavf_hw *hw = &adapter->hw;
+ u32 reg_val;
+
++ mutex_lock(&netdev->lock);
+ if (!mutex_trylock(&adapter->crit_lock)) {
+- if (adapter->state == __IAVF_REMOVE)
++ if (adapter->state == __IAVF_REMOVE) {
++ mutex_unlock(&netdev->lock);
+ return;
++ }
+
+ goto restart_watchdog;
+ }
+@@ -2745,30 +2756,35 @@ static void iavf_watchdog_task(struct work_struct *work)
+ case __IAVF_STARTUP:
+ iavf_startup(adapter);
+ mutex_unlock(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+ msecs_to_jiffies(30));
+ return;
+ case __IAVF_INIT_VERSION_CHECK:
+ iavf_init_version_check(adapter);
+ mutex_unlock(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+ msecs_to_jiffies(30));
+ return;
+ case __IAVF_INIT_GET_RESOURCES:
+ iavf_init_get_resources(adapter);
+ mutex_unlock(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+ msecs_to_jiffies(1));
+ return;
+ case __IAVF_INIT_EXTENDED_CAPS:
+ iavf_init_process_extended_caps(adapter);
+ mutex_unlock(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+ msecs_to_jiffies(1));
+ return;
+ case __IAVF_INIT_CONFIG_ADAPTER:
+ iavf_init_config_adapter(adapter);
+ mutex_unlock(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+ msecs_to_jiffies(1));
+ return;
+@@ -2780,6 +2796,7 @@ static void iavf_watchdog_task(struct work_struct *work)
+ * as it can loop forever
+ */
+ mutex_unlock(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+ return;
+ }
+ if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) {
+@@ -2788,6 +2805,7 @@ static void iavf_watchdog_task(struct work_struct *work)
+ adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED;
+ iavf_shutdown_adminq(hw);
+ mutex_unlock(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+ queue_delayed_work(adapter->wq,
+ &adapter->watchdog_task, (5 * HZ));
+ return;
+@@ -2795,6 +2813,7 @@ static void iavf_watchdog_task(struct work_struct *work)
+ /* Try again from failed step*/
+ iavf_change_state(adapter, adapter->last_state);
+ mutex_unlock(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task, HZ);
+ return;
+ case __IAVF_COMM_FAILED:
+@@ -2807,6 +2826,7 @@ static void iavf_watchdog_task(struct work_struct *work)
+ iavf_change_state(adapter, __IAVF_INIT_FAILED);
+ adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
+ mutex_unlock(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+ return;
+ }
+ reg_val = rd32(hw, IAVF_VFGEN_RSTAT) &
+@@ -2826,12 +2846,14 @@ static void iavf_watchdog_task(struct work_struct *work)
+ adapter->aq_required = 0;
+ adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+ mutex_unlock(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+ queue_delayed_work(adapter->wq,
+ &adapter->watchdog_task,
+ msecs_to_jiffies(10));
+ return;
+ case __IAVF_RESETTING:
+ mutex_unlock(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+ HZ * 2);
+ return;
+@@ -2862,6 +2884,7 @@ static void iavf_watchdog_task(struct work_struct *work)
+ case __IAVF_REMOVE:
+ default:
+ mutex_unlock(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+ return;
+ }
+
+@@ -2873,12 +2896,14 @@ static void iavf_watchdog_task(struct work_struct *work)
+ dev_err(&adapter->pdev->dev, "Hardware reset detected\n");
+ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING);
+ mutex_unlock(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+ queue_delayed_work(adapter->wq,
+ &adapter->watchdog_task, HZ * 2);
+ return;
+ }
+
+ mutex_unlock(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+ restart_watchdog:
+ if (adapter->state >= __IAVF_DOWN)
+ queue_work(adapter->wq, &adapter->adminq_task);
+@@ -4355,14 +4380,17 @@ static int iavf_open(struct net_device *netdev)
+ return -EIO;
+ }
+
++ mutex_lock(&netdev->lock);
+ while (!mutex_trylock(&adapter->crit_lock)) {
+ /* If we are in __IAVF_INIT_CONFIG_ADAPTER state the crit_lock
+ * is already taken and iavf_open is called from an upper
+ * device's notifier reacting on NETDEV_REGISTER event.
+ * We have to leave here to avoid dead lock.
+ */
+- if (adapter->state == __IAVF_INIT_CONFIG_ADAPTER)
++ if (adapter->state == __IAVF_INIT_CONFIG_ADAPTER) {
++ mutex_unlock(&netdev->lock);
+ return -EBUSY;
++ }
+
+ usleep_range(500, 1000);
+ }
+@@ -4411,6 +4439,7 @@ static int iavf_open(struct net_device *netdev)
+ iavf_irq_enable(adapter, true);
+
+ mutex_unlock(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+
+ return 0;
+
+@@ -4423,6 +4452,7 @@ static int iavf_open(struct net_device *netdev)
+ iavf_free_all_tx_resources(adapter);
+ err_unlock:
+ mutex_unlock(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+
+ return err;
+ }
+@@ -4444,10 +4474,12 @@ static int iavf_close(struct net_device *netdev)
+ u64 aq_to_restore;
+ int status;
+
++ mutex_lock(&netdev->lock);
+ mutex_lock(&adapter->crit_lock);
+
+ if (adapter->state <= __IAVF_DOWN_PENDING) {
+ mutex_unlock(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+ return 0;
+ }
+
+@@ -4481,6 +4513,7 @@ static int iavf_close(struct net_device *netdev)
+ iavf_free_traffic_irqs(adapter);
+
+ mutex_unlock(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+
+ /* We explicitly don't free resources here because the hardware is
+ * still active and can DMA into memory. Resources are cleared in
+@@ -5357,6 +5390,7 @@ static int iavf_suspend(struct device *dev_d)
+
+ netif_device_detach(netdev);
+
++ mutex_lock(&netdev->lock);
+ mutex_lock(&adapter->crit_lock);
+
+ if (netif_running(netdev)) {
+@@ -5368,6 +5402,7 @@ static int iavf_suspend(struct device *dev_d)
+ iavf_reset_interrupt_capability(adapter);
+
+ mutex_unlock(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+
+ return 0;
+ }
+@@ -5466,6 +5501,7 @@ static void iavf_remove(struct pci_dev *pdev)
+ if (netdev->reg_state == NETREG_REGISTERED)
+ unregister_netdev(netdev);
+
++ mutex_lock(&netdev->lock);
+ mutex_lock(&adapter->crit_lock);
+ dev_info(&adapter->pdev->dev, "Removing device\n");
+ iavf_change_state(adapter, __IAVF_REMOVE);
+@@ -5502,6 +5538,7 @@ static void iavf_remove(struct pci_dev *pdev)
+ mutex_destroy(&hw->aq.asq_mutex);
+ mutex_unlock(&adapter->crit_lock);
+ mutex_destroy(&adapter->crit_lock);
++ mutex_unlock(&netdev->lock);
+
+ iounmap(hw->hw_addr);
+ pci_release_regions(pdev);
+--
+2.39.5
+
--- /dev/null
+From 4464317471c0fee58245366a027fc450194237ae Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Feb 2025 20:32:07 -0800
+Subject: flow_dissector: Fix handling of mixed port and port-range keys
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+[ Upstream commit 3e5796862c692ea608d96f0a1437f9290f44953a ]
+
+This patch fixes a bug in the TC flower filter where rules combining a
+specific destination port with a source port range weren't working
+correctly.
+
+The specific case was when users tried to configure rules like:
+
+tc filter add dev ens38 ingress protocol ip flower ip_proto udp \
+dst_port 5000 src_port 2000-3000 action drop
+
+The root cause was in the flow dissector code. While both
+FLOW_DISSECTOR_KEY_PORTS and FLOW_DISSECTOR_KEY_PORTS_RANGE flags
+were being set correctly in the classifier, the __skb_flow_dissect_ports()
+function was only populating one of them: whichever came first in
+the enum check. This meant that when the code needed both a specific
+port and a port range, one of them would be left as 0, causing the
+filter to not match packets as expected.
+
+Fix it by removing the either/or logic and instead checking and
+populating both key types independently when they're in use.
+
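+Condensed, the fixed logic is (sketch; the diff below is authoritative):
+
+	__be32 ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen);
+
+	if (key_ports)		/* FLOW_DISSECTOR_KEY_PORTS in use */
+		key_ports->ports = ports;
+	if (key_ports_range)	/* FLOW_DISSECTOR_KEY_PORTS_RANGE in use */
+		key_ports_range->tp.ports = ports;
+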
+Fixes: 8ffb055beae5 ("cls_flower: Fix the behavior using port ranges with hw-offload")
+Reported-by: Qiang Zhang <dtzq01@gmail.com>
+Closes: https://lore.kernel.org/netdev/CAPx+-5uvFxkhkz4=j_Xuwkezjn9U6kzKTD5jz4tZ9msSJ0fOJA@mail.gmail.com/
+Cc: Yoshiki Komachi <komachi.yoshiki@gmail.com>
+Cc: Jamal Hadi Salim <jhs@mojatatu.com>
+Cc: Jiri Pirko <jiri@resnulli.us>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Reviewed-by: Ido Schimmel <idosch@nvidia.com>
+Link: https://patch.msgid.link/20250218043210.732959-2-xiyou.wangcong@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/flow_dissector.c | 31 +++++++++++++++++++------------
+ 1 file changed, 19 insertions(+), 12 deletions(-)
+
+diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
+index 5db41bf2ed93e..c33af3ef0b790 100644
+--- a/net/core/flow_dissector.c
++++ b/net/core/flow_dissector.c
+@@ -853,23 +853,30 @@ __skb_flow_dissect_ports(const struct sk_buff *skb,
+ void *target_container, const void *data,
+ int nhoff, u8 ip_proto, int hlen)
+ {
+- enum flow_dissector_key_id dissector_ports = FLOW_DISSECTOR_KEY_MAX;
+- struct flow_dissector_key_ports *key_ports;
++ struct flow_dissector_key_ports_range *key_ports_range = NULL;
++ struct flow_dissector_key_ports *key_ports = NULL;
++ __be32 ports;
+
+ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS))
+- dissector_ports = FLOW_DISSECTOR_KEY_PORTS;
+- else if (dissector_uses_key(flow_dissector,
+- FLOW_DISSECTOR_KEY_PORTS_RANGE))
+- dissector_ports = FLOW_DISSECTOR_KEY_PORTS_RANGE;
++ key_ports = skb_flow_dissector_target(flow_dissector,
++ FLOW_DISSECTOR_KEY_PORTS,
++ target_container);
+
+- if (dissector_ports == FLOW_DISSECTOR_KEY_MAX)
++ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS_RANGE))
++ key_ports_range = skb_flow_dissector_target(flow_dissector,
++ FLOW_DISSECTOR_KEY_PORTS_RANGE,
++ target_container);
++
++ if (!key_ports && !key_ports_range)
+ return;
+
+- key_ports = skb_flow_dissector_target(flow_dissector,
+- dissector_ports,
+- target_container);
+- key_ports->ports = __skb_flow_get_ports(skb, nhoff, ip_proto,
+- data, hlen);
++ ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen);
++
++ if (key_ports)
++ key_ports->ports = ports;
++
++ if (key_ports_range)
++ key_ports_range->tp.ports = ports;
+ }
+
+ static void
+--
+2.39.5
+
--- /dev/null
+From f342b034ad42571be2f681b45f54b85fda236f14 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Feb 2025 20:32:09 -0800
+Subject: flow_dissector: Fix port range key handling in BPF conversion
+
+From: Cong Wang <xiyou.wangcong@gmail.com>
+
+[ Upstream commit 69ab34f705fbfabcace64b5d53bb7a4450fac875 ]
+
+Fix how port range keys are handled in __skb_flow_bpf_to_target() by:
+- Separating PORTS and PORTS_RANGE key handling
+- Using correct key_ports_range structure for range keys
+- Properly initializing both key types independently
+
+This ensures port range information is correctly stored in its dedicated
+structure rather than incorrectly using the regular ports key structure.
+
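+The two key types also have different layouts, which is why reusing the
+key_ports pointer for range keys was wrong (sketch; tp is the nested
+struct flow_dissector_key_ports inside flow_dissector_key_ports_range):
+
+	key_ports->src = flow_keys->sport;		/* ..._KEY_PORTS */
+	key_ports_range->tp.src = flow_keys->sport;	/* ..._KEY_PORTS_RANGE */
+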
+Fixes: 59fb9b62fb6c ("flow_dissector: Fix to use new variables for port ranges in bpf hook")
+Reported-by: Qiang Zhang <dtzq01@gmail.com>
+Closes: https://lore.kernel.org/netdev/CAPx+-5uvFxkhkz4=j_Xuwkezjn9U6kzKTD5jz4tZ9msSJ0fOJA@mail.gmail.com/
+Cc: Yoshiki Komachi <komachi.yoshiki@gmail.com>
+Cc: Jamal Hadi Salim <jhs@mojatatu.com>
+Cc: Jiri Pirko <jiri@resnulli.us>
+Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
+Link: https://patch.msgid.link/20250218043210.732959-4-xiyou.wangcong@gmail.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/flow_dissector.c | 18 ++++++++++--------
+ 1 file changed, 10 insertions(+), 8 deletions(-)
+
+diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
+index c33af3ef0b790..9cd8de6bebb54 100644
+--- a/net/core/flow_dissector.c
++++ b/net/core/flow_dissector.c
+@@ -931,6 +931,7 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
+ struct flow_dissector *flow_dissector,
+ void *target_container)
+ {
++ struct flow_dissector_key_ports_range *key_ports_range = NULL;
+ struct flow_dissector_key_ports *key_ports = NULL;
+ struct flow_dissector_key_control *key_control;
+ struct flow_dissector_key_basic *key_basic;
+@@ -975,20 +976,21 @@ static void __skb_flow_bpf_to_target(const struct bpf_flow_keys *flow_keys,
+ key_control->addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS;
+ }
+
+- if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS))
++ if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_PORTS)) {
+ key_ports = skb_flow_dissector_target(flow_dissector,
+ FLOW_DISSECTOR_KEY_PORTS,
+ target_container);
+- else if (dissector_uses_key(flow_dissector,
+- FLOW_DISSECTOR_KEY_PORTS_RANGE))
+- key_ports = skb_flow_dissector_target(flow_dissector,
+- FLOW_DISSECTOR_KEY_PORTS_RANGE,
+- target_container);
+-
+- if (key_ports) {
+ key_ports->src = flow_keys->sport;
+ key_ports->dst = flow_keys->dport;
+ }
++ if (dissector_uses_key(flow_dissector,
++ FLOW_DISSECTOR_KEY_PORTS_RANGE)) {
++ key_ports_range = skb_flow_dissector_target(flow_dissector,
++ FLOW_DISSECTOR_KEY_PORTS_RANGE,
++ target_container);
++ key_ports_range->tp.src = flow_keys->sport;
++ key_ports_range->tp.dst = flow_keys->dport;
++ }
+
+ if (dissector_uses_key(flow_dissector,
+ FLOW_DISSECTOR_KEY_FLOW_LABEL)) {
+--
+2.39.5
+
--- /dev/null
+From 4befa2aee7621a1f5802f1163484fb94a04e56a3 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Feb 2025 13:33:54 +0900
+Subject: geneve: Fix use-after-free in geneve_find_dev().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 9593172d93b9f91c362baec4643003dc29802929 ]
+
+syzkaller reported a use-after-free in geneve_find_dev() [0]
+without repro.
+
+geneve_configure() links struct geneve_dev.next to
+net_generic(net, geneve_net_id)->geneve_list.
+
+The net here could differ from dev_net(dev) if IFLA_NET_NS_PID,
+IFLA_NET_NS_FD, or IFLA_TARGET_NETNSID is set.
+
+When dev_net(dev) is dismantled, geneve_exit_batch_rtnl() finally
+calls unregister_netdevice_queue() for each dev in the netns,
+and later the dev is freed.
+
+However, its geneve_dev.next is still linked into the geneve_list of
+the backend UDP socket's netns.
+
+Then, use-after-free will occur when another geneve dev is created
+in the netns.
+
+Let's call geneve_dellink() instead in geneve_destroy_tunnels().
+
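+The mismatch, condensed from geneve_configure() (sketch; "net" is the
+netns of the UDP socket, which may differ from dev_net(dev)):
+
+	struct geneve_net *gn = net_generic(net, geneve_net_id);
+	...
+	list_add(&geneve->next, &gn->geneve_list);
+
+So tearing down dev_net(dev) frees the device while geneve->next is
+still on the other netns' geneve_list, and the next geneve_find_dev()
+walk in that netns reads freed memory.
+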
+[0]:
+BUG: KASAN: slab-use-after-free in geneve_find_dev drivers/net/geneve.c:1295 [inline]
+BUG: KASAN: slab-use-after-free in geneve_configure+0x234/0x858 drivers/net/geneve.c:1343
+Read of size 2 at addr ffff000054d6ee24 by task syz.1.4029/13441
+
+CPU: 1 UID: 0 PID: 13441 Comm: syz.1.4029 Not tainted 6.13.0-g0ad9617c78ac #24 dc35ca22c79fb82e8e7bc5c9c9adafea898b1e3d
+Hardware name: linux,dummy-virt (DT)
+Call trace:
+ show_stack+0x38/0x50 arch/arm64/kernel/stacktrace.c:466 (C)
+ __dump_stack lib/dump_stack.c:94 [inline]
+ dump_stack_lvl+0xbc/0x108 lib/dump_stack.c:120
+ print_address_description mm/kasan/report.c:378 [inline]
+ print_report+0x16c/0x6f0 mm/kasan/report.c:489
+ kasan_report+0xc0/0x120 mm/kasan/report.c:602
+ __asan_report_load2_noabort+0x20/0x30 mm/kasan/report_generic.c:379
+ geneve_find_dev drivers/net/geneve.c:1295 [inline]
+ geneve_configure+0x234/0x858 drivers/net/geneve.c:1343
+ geneve_newlink+0xb8/0x128 drivers/net/geneve.c:1634
+ rtnl_newlink_create+0x23c/0x868 net/core/rtnetlink.c:3795
+ __rtnl_newlink net/core/rtnetlink.c:3906 [inline]
+ rtnl_newlink+0x1054/0x1630 net/core/rtnetlink.c:4021
+ rtnetlink_rcv_msg+0x61c/0x918 net/core/rtnetlink.c:6911
+ netlink_rcv_skb+0x1dc/0x398 net/netlink/af_netlink.c:2543
+ rtnetlink_rcv+0x34/0x50 net/core/rtnetlink.c:6938
+ netlink_unicast_kernel net/netlink/af_netlink.c:1322 [inline]
+ netlink_unicast+0x618/0x838 net/netlink/af_netlink.c:1348
+ netlink_sendmsg+0x5fc/0x8b0 net/netlink/af_netlink.c:1892
+ sock_sendmsg_nosec net/socket.c:713 [inline]
+ __sock_sendmsg net/socket.c:728 [inline]
+ ____sys_sendmsg+0x410/0x6f8 net/socket.c:2568
+ ___sys_sendmsg+0x178/0x1d8 net/socket.c:2622
+ __sys_sendmsg net/socket.c:2654 [inline]
+ __do_sys_sendmsg net/socket.c:2659 [inline]
+ __se_sys_sendmsg net/socket.c:2657 [inline]
+ __arm64_sys_sendmsg+0x12c/0x1c8 net/socket.c:2657
+ __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline]
+ invoke_syscall+0x90/0x278 arch/arm64/kernel/syscall.c:49
+ el0_svc_common+0x13c/0x250 arch/arm64/kernel/syscall.c:132
+ do_el0_svc+0x54/0x70 arch/arm64/kernel/syscall.c:151
+ el0_svc+0x4c/0xa8 arch/arm64/kernel/entry-common.c:744
+ el0t_64_sync_handler+0x78/0x108 arch/arm64/kernel/entry-common.c:762
+ el0t_64_sync+0x198/0x1a0 arch/arm64/kernel/entry.S:600
+
+Allocated by task 13247:
+ kasan_save_stack mm/kasan/common.c:47 [inline]
+ kasan_save_track+0x30/0x68 mm/kasan/common.c:68
+ kasan_save_alloc_info+0x44/0x58 mm/kasan/generic.c:568
+ poison_kmalloc_redzone mm/kasan/common.c:377 [inline]
+ __kasan_kmalloc+0x84/0xa0 mm/kasan/common.c:394
+ kasan_kmalloc include/linux/kasan.h:260 [inline]
+ __do_kmalloc_node mm/slub.c:4298 [inline]
+ __kmalloc_node_noprof+0x2a0/0x560 mm/slub.c:4304
+ __kvmalloc_node_noprof+0x9c/0x230 mm/util.c:645
+ alloc_netdev_mqs+0xb8/0x11a0 net/core/dev.c:11470
+ rtnl_create_link+0x2b8/0xb50 net/core/rtnetlink.c:3604
+ rtnl_newlink_create+0x19c/0x868 net/core/rtnetlink.c:3780
+ __rtnl_newlink net/core/rtnetlink.c:3906 [inline]
+ rtnl_newlink+0x1054/0x1630 net/core/rtnetlink.c:4021
+ rtnetlink_rcv_msg+0x61c/0x918 net/core/rtnetlink.c:6911
+ netlink_rcv_skb+0x1dc/0x398 net/netlink/af_netlink.c:2543
+ rtnetlink_rcv+0x34/0x50 net/core/rtnetlink.c:6938
+ netlink_unicast_kernel net/netlink/af_netlink.c:1322 [inline]
+ netlink_unicast+0x618/0x838 net/netlink/af_netlink.c:1348
+ netlink_sendmsg+0x5fc/0x8b0 net/netlink/af_netlink.c:1892
+ sock_sendmsg_nosec net/socket.c:713 [inline]
+ __sock_sendmsg net/socket.c:728 [inline]
+ ____sys_sendmsg+0x410/0x6f8 net/socket.c:2568
+ ___sys_sendmsg+0x178/0x1d8 net/socket.c:2622
+ __sys_sendmsg net/socket.c:2654 [inline]
+ __do_sys_sendmsg net/socket.c:2659 [inline]
+ __se_sys_sendmsg net/socket.c:2657 [inline]
+ __arm64_sys_sendmsg+0x12c/0x1c8 net/socket.c:2657
+ __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline]
+ invoke_syscall+0x90/0x278 arch/arm64/kernel/syscall.c:49
+ el0_svc_common+0x13c/0x250 arch/arm64/kernel/syscall.c:132
+ do_el0_svc+0x54/0x70 arch/arm64/kernel/syscall.c:151
+ el0_svc+0x4c/0xa8 arch/arm64/kernel/entry-common.c:744
+ el0t_64_sync_handler+0x78/0x108 arch/arm64/kernel/entry-common.c:762
+ el0t_64_sync+0x198/0x1a0 arch/arm64/kernel/entry.S:600
+
+Freed by task 45:
+ kasan_save_stack mm/kasan/common.c:47 [inline]
+ kasan_save_track+0x30/0x68 mm/kasan/common.c:68
+ kasan_save_free_info+0x58/0x70 mm/kasan/generic.c:582
+ poison_slab_object mm/kasan/common.c:247 [inline]
+ __kasan_slab_free+0x48/0x68 mm/kasan/common.c:264
+ kasan_slab_free include/linux/kasan.h:233 [inline]
+ slab_free_hook mm/slub.c:2353 [inline]
+ slab_free mm/slub.c:4613 [inline]
+ kfree+0x140/0x420 mm/slub.c:4761
+ kvfree+0x4c/0x68 mm/util.c:688
+ netdev_release+0x94/0xc8 net/core/net-sysfs.c:2065
+ device_release+0x98/0x1c0
+ kobject_cleanup lib/kobject.c:689 [inline]
+ kobject_release lib/kobject.c:720 [inline]
+ kref_put include/linux/kref.h:65 [inline]
+ kobject_put+0x2b0/0x438 lib/kobject.c:737
+ netdev_run_todo+0xe5c/0xfc8 net/core/dev.c:11185
+ rtnl_unlock+0x20/0x38 net/core/rtnetlink.c:151
+ cleanup_net+0x4fc/0x8c0 net/core/net_namespace.c:648
+ process_one_work+0x700/0x1398 kernel/workqueue.c:3236
+ process_scheduled_works kernel/workqueue.c:3317 [inline]
+ worker_thread+0x8c4/0xe10 kernel/workqueue.c:3398
+ kthread+0x4bc/0x608 kernel/kthread.c:464
+ ret_from_fork+0x10/0x20 arch/arm64/kernel/entry.S:862
+
+The buggy address belongs to the object at ffff000054d6e000
+ which belongs to the cache kmalloc-cg-4k of size 4096
+The buggy address is located 3620 bytes inside of
+ freed 4096-byte region [ffff000054d6e000, ffff000054d6f000)
+
+The buggy address belongs to the physical page:
+page: refcount:1 mapcount:0 mapping:0000000000000000 index:0x0 pfn:0x94d68
+head: order:3 mapcount:0 entire_mapcount:0 nr_pages_mapped:0 pincount:0
+memcg:ffff000016276181
+flags: 0x3fffe0000000040(head|node=0|zone=0|lastcpupid=0x1ffff)
+page_type: f5(slab)
+raw: 03fffe0000000040 ffff0000c000f500 dead000000000122 0000000000000000
+raw: 0000000000000000 0000000000040004 00000001f5000000 ffff000016276181
+head: 03fffe0000000040 ffff0000c000f500 dead000000000122 0000000000000000
+head: 0000000000000000 0000000000040004 00000001f5000000 ffff000016276181
+head: 03fffe0000000003 fffffdffc1535a01 ffffffffffffffff 0000000000000000
+head: 0000000000000008 0000000000000000 00000000ffffffff 0000000000000000
+page dumped because: kasan: bad access detected
+
+Memory state around the buggy address:
+ ffff000054d6ed00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff000054d6ed80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+>ffff000054d6ee00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ^
+ ffff000054d6ee80: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+ ffff000054d6ef00: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
+
+Fixes: 2d07dc79fe04 ("geneve: add initial netdev driver for GENEVE tunnels")
+Reported-by: syzkaller <syzkaller@googlegroups.com>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20250213043354.91368-1-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/geneve.c | 11 +++--------
+ 1 file changed, 3 insertions(+), 8 deletions(-)
+
+diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
+index bc658bc608854..363fff28db737 100644
+--- a/drivers/net/geneve.c
++++ b/drivers/net/geneve.c
+@@ -1907,16 +1907,11 @@ static void geneve_destroy_tunnels(struct net *net, struct list_head *head)
+ /* gather any geneve devices that were moved into this ns */
+ for_each_netdev_safe(net, dev, aux)
+ if (dev->rtnl_link_ops == &geneve_link_ops)
+- unregister_netdevice_queue(dev, head);
++ geneve_dellink(dev, head);
+
+ /* now gather any other geneve devices that were created in this ns */
+- list_for_each_entry_safe(geneve, next, &gn->geneve_list, next) {
+- /* If geneve->dev is in the same netns, it was already added
+- * to the list by the previous loop.
+- */
+- if (!net_eq(dev_net(geneve->dev), net))
+- unregister_netdevice_queue(geneve->dev, head);
+- }
++ list_for_each_entry_safe(geneve, next, &gn->geneve_list, next)
++ geneve_dellink(geneve->dev, head);
+ }
+
+ static void __net_exit geneve_exit_batch_rtnl(struct list_head *net_list,
+--
+2.39.5
+
--- /dev/null
+From 4e419644998fa894e47162cd6e62eaeee51602de Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Feb 2025 12:37:05 -0800
+Subject: geneve: Suppress list corruption splat in geneve_destroy_tunnels().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 62fab6eef61f245dc8797e3a6a5b890ef40e8628 ]
+
+As explained in the previous patch, iterating for_each_netdev() and
+gn->geneve_list during ->exit_batch_rtnl() could trigger ->dellink()
+twice for the same device.
+
+If CONFIG_DEBUG_LIST is enabled, we will see a list_del() corruption
+splat in the 2nd call of geneve_dellink().
+
+Let's remove for_each_netdev() in geneve_destroy_tunnels() and delegate
+that part to default_device_exit_batch().
+
+Fixes: 9593172d93b9 ("geneve: Fix use-after-free in geneve_find_dev().")
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20250217203705.40342-3-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/geneve.c | 7 -------
+ 1 file changed, 7 deletions(-)
+
+diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
+index 363fff28db737..eea0875e4e551 100644
+--- a/drivers/net/geneve.c
++++ b/drivers/net/geneve.c
+@@ -1902,14 +1902,7 @@ static void geneve_destroy_tunnels(struct net *net, struct list_head *head)
+ {
+ struct geneve_net *gn = net_generic(net, geneve_net_id);
+ struct geneve_dev *geneve, *next;
+- struct net_device *dev, *aux;
+
+- /* gather any geneve devices that were moved into this ns */
+- for_each_netdev_safe(net, dev, aux)
+- if (dev->rtnl_link_ops == &geneve_link_ops)
+- geneve_dellink(dev, head);
+-
+- /* now gather any other geneve devices that were created in this ns */
+ list_for_each_entry_safe(geneve, next, &gn->geneve_list, next)
+ geneve_dellink(geneve->dev, head);
+ }
+--
+2.39.5
+
--- /dev/null
+From c49269d9e83d25865d45a2abd75754f73443b2df Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Feb 2025 12:37:04 -0800
+Subject: gtp: Suppress list corruption splat in gtp_net_exit_batch_rtnl().
+
+From: Kuniyuki Iwashima <kuniyu@amazon.com>
+
+[ Upstream commit 4ccacf86491d33d2486b62d4d44864d7101b299d ]
+
+Brad Spengler reported the list_del() corruption splat in
+gtp_net_exit_batch_rtnl(). [0]
+
+Commit eb28fd76c0a0 ("gtp: Destroy device along with udp socket's netns
+dismantle.") added the for_each_netdev() loop in gtp_net_exit_batch_rtnl()
+to destroy devices in each netns as done in geneve and ip tunnels.
+
+However, this could trigger ->dellink() twice for the same device during
+->exit_batch_rtnl().
+
+Say we have two netns A & B and gtp device B that resides in netns B but
+whose UDP socket is in netns A.
+
+ 1. cleanup_net() processes netns A and then B.
+
+ 2. gtp_net_exit_batch_rtnl() finds the device B while iterating
+ netns A's gn->gtp_dev_list and calls ->dellink().
+
+ [ device B is not yet unlinked from netns B
+ as unregister_netdevice_many() has not been called. ]
+
+ 3. gtp_net_exit_batch_rtnl() finds the device B while iterating
+ netns B's for_each_netdev() and calls ->dellink().
+
+gtp_dellink() cleans up the device's hash table, unlinks the dev from
+gn->gtp_dev_list, and calls unregister_netdevice_queue().
+
+Basically, calling gtp_dellink() multiple times is fine unless
+CONFIG_DEBUG_LIST is enabled.
+
+Let's remove for_each_netdev() in gtp_net_exit_batch_rtnl() and
+delegate the destruction to default_device_exit_batch() as done
+in bareudp.
+
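+Condensed, the two passes that both reach device B look like this
+(sketch; names as in gtp_net_exit_batch_rtnl()):
+
+	/* netns A: walk of the socket netns' gtp device list */
+	list_for_each_entry_safe(gtp, gtp_next, &gn->gtp_dev_list, list)
+		gtp_dellink(gtp->dev, dev_to_kill);	/* unlinks B once */
+
+	/* netns B: the for_each_netdev() walk finds B again */
+	for_each_netdev(net, dev)
+		if (dev->rtnl_link_ops == &gtp_link_ops)
+			gtp_dellink(dev, dev_to_kill);	/* second list_del() */
+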
+[0]:
+list_del corruption, ffff8880aaa62c00->next (autoslab_size_M_dev_P_net_core_dev_11127_8_1328_8_S_4096_A_64_n_139+0xc00/0x1000 [slab object]) is LIST_POISON1 (ffffffffffffff02) (prev is 0xffffffffffffff04)
+kernel BUG at lib/list_debug.c:58!
+Oops: invalid opcode: 0000 [#1] PREEMPT SMP KASAN
+CPU: 1 UID: 0 PID: 1804 Comm: kworker/u8:7 Tainted: G T 6.12.13-grsec-full-20250211091339 #1
+Tainted: [T]=RANDSTRUCT
+Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.15.0-1 04/01/2014
+Workqueue: netns cleanup_net
+RIP: 0010:[<ffffffff84947381>] __list_del_entry_valid_or_report+0x141/0x200 lib/list_debug.c:58
+Code: c2 76 91 31 c0 e8 9f b1 f7 fc 0f 0b 4d 89 f0 48 c7 c1 02 ff ff ff 48 89 ea 48 89 ee 48 c7 c7 e0 c2 76 91 31 c0 e8 7f b1 f7 fc <0f> 0b 4d 89 e8 48 c7 c1 04 ff ff ff 48 89 ea 48 89 ee 48 c7 c7 60
+RSP: 0018:fffffe8040b4fbd0 EFLAGS: 00010283
+RAX: 00000000000000cc RBX: dffffc0000000000 RCX: ffffffff818c4054
+RDX: ffffffff84947381 RSI: ffffffff818d1512 RDI: 0000000000000000
+RBP: ffff8880aaa62c00 R08: 0000000000000001 R09: fffffbd008169f32
+R10: fffffe8040b4f997 R11: 0000000000000001 R12: a1988d84f24943e4
+R13: ffffffffffffff02 R14: ffffffffffffff04 R15: ffff8880aaa62c08
+RBX: kasan shadow of 0x0
+RCX: __wake_up_klogd.part.0+0x74/0xe0 kernel/printk/printk.c:4554
+RDX: __list_del_entry_valid_or_report+0x141/0x200 lib/list_debug.c:58
+RSI: vprintk+0x72/0x100 kernel/printk/printk_safe.c:71
+RBP: autoslab_size_M_dev_P_net_core_dev_11127_8_1328_8_S_4096_A_64_n_139+0xc00/0x1000 [slab object]
+RSP: process kstack fffffe8040b4fbd0+0x7bd0/0x8000 [kworker/u8:7+netns 1804 ]
+R09: kasan shadow of process kstack fffffe8040b4f990+0x7990/0x8000 [kworker/u8:7+netns 1804 ]
+R10: process kstack fffffe8040b4f997+0x7997/0x8000 [kworker/u8:7+netns 1804 ]
+R15: autoslab_size_M_dev_P_net_core_dev_11127_8_1328_8_S_4096_A_64_n_139+0xc08/0x1000 [slab object]
+FS: 0000000000000000(0000) GS:ffff888116000000(0000) knlGS:0000000000000000
+CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+CR2: 0000748f5372c000 CR3: 0000000015408000 CR4: 00000000003406f0 shadow CR4: 00000000003406f0
+Stack:
+ 0000000000000000 ffffffff8a0c35e7 ffffffff8a0c3603 ffff8880aaa62c00
+ ffff8880aaa62c00 0000000000000004 ffff88811145311c 0000000000000005
+ 0000000000000001 ffff8880aaa62000 fffffe8040b4fd40 ffffffff8a0c360d
+Call Trace:
+ <TASK>
+ [<ffffffff8a0c360d>] __list_del_entry_valid include/linux/list.h:131 [inline] fffffe8040b4fc28
+ [<ffffffff8a0c360d>] __list_del_entry include/linux/list.h:248 [inline] fffffe8040b4fc28
+ [<ffffffff8a0c360d>] list_del include/linux/list.h:262 [inline] fffffe8040b4fc28
+ [<ffffffff8a0c360d>] gtp_dellink+0x16d/0x360 drivers/net/gtp.c:1557 fffffe8040b4fc28
+ [<ffffffff8a0d0404>] gtp_net_exit_batch_rtnl+0x124/0x2c0 drivers/net/gtp.c:2495 fffffe8040b4fc88
+ [<ffffffff8e705b24>] cleanup_net+0x5a4/0xbe0 net/core/net_namespace.c:635 fffffe8040b4fcd0
+ [<ffffffff81754c97>] process_one_work+0xbd7/0x2160 kernel/workqueue.c:3326 fffffe8040b4fd88
+ [<ffffffff81757195>] process_scheduled_works kernel/workqueue.c:3407 [inline] fffffe8040b4fec0
+ [<ffffffff81757195>] worker_thread+0x6b5/0xfa0 kernel/workqueue.c:3488 fffffe8040b4fec0
+ [<ffffffff817782a0>] kthread+0x360/0x4c0 kernel/kthread.c:397 fffffe8040b4ff78
+ [<ffffffff814d8594>] ret_from_fork+0x74/0xe0 arch/x86/kernel/process.c:172 fffffe8040b4ffb8
+ [<ffffffff8110f509>] ret_from_fork_asm+0x29/0xc0 arch/x86/entry/entry_64.S:399 fffffe8040b4ffe8
+ </TASK>
+Modules linked in:
+
+Fixes: eb28fd76c0a0 ("gtp: Destroy device along with udp socket's netns dismantle.")
+Reported-by: Brad Spengler <spender@grsecurity.net>
+Signed-off-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20250217203705.40342-2-kuniyu@amazon.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/gtp.c | 5 -----
+ 1 file changed, 5 deletions(-)
+
+diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c
+index fbabada7d3ba9..2cb13e092a856 100644
+--- a/drivers/net/gtp.c
++++ b/drivers/net/gtp.c
+@@ -2479,11 +2479,6 @@ static void __net_exit gtp_net_exit_batch_rtnl(struct list_head *net_list,
+ list_for_each_entry(net, net_list, exit_list) {
+ struct gtp_net *gn = net_generic(net, gtp_net_id);
+ struct gtp_dev *gtp, *gtp_next;
+- struct net_device *dev;
+-
+- for_each_netdev(net, dev)
+- if (dev->rtnl_link_ops == &gtp_link_ops)
+- gtp_dellink(dev, dev_to_kill);
+
+ list_for_each_entry_safe(gtp, gtp_next, &gn->gtp_dev_list, list)
+ gtp_dellink(gtp->dev, dev_to_kill);
+--
+2.39.5
+
--- /dev/null
+From 988392db820579e3abede7fdf541f555849dd2d6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Feb 2025 09:52:33 -0600
+Subject: ibmvnic: Don't reference skb after sending to VIOS
+
+From: Nick Child <nnac123@linux.ibm.com>
+
+[ Upstream commit bdf5d13aa05ec314d4385b31ac974d6c7e0997c9 ]
+
+Previously, after successfully flushing the xmit buffer to VIOS,
+the tx_bytes stat was incremented by the length of the skb.
+
+It is invalid to access the skb memory after sending the buffer to
+the VIOS because, at any point after sending, the VIOS can trigger
+an interrupt to free this memory. A race between reading skb->len
+and freeing the skb is possible (especially during LPM) and will
+result in use-after-free:
+ ==================================================================
+ BUG: KASAN: slab-use-after-free in ibmvnic_xmit+0x75c/0x1808 [ibmvnic]
+ Read of size 4 at addr c00000024eb48a70 by task hxecom/14495
+ <...>
+ Call Trace:
+ [c000000118f66cf0] [c0000000018cba6c] dump_stack_lvl+0x84/0xe8 (unreliable)
+ [c000000118f66d20] [c0000000006f0080] print_report+0x1a8/0x7f0
+ [c000000118f66df0] [c0000000006f08f0] kasan_report+0x128/0x1f8
+ [c000000118f66f00] [c0000000006f2868] __asan_load4+0xac/0xe0
+ [c000000118f66f20] [c0080000046eac84] ibmvnic_xmit+0x75c/0x1808 [ibmvnic]
+ [c000000118f67340] [c0000000014be168] dev_hard_start_xmit+0x150/0x358
+ <...>
+ Freed by task 0:
+ kasan_save_stack+0x34/0x68
+ kasan_save_track+0x2c/0x50
+ kasan_save_free_info+0x64/0x108
+ __kasan_mempool_poison_object+0x148/0x2d4
+ napi_skb_cache_put+0x5c/0x194
+ net_tx_action+0x154/0x5b8
+ handle_softirqs+0x20c/0x60c
+ do_softirq_own_stack+0x6c/0x88
+ <...>
+ The buggy address belongs to the object at c00000024eb48a00 which
+ belongs to the cache skbuff_head_cache of size 224
+==================================================================
+
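+The fix is the usual cache-before-handoff pattern (sketch; skblen is
+the new local added by the diff below):
+
+	unsigned int skblen = skb->len;	/* read while the skb is still ours */
+
+	/* ... descriptor handed to the VIOS: skb may be freed from here on */
+
+	tx_bytes += skblen;		/* no skb dereference after the handoff */
+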
+Fixes: 032c5e82847a ("Driver for IBM System i/p VNIC protocol")
+Signed-off-by: Nick Child <nnac123@linux.ibm.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250214155233.235559-1-nnac123@linux.ibm.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/ibm/ibmvnic.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c
+index e95ae0d39948c..0676fc547b6f4 100644
+--- a/drivers/net/ethernet/ibm/ibmvnic.c
++++ b/drivers/net/ethernet/ibm/ibmvnic.c
+@@ -2408,6 +2408,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
+ dma_addr_t data_dma_addr;
+ struct netdev_queue *txq;
+ unsigned long lpar_rc;
++ unsigned int skblen;
+ union sub_crq tx_crq;
+ unsigned int offset;
+ bool use_scrq_send_direct = false;
+@@ -2522,6 +2523,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
+ tx_buff->skb = skb;
+ tx_buff->index = bufidx;
+ tx_buff->pool_index = queue_num;
++ skblen = skb->len;
+
+ memset(&tx_crq, 0, sizeof(tx_crq));
+ tx_crq.v1.first = IBMVNIC_CRQ_CMD;
+@@ -2614,7 +2616,7 @@ static netdev_tx_t ibmvnic_xmit(struct sk_buff *skb, struct net_device *netdev)
+ netif_stop_subqueue(netdev, queue_num);
+ }
+
+- tx_bytes += skb->len;
++ tx_bytes += skblen;
+ txq_trans_cond_update(txq);
+ ret = NETDEV_TX_OK;
+ goto out;
+--
+2.39.5
+
--- /dev/null
+From faab6320159cee097c6242bf85f466bb2629cf65 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Jan 2025 19:53:12 -0800
+Subject: net: add netdev->up protected by netdev_lock()
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 5112457f3d8e41f987908266068af88ef9f3ab78 ]
+
+Some uAPI (netdev netlink) hide net_device's sub-objects while
+the interface is down to ensure uniform behavior across drivers.
+To remove the rtnl_lock dependency from those uAPIs we need a way
+to safely tell if the device is down or up.
+
+Add an indication of whether device is open or closed, protected
+by netdev->lock. The semantics are the same as IFF_UP, but taking
+netdev_lock around every write to ->flags would be a lot of code
+churn.
+
+We don't want to blanket the entire open / close path with
+netdev_lock, because that would prevent us from applying it to
+specific structures - core helpers wouldn't be able to take that
+lock from any function called by the drivers on the open/close paths.
+
+So the state of the flag is "pessimistic", as in it may report false
+negatives, but never false positives.
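+
+A sketch of the intended read side (assumed usage, not part of this
+patch): a reader holding only netdev->lock can consult the new field
+instead of testing IFF_UP in @flags.
+
+	netdev_lock(dev);
+	if (dev->up) {
+		/* the device is certainly open; false negatives are
+		 * possible while open/close is in flight, false
+		 * positives are not
+		 */
+		do_work(dev);	/* hypothetical helper */
+	}
+	netdev_unlock(dev);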
+
+Reviewed-by: Joe Damato <jdamato@fastly.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20250115035319.559603-5-kuba@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 6bc7e4eb0499 ("Revert "net: skb: introduce and use a single page frag cache"")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netdevice.h | 14 +++++++++++++-
+ net/core/dev.c | 4 ++--
+ net/core/dev.h | 12 ++++++++++++
+ 3 files changed, 27 insertions(+), 3 deletions(-)
+
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index e0a8093c9be80..eb4d61eee7e97 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -2441,12 +2441,24 @@ struct net_device {
+ unsigned long gro_flush_timeout;
+ u32 napi_defer_hard_irqs;
+
++ /**
++ * @up: copy of @state's IFF_UP, but safe to read with just @lock.
++ * May report false negatives while the device is being opened
++ * or closed (@lock does not protect .ndo_open, or .ndo_close).
++ */
++ bool up;
++
+ /**
+ * @lock: netdev-scope lock, protects a small selection of fields.
+ * Should always be taken using netdev_lock() / netdev_unlock() helpers.
+ * Drivers are free to use it for other protection.
+ *
+- * Protects: @reg_state, @net_shaper_hierarchy.
++ * Protects:
++ * @net_shaper_hierarchy, @reg_state
++ *
++ * Partially protects (writers must hold both @lock and rtnl_lock):
++ * @up
++ *
+ * Ordering: take after rtnl_lock.
+ */
+ struct mutex lock;
+diff --git a/net/core/dev.c b/net/core/dev.c
+index d1e8613151a4a..60f48d63559a1 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -1543,7 +1543,7 @@ static int __dev_open(struct net_device *dev, struct netlink_ext_ack *extack)
+ if (ret)
+ clear_bit(__LINK_STATE_START, &dev->state);
+ else {
+- dev->flags |= IFF_UP;
++ netif_set_up(dev, true);
+ dev_set_rx_mode(dev);
+ dev_activate(dev);
+ add_device_randomness(dev->dev_addr, dev->addr_len);
+@@ -1622,7 +1622,7 @@ static void __dev_close_many(struct list_head *head)
+ if (ops->ndo_stop)
+ ops->ndo_stop(dev);
+
+- dev->flags &= ~IFF_UP;
++ netif_set_up(dev, false);
+ netpoll_poll_enable(dev);
+ }
+ }
+diff --git a/net/core/dev.h b/net/core/dev.h
+index deb5eae5749fa..e17c640c05fb9 100644
+--- a/net/core/dev.h
++++ b/net/core/dev.h
+@@ -111,6 +111,18 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags,
+ void unregister_netdevice_many_notify(struct list_head *head,
+ u32 portid, const struct nlmsghdr *nlh);
+
++static inline void netif_set_up(struct net_device *dev, bool value)
++{
++ if (value)
++ dev->flags |= IFF_UP;
++ else
++ dev->flags &= ~IFF_UP;
++
++ netdev_lock(dev);
++ dev->up = value;
++ netdev_unlock(dev);
++}
++
+ static inline void netif_set_gso_max_size(struct net_device *dev,
+ unsigned int size)
+ {
+--
+2.39.5
+
--- /dev/null
+From a4f67934c19efbd4ab93b7736294d67544cffd53 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Jan 2025 19:53:09 -0800
+Subject: net: add netdev_lock() / netdev_unlock() helpers
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit ebda2f0bbde540ff7da168d2837f8cfb14581e2e ]
+
+Add helpers for locking the netdev instance, use it in drivers
+and the shaper code. This will make grepping for the lock usage
+much easier, as we extend the lock to cover more fields.
+
+Reviewed-by: Joe Damato <jdamato@fastly.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Reviewed-by: Przemek Kitszel <przemyslaw.kitszel@intel.com>
+Link: https://patch.msgid.link/20250115035319.559603-2-kuba@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 6bc7e4eb0499 ("Revert "net: skb: introduce and use a single page frag cache"")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 74 ++++++++++-----------
+ drivers/net/netdevsim/ethtool.c | 4 +-
+ include/linux/netdevice.h | 23 ++++++-
+ net/shaper/shaper.c | 6 +-
+ 4 files changed, 63 insertions(+), 44 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index 7c427003184d5..72314b0a1b25b 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -1992,7 +1992,7 @@ static void iavf_finish_config(struct work_struct *work)
+ * The dev->lock is needed to update the queue number
+ */
+ rtnl_lock();
+- mutex_lock(&adapter->netdev->lock);
++ netdev_lock(adapter->netdev);
+ mutex_lock(&adapter->crit_lock);
+
+ if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) &&
+@@ -2012,7 +2012,7 @@ static void iavf_finish_config(struct work_struct *work)
+ netif_set_real_num_tx_queues(adapter->netdev, pairs);
+
+ if (adapter->netdev->reg_state != NETREG_REGISTERED) {
+- mutex_unlock(&adapter->netdev->lock);
++ netdev_unlock(adapter->netdev);
+ netdev_released = true;
+ err = register_netdevice(adapter->netdev);
+ if (err) {
+@@ -2042,7 +2042,7 @@ static void iavf_finish_config(struct work_struct *work)
+ out:
+ mutex_unlock(&adapter->crit_lock);
+ if (!netdev_released)
+- mutex_unlock(&adapter->netdev->lock);
++ netdev_unlock(adapter->netdev);
+ rtnl_unlock();
+ }
+
+@@ -2739,10 +2739,10 @@ static void iavf_watchdog_task(struct work_struct *work)
+ struct iavf_hw *hw = &adapter->hw;
+ u32 reg_val;
+
+- mutex_lock(&netdev->lock);
++ netdev_lock(netdev);
+ if (!mutex_trylock(&adapter->crit_lock)) {
+ if (adapter->state == __IAVF_REMOVE) {
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ return;
+ }
+
+@@ -2756,35 +2756,35 @@ static void iavf_watchdog_task(struct work_struct *work)
+ case __IAVF_STARTUP:
+ iavf_startup(adapter);
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+ msecs_to_jiffies(30));
+ return;
+ case __IAVF_INIT_VERSION_CHECK:
+ iavf_init_version_check(adapter);
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+ msecs_to_jiffies(30));
+ return;
+ case __IAVF_INIT_GET_RESOURCES:
+ iavf_init_get_resources(adapter);
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+ msecs_to_jiffies(1));
+ return;
+ case __IAVF_INIT_EXTENDED_CAPS:
+ iavf_init_process_extended_caps(adapter);
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+ msecs_to_jiffies(1));
+ return;
+ case __IAVF_INIT_CONFIG_ADAPTER:
+ iavf_init_config_adapter(adapter);
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+ msecs_to_jiffies(1));
+ return;
+@@ -2796,7 +2796,7 @@ static void iavf_watchdog_task(struct work_struct *work)
+ * as it can loop forever
+ */
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ return;
+ }
+ if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) {
+@@ -2805,7 +2805,7 @@ static void iavf_watchdog_task(struct work_struct *work)
+ adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED;
+ iavf_shutdown_adminq(hw);
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ queue_delayed_work(adapter->wq,
+ &adapter->watchdog_task, (5 * HZ));
+ return;
+@@ -2813,7 +2813,7 @@ static void iavf_watchdog_task(struct work_struct *work)
+ /* Try again from failed step*/
+ iavf_change_state(adapter, adapter->last_state);
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task, HZ);
+ return;
+ case __IAVF_COMM_FAILED:
+@@ -2826,7 +2826,7 @@ static void iavf_watchdog_task(struct work_struct *work)
+ iavf_change_state(adapter, __IAVF_INIT_FAILED);
+ adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED;
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ return;
+ }
+ reg_val = rd32(hw, IAVF_VFGEN_RSTAT) &
+@@ -2846,14 +2846,14 @@ static void iavf_watchdog_task(struct work_struct *work)
+ adapter->aq_required = 0;
+ adapter->current_op = VIRTCHNL_OP_UNKNOWN;
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ queue_delayed_work(adapter->wq,
+ &adapter->watchdog_task,
+ msecs_to_jiffies(10));
+ return;
+ case __IAVF_RESETTING:
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ queue_delayed_work(adapter->wq, &adapter->watchdog_task,
+ HZ * 2);
+ return;
+@@ -2884,7 +2884,7 @@ static void iavf_watchdog_task(struct work_struct *work)
+ case __IAVF_REMOVE:
+ default:
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ return;
+ }
+
+@@ -2896,14 +2896,14 @@ static void iavf_watchdog_task(struct work_struct *work)
+ dev_err(&adapter->pdev->dev, "Hardware reset detected\n");
+ iavf_schedule_reset(adapter, IAVF_FLAG_RESET_PENDING);
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ queue_delayed_work(adapter->wq,
+ &adapter->watchdog_task, HZ * 2);
+ return;
+ }
+
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ restart_watchdog:
+ if (adapter->state >= __IAVF_DOWN)
+ queue_work(adapter->wq, &adapter->adminq_task);
+@@ -3030,12 +3030,12 @@ static void iavf_reset_task(struct work_struct *work)
+ /* When device is being removed it doesn't make sense to run the reset
+ * task, just return in such a case.
+ */
+- mutex_lock(&netdev->lock);
++ netdev_lock(netdev);
+ if (!mutex_trylock(&adapter->crit_lock)) {
+ if (adapter->state != __IAVF_REMOVE)
+ queue_work(adapter->wq, &adapter->reset_task);
+
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ return;
+ }
+
+@@ -3083,7 +3083,7 @@ static void iavf_reset_task(struct work_struct *work)
+ reg_val);
+ iavf_disable_vf(adapter);
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ return; /* Do not attempt to reinit. It's dead, Jim. */
+ }
+
+@@ -3224,7 +3224,7 @@ static void iavf_reset_task(struct work_struct *work)
+
+ wake_up(&adapter->reset_waitqueue);
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+
+ return;
+ reset_err:
+@@ -3235,7 +3235,7 @@ static void iavf_reset_task(struct work_struct *work)
+ iavf_disable_vf(adapter);
+
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n");
+ }
+
+@@ -3707,10 +3707,10 @@ static int __iavf_setup_tc(struct net_device *netdev, void *type_data)
+ if (test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section))
+ return 0;
+
+- mutex_lock(&netdev->lock);
++ netdev_lock(netdev);
+ netif_set_real_num_rx_queues(netdev, total_qps);
+ netif_set_real_num_tx_queues(netdev, total_qps);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+
+ return ret;
+ }
+@@ -4380,7 +4380,7 @@ static int iavf_open(struct net_device *netdev)
+ return -EIO;
+ }
+
+- mutex_lock(&netdev->lock);
++ netdev_lock(netdev);
+ while (!mutex_trylock(&adapter->crit_lock)) {
+ /* If we are in __IAVF_INIT_CONFIG_ADAPTER state the crit_lock
+ * is already taken and iavf_open is called from an upper
+@@ -4388,7 +4388,7 @@ static int iavf_open(struct net_device *netdev)
+ * We have to leave here to avoid dead lock.
+ */
+ if (adapter->state == __IAVF_INIT_CONFIG_ADAPTER) {
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ return -EBUSY;
+ }
+
+@@ -4439,7 +4439,7 @@ static int iavf_open(struct net_device *netdev)
+ iavf_irq_enable(adapter, true);
+
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+
+ return 0;
+
+@@ -4452,7 +4452,7 @@ static int iavf_open(struct net_device *netdev)
+ iavf_free_all_tx_resources(adapter);
+ err_unlock:
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+
+ return err;
+ }
+@@ -4474,12 +4474,12 @@ static int iavf_close(struct net_device *netdev)
+ u64 aq_to_restore;
+ int status;
+
+- mutex_lock(&netdev->lock);
++ netdev_lock(netdev);
+ mutex_lock(&adapter->crit_lock);
+
+ if (adapter->state <= __IAVF_DOWN_PENDING) {
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+ return 0;
+ }
+
+@@ -4513,7 +4513,7 @@ static int iavf_close(struct net_device *netdev)
+ iavf_free_traffic_irqs(adapter);
+
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+
+ /* We explicitly don't free resources here because the hardware is
+ * still active and can DMA into memory. Resources are cleared in
+@@ -5390,7 +5390,7 @@ static int iavf_suspend(struct device *dev_d)
+
+ netif_device_detach(netdev);
+
+- mutex_lock(&netdev->lock);
++ netdev_lock(netdev);
+ mutex_lock(&adapter->crit_lock);
+
+ if (netif_running(netdev)) {
+@@ -5402,7 +5402,7 @@ static int iavf_suspend(struct device *dev_d)
+ iavf_reset_interrupt_capability(adapter);
+
+ mutex_unlock(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+
+ return 0;
+ }
+@@ -5501,7 +5501,7 @@ static void iavf_remove(struct pci_dev *pdev)
+ if (netdev->reg_state == NETREG_REGISTERED)
+ unregister_netdev(netdev);
+
+- mutex_lock(&netdev->lock);
++ netdev_lock(netdev);
+ mutex_lock(&adapter->crit_lock);
+ dev_info(&adapter->pdev->dev, "Removing device\n");
+ iavf_change_state(adapter, __IAVF_REMOVE);
+@@ -5538,7 +5538,7 @@ static void iavf_remove(struct pci_dev *pdev)
+ mutex_destroy(&hw->aq.asq_mutex);
+ mutex_unlock(&adapter->crit_lock);
+ mutex_destroy(&adapter->crit_lock);
+- mutex_unlock(&netdev->lock);
++ netdev_unlock(netdev);
+
+ iounmap(hw->hw_addr);
+ pci_release_regions(pdev);
+diff --git a/drivers/net/netdevsim/ethtool.c b/drivers/net/netdevsim/ethtool.c
+index 5fe1eaef99b5b..3f44a11aec83e 100644
+--- a/drivers/net/netdevsim/ethtool.c
++++ b/drivers/net/netdevsim/ethtool.c
+@@ -103,10 +103,10 @@ nsim_set_channels(struct net_device *dev, struct ethtool_channels *ch)
+ struct netdevsim *ns = netdev_priv(dev);
+ int err;
+
+- mutex_lock(&dev->lock);
++ netdev_lock(dev);
+ err = netif_set_real_num_queues(dev, ch->combined_count,
+ ch->combined_count);
+- mutex_unlock(&dev->lock);
++ netdev_unlock(dev);
+ if (err)
+ return err;
+
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index bb71ad82b42ba..4b2964d0d885e 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -2442,8 +2442,12 @@ struct net_device {
+ u32 napi_defer_hard_irqs;
+
+ /**
+- * @lock: protects @net_shaper_hierarchy, feel free to use for other
+- * netdev-scope protection. Ordering: take after rtnl_lock.
++ * @lock: netdev-scope lock, protects a small selection of fields.
++ * Should always be taken using netdev_lock() / netdev_unlock() helpers.
++ * Drivers are free to use it for other protection.
++ *
++ * Protects: @net_shaper_hierarchy.
++ * Ordering: take after rtnl_lock.
+ */
+ struct mutex lock;
+
+@@ -2673,6 +2677,21 @@ void netif_queue_set_napi(struct net_device *dev, unsigned int queue_index,
+ enum netdev_queue_type type,
+ struct napi_struct *napi);
+
++static inline void netdev_lock(struct net_device *dev)
++{
++ mutex_lock(&dev->lock);
++}
++
++static inline void netdev_unlock(struct net_device *dev)
++{
++ mutex_unlock(&dev->lock);
++}
++
++static inline void netdev_assert_locked(struct net_device *dev)
++{
++ lockdep_assert_held(&dev->lock);
++}
++
+ static inline void netif_napi_set_irq(struct napi_struct *napi, int irq)
+ {
+ napi->irq = irq;
+diff --git a/net/shaper/shaper.c b/net/shaper/shaper.c
+index 15463062fe7b6..7101a48bce545 100644
+--- a/net/shaper/shaper.c
++++ b/net/shaper/shaper.c
+@@ -40,7 +40,7 @@ static void net_shaper_lock(struct net_shaper_binding *binding)
+ {
+ switch (binding->type) {
+ case NET_SHAPER_BINDING_TYPE_NETDEV:
+- mutex_lock(&binding->netdev->lock);
++ netdev_lock(binding->netdev);
+ break;
+ }
+ }
+@@ -49,7 +49,7 @@ static void net_shaper_unlock(struct net_shaper_binding *binding)
+ {
+ switch (binding->type) {
+ case NET_SHAPER_BINDING_TYPE_NETDEV:
+- mutex_unlock(&binding->netdev->lock);
++ netdev_unlock(binding->netdev);
+ break;
+ }
+ }
+@@ -1398,7 +1398,7 @@ void net_shaper_set_real_num_tx_queues(struct net_device *dev,
+ /* Only drivers implementing shapers support ensure
+ * the lock is acquired in advance.
+ */
+- lockdep_assert_held(&dev->lock);
++ netdev_assert_locked(dev);
+
+ /* Take action only when decreasing the tx queue number. */
+ for (i = txq; i < dev->real_num_tx_queues; ++i) {
+--
+2.39.5
+
--- /dev/null
+From bf91f7abf6041ea207c0bcce3a3624e1cf7cf1ce Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Feb 2025 05:49:30 -0800
+Subject: net: Add non-RCU dev_getbyhwaddr() helper
+
+From: Breno Leitao <leitao@debian.org>
+
+[ Upstream commit 4b5a28b38c4a0106c64416a1b2042405166b26ce ]
+
+Add a dedicated helper for finding devices by hardware address when
+holding rtnl_lock, similar to the existing dev_getbyhwaddr_rcu(). This
+prevents PROVE_LOCKING warnings when rtnl_lock is held but the RCU
+read lock is not.
+
+Extract common address comparison logic into dev_addr_cmp().
+
+The context for this change can be found in the following discussion:
+
+Link: https://lore.kernel.org/all/20250206-scarlet-ermine-of-improvement-1fcac5@leitao/
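+
+A sketch of the intended call pattern (hypothetical caller, for
+illustration only): use dev_getbyhwaddr_rcu() under rcu_read_lock(),
+and the new helper when only rtnl_lock is held.
+
+	rtnl_lock();
+	dev = dev_getbyhwaddr(net, ARPHRD_ETHER, mac);
+	if (dev)
+		use_device(dev);	/* valid while rtnl_lock is held;
+					 * no reference is taken
+					 */
+	rtnl_unlock();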
+
+Cc: kuniyu@amazon.com
+Cc: ushankar@purestorage.com
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Breno Leitao <leitao@debian.org>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20250218-arm_fix_selftest-v5-1-d3d6892db9e1@debian.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 4eae0ee0f1e6 ("arp: switch to dev_getbyhwaddr() in arp_req_set_public()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netdevice.h | 2 ++
+ net/core/dev.c | 37 ++++++++++++++++++++++++++++++++++---
+ 2 files changed, 36 insertions(+), 3 deletions(-)
+
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index 8268be0723eee..bb71ad82b42ba 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -3138,6 +3138,8 @@ static inline struct net_device *first_net_device_rcu(struct net *net)
+ }
+
+ int netdev_boot_setup_check(struct net_device *dev);
++struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type,
++ const char *hwaddr);
+ struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
+ const char *hwaddr);
+ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type);
+diff --git a/net/core/dev.c b/net/core/dev.c
+index fbb796375aa0e..2b09714761c62 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -1012,6 +1012,12 @@ int netdev_get_name(struct net *net, char *name, int ifindex)
+ return ret;
+ }
+
++static bool dev_addr_cmp(struct net_device *dev, unsigned short type,
++ const char *ha)
++{
++ return dev->type == type && !memcmp(dev->dev_addr, ha, dev->addr_len);
++}
++
+ /**
+ * dev_getbyhwaddr_rcu - find a device by its hardware address
+ * @net: the applicable net namespace
+@@ -1020,7 +1026,7 @@ int netdev_get_name(struct net *net, char *name, int ifindex)
+ *
+ * Search for an interface by MAC address. Returns NULL if the device
+ * is not found or a pointer to the device.
+- * The caller must hold RCU or RTNL.
++ * The caller must hold RCU.
+ * The returned device has not had its ref count increased
+ * and the caller must therefore be careful about locking
+ *
+@@ -1032,14 +1038,39 @@ struct net_device *dev_getbyhwaddr_rcu(struct net *net, unsigned short type,
+ struct net_device *dev;
+
+ for_each_netdev_rcu(net, dev)
+- if (dev->type == type &&
+- !memcmp(dev->dev_addr, ha, dev->addr_len))
++ if (dev_addr_cmp(dev, type, ha))
+ return dev;
+
+ return NULL;
+ }
+ EXPORT_SYMBOL(dev_getbyhwaddr_rcu);
+
++/**
++ * dev_getbyhwaddr() - find a device by its hardware address
++ * @net: the applicable net namespace
++ * @type: media type of device
++ * @ha: hardware address
++ *
++ * Similar to dev_getbyhwaddr_rcu(), but the owner needs to hold
++ * rtnl_lock.
++ *
++ * Context: rtnl_lock() must be held.
++ * Return: pointer to the net_device, or NULL if not found
++ */
++struct net_device *dev_getbyhwaddr(struct net *net, unsigned short type,
++ const char *ha)
++{
++ struct net_device *dev;
++
++ ASSERT_RTNL();
++ for_each_netdev(net, dev)
++ if (dev_addr_cmp(dev, type, ha))
++ return dev;
++
++ return NULL;
++}
++EXPORT_SYMBOL(dev_getbyhwaddr);
++
+ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type)
+ {
+ struct net_device *dev, *ret = NULL;
+--
+2.39.5
+
--- /dev/null
+From fb96e843ea6aa2c8e1e1b885350068894aedb871 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Feb 2025 19:29:39 +0100
+Subject: net: allow small head cache usage with large MAX_SKB_FRAGS values
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 14ad6ed30a10afbe91b0749d6378285f4225d482 ]
+
+Sabrina reported the following splat:
+
+ WARNING: CPU: 0 PID: 1 at net/core/dev.c:6935 netif_napi_add_weight_locked+0x8f2/0xba0
+ Modules linked in:
+ CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Not tainted 6.14.0-rc1-net-00092-g011b03359038 #996
+ Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Arch Linux 1.16.3-1-1 04/01/2014
+ RIP: 0010:netif_napi_add_weight_locked+0x8f2/0xba0
+ Code: e8 c3 e6 6a fe 48 83 c4 28 5b 5d 41 5c 41 5d 41 5e 41 5f c3 cc cc cc cc c7 44 24 10 ff ff ff ff e9 8f fb ff ff e8 9e e6 6a fe <0f> 0b e9 d3 fe ff ff e8 92 e6 6a fe 48 8b 04 24 be ff ff ff ff 48
+ RSP: 0000:ffffc9000001fc60 EFLAGS: 00010293
+ RAX: 0000000000000000 RBX: ffff88806ce48128 RCX: 1ffff11001664b9e
+ RDX: ffff888008f00040 RSI: ffffffff8317ca42 RDI: ffff88800b325cb6
+ RBP: ffff88800b325c40 R08: 0000000000000001 R09: ffffed100167502c
+ R10: ffff88800b3a8163 R11: 0000000000000000 R12: ffff88800ac1c168
+ R13: ffff88800ac1c168 R14: ffff88800ac1c168 R15: 0000000000000007
+ FS: 0000000000000000(0000) GS:ffff88806ce00000(0000) knlGS:0000000000000000
+ CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
+ CR2: ffff888008201000 CR3: 0000000004c94001 CR4: 0000000000370ef0
+ DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
+ DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
+ Call Trace:
+ <TASK>
+ gro_cells_init+0x1ba/0x270
+ xfrm_input_init+0x4b/0x2a0
+ xfrm_init+0x38/0x50
+ ip_rt_init+0x2d7/0x350
+ ip_init+0xf/0x20
+ inet_init+0x406/0x590
+ do_one_initcall+0x9d/0x2e0
+ do_initcalls+0x23b/0x280
+ kernel_init_freeable+0x445/0x490
+ kernel_init+0x20/0x1d0
+ ret_from_fork+0x46/0x80
+ ret_from_fork_asm+0x1a/0x30
+ </TASK>
+ irq event stamp: 584330
+ hardirqs last enabled at (584338): [<ffffffff8168bf87>] __up_console_sem+0x77/0xb0
+ hardirqs last disabled at (584345): [<ffffffff8168bf6c>] __up_console_sem+0x5c/0xb0
+ softirqs last enabled at (583242): [<ffffffff833ee96d>] netlink_insert+0x14d/0x470
+ softirqs last disabled at (583754): [<ffffffff8317c8cd>] netif_napi_add_weight_locked+0x77d/0xba0
+
+on a kernel built with MAX_SKB_FRAGS=45, where SKB_WITH_OVERHEAD(1024)
+is smaller than GRO_MAX_HEAD.
+
+Such a build additionally contains the revert of the single page frag
+cache, so napi_get_frags() ends up using the page frag allocator,
+triggering the splat.
+
+Note that the underlying issue is independent of the mentioned revert;
+address it by ensuring that the small head cache can fit both TCP and
+GRO allocations, and by updating napi_alloc_skb() and
+__netdev_alloc_skb() to select kmalloc() for any allocation that fits
+in that cache.
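+
+Back-of-envelope arithmetic behind the threshold (sizes approximate
+and config dependent): each skb_frag_t is 16 bytes, so raising
+MAX_SKB_FRAGS from the default 17 to 45 grows struct skb_shared_info
+by roughly (45 - 17) * 16 = 448 bytes. SKB_WITH_OVERHEAD(1024), i.e.
+1024 - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)), then drops to
+roughly 256 bytes, below GRO_MAX_HEAD = MAX_HEADER + 128 (around 300
+bytes on common configs); hence the new SKB_SMALL_HEAD_SIZE is sized
+from max(MAX_TCP_HEADER, GRO_MAX_HEAD_PAD) below.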
+
+Reported-by: Sabrina Dubroca <sd@queasysnail.net>
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Fixes: 3948b05950fd ("net: introduce a config option to tweak MAX_SKB_FRAGS")
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/gro.h | 3 +++
+ net/core/gro.c | 3 ---
+ net/core/skbuff.c | 10 +++++++---
+ 3 files changed, 10 insertions(+), 6 deletions(-)
+
+diff --git a/include/net/gro.h b/include/net/gro.h
+index b9b58c1f8d190..7b548f91754bf 100644
+--- a/include/net/gro.h
++++ b/include/net/gro.h
+@@ -11,6 +11,9 @@
+ #include <net/udp.h>
+ #include <net/hotdata.h>
+
++/* This should be increased if a protocol with a bigger head is added. */
++#define GRO_MAX_HEAD (MAX_HEADER + 128)
++
+ struct napi_gro_cb {
+ union {
+ struct {
+diff --git a/net/core/gro.c b/net/core/gro.c
+index d1f44084e978f..78b320b631744 100644
+--- a/net/core/gro.c
++++ b/net/core/gro.c
+@@ -7,9 +7,6 @@
+
+ #define MAX_GRO_SKBS 8
+
+-/* This should be increased if a protocol with a bigger head is added. */
+-#define GRO_MAX_HEAD (MAX_HEADER + 128)
+-
+ static DEFINE_SPINLOCK(offload_lock);
+
+ /**
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index 6841e61a6bd0b..f251a99f8d421 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -69,6 +69,7 @@
+ #include <net/dst.h>
+ #include <net/sock.h>
+ #include <net/checksum.h>
++#include <net/gro.h>
+ #include <net/gso.h>
+ #include <net/hotdata.h>
+ #include <net/ip6_checksum.h>
+@@ -95,7 +96,9 @@
+ static struct kmem_cache *skbuff_ext_cache __ro_after_init;
+ #endif
+
+-#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
++#define GRO_MAX_HEAD_PAD (GRO_MAX_HEAD + NET_SKB_PAD + NET_IP_ALIGN)
++#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(max(MAX_TCP_HEADER, \
++ GRO_MAX_HEAD_PAD))
+
+ /* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two.
+ * This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique
+@@ -736,7 +739,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
+ /* If requested length is either too small or too big,
+ * we use kmalloc() for skb->head allocation.
+ */
+- if (len <= SKB_WITH_OVERHEAD(1024) ||
++ if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) ||
+ len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
+ (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
+ skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX, NUMA_NO_NODE);
+@@ -816,7 +819,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
+ * When the small frag allocator is available, prefer it over kmalloc
+ * for small fragments
+ */
+- if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) ||
++ if ((!NAPI_HAS_SMALL_PAGE_FRAG &&
++ len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)) ||
+ len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
+ (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
+ skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
+--
+2.39.5
+
--- /dev/null
+From 3dab03c4eee3fa5497b5d0dc8a0dd4a48bbef5fb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Feb 2025 13:58:42 +0800
+Subject: net: axienet: Set mac_managed_pm
+
+From: Nick Hu <nick.hu@sifive.com>
+
+[ Upstream commit a370295367b55662a32a4be92565fe72a5aa79bb ]
+
+The external PHY will undergo a soft reset twice during the resume
+process when it wakes up from suspend. The first reset occurs when the
+axienet driver calls phylink_of_phy_connect(), and the second occurs
+when mdio_bus_phy_resume() invokes phy_init_hw(). The second soft reset
+of the external PHY does not reinitialize the internal PHY, which
+causes issues with the internal PHY and results in the PHY link being
+down. To prevent this, set the mac_managed_pm flag so that
+mdio_bus_phy_resume() is skipped.
+
+Fixes: a129b41fe0a8 ("Revert "net: phy: dp83867: perform soft reset and retain established link"")
+Signed-off-by: Nick Hu <nick.hu@sifive.com>
+Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
+Link: https://patch.msgid.link/20250217055843.19799-1-nick.hu@sifive.com
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/xilinx/xilinx_axienet_main.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+index ae743991117c4..300cf7fed8bca 100644
+--- a/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
++++ b/drivers/net/ethernet/xilinx/xilinx_axienet_main.c
+@@ -2888,6 +2888,7 @@ static int axienet_probe(struct platform_device *pdev)
+
+ lp->phylink_config.dev = &ndev->dev;
+ lp->phylink_config.type = PHYLINK_NETDEV;
++ lp->phylink_config.mac_managed_pm = true;
+ lp->phylink_config.mac_capabilities = MAC_SYM_PAUSE | MAC_ASYM_PAUSE |
+ MAC_10FD | MAC_100FD | MAC_1000FD;
+
+--
+2.39.5
+
--- /dev/null
+From 9ecd605aef6d75542cf0ec6864c4602deaf809c6 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Jan 2025 19:53:10 -0800
+Subject: net: make netdev_lock() protect netdev->reg_state
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 5fda3f35349b6b7f22f5f5095a3821261d515075 ]
+
+Protect writes to netdev->reg_state with netdev_lock().
+From now on holding netdev_lock() is sufficient to prevent
+the net_device from getting unregistered, so code which
+wants to hold just a single netdev around no longer needs
+to hold rtnl_lock.
+
+We do not protect the NETREG_UNREGISTERED -> NETREG_RELEASED
+transition. We'd need to move mutex_destroy(netdev->lock)
+to .release, but the real reason is that trying to stop
+the unregistration process mid-way would be unsafe / crazy.
+Taking references on such devices is not safe, either.
+So the intended semantics are to lock REGISTERED devices.
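+
+A sketch of what this enables (assumed usage, not in this patch):
+pinning a single registered device without rtnl_lock.
+
+	netdev_lock(dev);
+	if (dev->reg_state == NETREG_REGISTERED) {
+		/* dev cannot leave REGISTERED while @lock is held */
+		do_work(dev);	/* hypothetical helper */
+	}
+	netdev_unlock(dev);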
+
+Reviewed-by: Joe Damato <jdamato@fastly.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20250115035319.559603-3-kuba@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 6bc7e4eb0499 ("Revert "net: skb: introduce and use a single page frag cache"")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netdevice.h | 2 +-
+ net/core/dev.c | 6 ++++++
+ 2 files changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index 4b2964d0d885e..e0a8093c9be80 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -2446,7 +2446,7 @@ struct net_device {
+ * Should always be taken using netdev_lock() / netdev_unlock() helpers.
+ * Drivers are free to use it for other protection.
+ *
+- * Protects: @net_shaper_hierarchy.
++ * Protects: @reg_state, @net_shaper_hierarchy.
+ * Ordering: take after rtnl_lock.
+ */
+ struct mutex lock;
+diff --git a/net/core/dev.c b/net/core/dev.c
+index c5e5b827bb800..d1e8613151a4a 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -10687,7 +10687,9 @@ int register_netdevice(struct net_device *dev)
+
+ ret = netdev_register_kobject(dev);
+
++ netdev_lock(dev);
+ WRITE_ONCE(dev->reg_state, ret ? NETREG_UNREGISTERED : NETREG_REGISTERED);
++ netdev_unlock(dev);
+
+ if (ret)
+ goto err_uninit_notify;
+@@ -10985,7 +10987,9 @@ void netdev_run_todo(void)
+ continue;
+ }
+
++ netdev_lock(dev);
+ WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERED);
++ netdev_unlock(dev);
+ linkwatch_sync_dev(dev);
+ }
+
+@@ -11591,7 +11595,9 @@ void unregister_netdevice_many_notify(struct list_head *head,
+ list_for_each_entry(dev, head, unreg_list) {
+ /* And unlink it from device chain. */
+ unlist_netdevice(dev);
++ netdev_lock(dev);
+ WRITE_ONCE(dev->reg_state, NETREG_UNREGISTERING);
++ netdev_unlock(dev);
+ }
+ flush_all_backlogs();
+
+--
+2.39.5
+
--- /dev/null
+From cbcf8c6d4b7fb6fab20ae92429077352181644eb Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 7 Jan 2025 08:08:39 -0800
+Subject: net: make sure we retain NAPI ordering on netdev->napi_list
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit d6c7b03497eef8b66bf0b5572881359913e39787 ]
+
+Netlink code depends on NAPI instances being sorted by ID on
+the netdev list for dump continuation. We need to be able to
+find the position on the list where we left off if the dump
+does not fit in a single skb, because in the meantime NAPI
+instances can come and go.
+This was trivially true when we were assigning a new ID to every
+new NAPI instance. Since we added the NAPI config API, we try
+to retain the ID previously used for the same queue, but still
+add the new NAPI instance at the start of the list.
+
+This is fine if we reset the entire netdev and all NAPIs get
+removed and added back. If a driver replaces a NAPI instance
+during an operation like a DEVMEM queue reset, or recreates
+a subset of NAPI instances in other ways, we may end up with
+broken ordering, and therefore with Netlink dumps that have
+either missing or duplicated entries.
+
+At this stage the problem is theoretical. Only two drivers
+support the queue API: bnxt and gve. gve recreates NAPIs
+during queue reset, but it doesn't support NAPI config;
+bnxt supports NAPI config but doesn't recreate instances
+during reset.
+
+We need to save the ID in the config as soon as it is assigned,
+because otherwise, at the time it is added, the new NAPI will
+not know what ID it will get at enable time.
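+
+As an illustration with hypothetical IDs: with instances [14, 12, 9]
+on the list (highest first), a replacement NAPI inheriting ID 12 must
+be inserted between 14 and 9. Blindly adding it at the head would
+yield [12, 14, 9], and a dump that stopped at ID 12 could then skip
+or repeat entries on resumption.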
+
+Reviewed-by: Willem de Bruijn <willemb@google.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: 6bc7e4eb0499 ("Revert "net: skb: introduce and use a single page frag cache"")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/dev.c | 42 ++++++++++++++++++++++++++++++++++++------
+ 1 file changed, 36 insertions(+), 6 deletions(-)
+
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 2b09714761c62..c5e5b827bb800 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -6766,13 +6766,14 @@ static void napi_restore_config(struct napi_struct *n)
+ n->gro_flush_timeout = n->config->gro_flush_timeout;
+ n->irq_suspend_timeout = n->config->irq_suspend_timeout;
+ /* a NAPI ID might be stored in the config, if so use it. if not, use
+- * napi_hash_add to generate one for us. It will be saved to the config
+- * in napi_disable.
++ * napi_hash_add to generate one for us.
+ */
+- if (n->config->napi_id)
++ if (n->config->napi_id) {
+ napi_hash_add_with_id(n, n->config->napi_id);
+- else
++ } else {
+ napi_hash_add(n);
++ n->config->napi_id = n->napi_id;
++ }
+ }
+
+ static void napi_save_config(struct napi_struct *n)
+@@ -6780,10 +6781,39 @@ static void napi_save_config(struct napi_struct *n)
+ n->config->defer_hard_irqs = n->defer_hard_irqs;
+ n->config->gro_flush_timeout = n->gro_flush_timeout;
+ n->config->irq_suspend_timeout = n->irq_suspend_timeout;
+- n->config->napi_id = n->napi_id;
+ napi_hash_del(n);
+ }
+
++/* Netlink wants the NAPI list to be sorted by ID, if adding a NAPI which will
++ * inherit an existing ID try to insert it at the right position.
++ */
++static void
++netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
++{
++ unsigned int new_id, pos_id;
++ struct list_head *higher;
++ struct napi_struct *pos;
++
++ new_id = UINT_MAX;
++ if (napi->config && napi->config->napi_id)
++ new_id = napi->config->napi_id;
++
++ higher = &dev->napi_list;
++ list_for_each_entry(pos, &dev->napi_list, dev_list) {
++ if (pos->napi_id >= MIN_NAPI_ID)
++ pos_id = pos->napi_id;
++ else if (pos->config)
++ pos_id = pos->config->napi_id;
++ else
++ pos_id = UINT_MAX;
++
++ if (pos_id <= new_id)
++ break;
++ higher = &pos->dev_list;
++ }
++ list_add_rcu(&napi->dev_list, higher); /* adds after higher */
++}
++
+ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
+ int (*poll)(struct napi_struct *, int), int weight)
+ {
+@@ -6810,7 +6840,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
+ napi->list_owner = -1;
+ set_bit(NAPI_STATE_SCHED, &napi->state);
+ set_bit(NAPI_STATE_NPSVC, &napi->state);
+- list_add_rcu(&napi->dev_list, &dev->napi_list);
++ netif_napi_dev_list_add(dev, napi);
+
+ /* default settings from sysfs are applied to all NAPIs. any per-NAPI
+ * configuration will be loaded in napi_enable
+--
+2.39.5
+
--- /dev/null
+From d828e5de2b0cc4f102f4218c7e466a32fe197a1c Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 14 Jan 2025 19:53:13 -0800
+Subject: net: protect netdev->napi_list with netdev_lock()
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit 1b23cdbd2bbc4b40e21c12ae86c2781e347ff0f8 ]
+
+Hold netdev->lock when NAPIs are getting added or removed.
+This will allow safe access to NAPI instances of a net_device
+without rtnl_lock.
+
+Create a family of helpers which assume the lock is already taken.
+Switch iavf to them, as it makes extensive use of netdev->lock,
+already.
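+
+A sketch of the driver-side pattern (hypothetical poll function,
+mirroring the iavf conversion below): code that already holds
+netdev->lock must use the _locked variants to avoid taking the
+lock recursively.
+
+	netdev_lock(netdev);
+	netif_napi_add_locked(netdev, &priv->napi, example_poll);
+	...
+	netif_napi_del_locked(&priv->napi);
+	netdev_unlock(netdev);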
+
+Reviewed-by: Joe Damato <jdamato@fastly.com>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Kuniyuki Iwashima <kuniyu@amazon.com>
+Link: https://patch.msgid.link/20250115035319.559603-6-kuba@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 6bc7e4eb0499 ("Revert "net: skb: introduce and use a single page frag cache"")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/ethernet/intel/iavf/iavf_main.c | 6 +--
+ include/linux/netdevice.h | 54 ++++++++++++++++++---
+ net/core/dev.c | 15 ++++--
+ 3 files changed, 60 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c
+index 72314b0a1b25b..4639f55a17be1 100644
+--- a/drivers/net/ethernet/intel/iavf/iavf_main.c
++++ b/drivers/net/ethernet/intel/iavf/iavf_main.c
+@@ -1815,8 +1815,8 @@ static int iavf_alloc_q_vectors(struct iavf_adapter *adapter)
+ q_vector->v_idx = q_idx;
+ q_vector->reg_idx = q_idx;
+ cpumask_copy(&q_vector->affinity_mask, cpu_possible_mask);
+- netif_napi_add(adapter->netdev, &q_vector->napi,
+- iavf_napi_poll);
++ netif_napi_add_locked(adapter->netdev, &q_vector->napi,
++ iavf_napi_poll);
+ }
+
+ return 0;
+@@ -1842,7 +1842,7 @@ static void iavf_free_q_vectors(struct iavf_adapter *adapter)
+ for (q_idx = 0; q_idx < num_q_vectors; q_idx++) {
+ struct iavf_q_vector *q_vector = &adapter->q_vectors[q_idx];
+
+- netif_napi_del(&q_vector->napi);
++ netif_napi_del_locked(&q_vector->napi);
+ }
+ kfree(adapter->q_vectors);
+ adapter->q_vectors = NULL;
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index eb4d61eee7e97..db4facb384684 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -2454,7 +2454,7 @@ struct net_device {
+ * Drivers are free to use it for other protection.
+ *
+ * Protects:
+- * @net_shaper_hierarchy, @reg_state
++ * @napi_list, @net_shaper_hierarchy, @reg_state
+ *
+ * Partially protects (writers must hold both @lock and rtnl_lock):
+ * @up
+@@ -2714,8 +2714,19 @@ static inline void netif_napi_set_irq(struct napi_struct *napi, int irq)
+ */
+ #define NAPI_POLL_WEIGHT 64
+
+-void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
+- int (*poll)(struct napi_struct *, int), int weight);
++void netif_napi_add_weight_locked(struct net_device *dev,
++ struct napi_struct *napi,
++ int (*poll)(struct napi_struct *, int),
++ int weight);
++
++static inline void
++netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
++ int (*poll)(struct napi_struct *, int), int weight)
++{
++ netdev_lock(dev);
++ netif_napi_add_weight_locked(dev, napi, poll, weight);
++ netdev_unlock(dev);
++}
+
+ /**
+ * netif_napi_add() - initialize a NAPI context
+@@ -2733,6 +2744,13 @@ netif_napi_add(struct net_device *dev, struct napi_struct *napi,
+ netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT);
+ }
+
++static inline void
++netif_napi_add_locked(struct net_device *dev, struct napi_struct *napi,
++ int (*poll)(struct napi_struct *, int))
++{
++ netif_napi_add_weight_locked(dev, napi, poll, NAPI_POLL_WEIGHT);
++}
++
+ static inline void
+ netif_napi_add_tx_weight(struct net_device *dev,
+ struct napi_struct *napi,
+@@ -2743,6 +2761,15 @@ netif_napi_add_tx_weight(struct net_device *dev,
+ netif_napi_add_weight(dev, napi, poll, weight);
+ }
+
++static inline void
++netif_napi_add_config_locked(struct net_device *dev, struct napi_struct *napi,
++ int (*poll)(struct napi_struct *, int), int index)
++{
++ napi->index = index;
++ napi->config = &dev->napi_config[index];
++ netif_napi_add_weight_locked(dev, napi, poll, NAPI_POLL_WEIGHT);
++}
++
+ /**
+ * netif_napi_add_config - initialize a NAPI context with persistent config
+ * @dev: network device
+@@ -2754,9 +2781,9 @@ static inline void
+ netif_napi_add_config(struct net_device *dev, struct napi_struct *napi,
+ int (*poll)(struct napi_struct *, int), int index)
+ {
+- napi->index = index;
+- napi->config = &dev->napi_config[index];
+- netif_napi_add_weight(dev, napi, poll, NAPI_POLL_WEIGHT);
++ netdev_lock(dev);
++ netif_napi_add_config_locked(dev, napi, poll, index);
++ netdev_unlock(dev);
+ }
+
+ /**
+@@ -2776,6 +2803,8 @@ static inline void netif_napi_add_tx(struct net_device *dev,
+ netif_napi_add_tx_weight(dev, napi, poll, NAPI_POLL_WEIGHT);
+ }
+
++void __netif_napi_del_locked(struct napi_struct *napi);
++
+ /**
+ * __netif_napi_del - remove a NAPI context
+ * @napi: NAPI context
+@@ -2784,7 +2813,18 @@ static inline void netif_napi_add_tx(struct net_device *dev,
+ * containing @napi. Drivers might want to call this helper to combine
+ * all the needed RCU grace periods into a single one.
+ */
+-void __netif_napi_del(struct napi_struct *napi);
++static inline void __netif_napi_del(struct napi_struct *napi)
++{
++ netdev_lock(napi->dev);
++ __netif_napi_del_locked(napi);
++ netdev_unlock(napi->dev);
++}
++
++static inline void netif_napi_del_locked(struct napi_struct *napi)
++{
++ __netif_napi_del_locked(napi);
++ synchronize_net();
++}
+
+ /**
+ * netif_napi_del - remove a NAPI context
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 60f48d63559a1..6dfed2746c528 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -6814,9 +6814,12 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
+ list_add_rcu(&napi->dev_list, higher); /* adds after higher */
+ }
+
+-void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
+- int (*poll)(struct napi_struct *, int), int weight)
++void netif_napi_add_weight_locked(struct net_device *dev,
++ struct napi_struct *napi,
++ int (*poll)(struct napi_struct *, int),
++ int weight)
+ {
++ netdev_assert_locked(dev);
+ if (WARN_ON(test_and_set_bit(NAPI_STATE_LISTED, &napi->state)))
+ return;
+
+@@ -6857,7 +6860,7 @@ void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi,
+ dev->threaded = false;
+ netif_napi_set_irq(napi, -1);
+ }
+-EXPORT_SYMBOL(netif_napi_add_weight);
++EXPORT_SYMBOL(netif_napi_add_weight_locked);
+
+ void napi_disable(struct napi_struct *n)
+ {
+@@ -6928,8 +6931,10 @@ static void flush_gro_hash(struct napi_struct *napi)
+ }
+
+ /* Must be called in process context */
+-void __netif_napi_del(struct napi_struct *napi)
++void __netif_napi_del_locked(struct napi_struct *napi)
+ {
++ netdev_assert_locked(napi->dev);
++
+ if (!test_and_clear_bit(NAPI_STATE_LISTED, &napi->state))
+ return;
+
+@@ -6949,7 +6954,7 @@ void __netif_napi_del(struct napi_struct *napi)
+ napi->thread = NULL;
+ }
+ }
+-EXPORT_SYMBOL(__netif_napi_del);
++EXPORT_SYMBOL(__netif_napi_del_locked);
+
+ static int __napi_poll(struct napi_struct *n, bool *repoll)
+ {
+--
+2.39.5
+
--- /dev/null
+From 8e7d2a0560eac2057b027b960090e8903b50e9e5 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Jan 2025 10:40:21 +0100
+Subject: net: pse-pd: Avoid setting max_uA in regulator constraints
+
+From: Kory Maincent <kory.maincent@bootlin.com>
+
+[ Upstream commit 675d0e3cacc3ae7c29294a5f6a820187f862ad8b ]
+
+Setting the max_uA constraint in the regulator API imposes a current
+limit during the regulator registration process. This behavior conflicts
+with preserving the maximum PI power budget configuration across reboots.
+
+Instead, compare the desired current limit to MAX_PI_CURRENT in the
+pse_pi_set_current_limit() function to ensure proper handling of the
+power budget.
+
+Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: f6093c5ec74d ("net: pse-pd: pd692x0: Fix power limit retrieval")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/pse-pd/pse_core.c | 7 ++++---
+ 1 file changed, 4 insertions(+), 3 deletions(-)
+
+diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c
+index 2906ce173f66c..9fee4dd53515a 100644
+--- a/drivers/net/pse-pd/pse_core.c
++++ b/drivers/net/pse-pd/pse_core.c
+@@ -357,6 +357,9 @@ static int pse_pi_set_current_limit(struct regulator_dev *rdev, int min_uA,
+ if (!ops->pi_set_current_limit)
+ return -EOPNOTSUPP;
+
++ if (max_uA > MAX_PI_CURRENT)
++ return -ERANGE;
++
+ id = rdev_get_id(rdev);
+ mutex_lock(&pcdev->lock);
+ ret = ops->pi_set_current_limit(pcdev, id, max_uA);
+@@ -403,11 +406,9 @@ devm_pse_pi_regulator_register(struct pse_controller_dev *pcdev,
+
+ rinit_data->constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS;
+
+- if (pcdev->ops->pi_set_current_limit) {
++ if (pcdev->ops->pi_set_current_limit)
+ rinit_data->constraints.valid_ops_mask |=
+ REGULATOR_CHANGE_CURRENT;
+- rinit_data->constraints.max_uA = MAX_PI_CURRENT;
+- }
+
+ rinit_data->supply_regulator = "vpwr";
+
+--
+2.39.5
+
--- /dev/null
+From b91078f97755867eaf085e05e986d2ddea7d6e1a Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Feb 2025 14:48:11 +0100
+Subject: net: pse-pd: pd692x0: Fix power limit retrieval
+
+From: Kory Maincent <kory.maincent@bootlin.com>
+
+[ Upstream commit f6093c5ec74d5cc495f89bd359253d9c738d04d9 ]
+
+Fix an incorrect data offset read in the pd692x0_pi_get_pw_limit
+callback. The issue previously went unnoticed because the callback was
+only used by the regulator API and not thoroughly tested, since the
+PSE is mainly controlled via ethtool.
+
+The function became actively used by ethtool after commit 3e9dbfec4998
+("net: pse-pd: Split ethtool_get_status into multiple callbacks"),
+which led to the discovery of this issue.
+
+Fix it by using the correct data offset.
+
+Fixes: a87e699c9d33 ("net: pse-pd: pd692x0: Enhance with new current limit and voltage read callbacks")
+Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
+Link: https://patch.msgid.link/20250217134812.1925345-1-kory.maincent@bootlin.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/pse-pd/pd692x0.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/net/pse-pd/pd692x0.c b/drivers/net/pse-pd/pd692x0.c
+index 9f00538f7e450..7cfc36cadb576 100644
+--- a/drivers/net/pse-pd/pd692x0.c
++++ b/drivers/net/pse-pd/pd692x0.c
+@@ -1012,7 +1012,7 @@ static int pd692x0_pi_get_pw_limit(struct pse_controller_dev *pcdev,
+ if (ret < 0)
+ return ret;
+
+- return pd692x0_pi_get_pw_from_table(buf.data[2], buf.data[3]);
++ return pd692x0_pi_get_pw_from_table(buf.data[0], buf.data[1]);
+ }
+
+ static int pd692x0_pi_set_pw_limit(struct pse_controller_dev *pcdev,
+--
+2.39.5
+
--- /dev/null
+From 8cd706eef5e70f4c28b45432ff6ea2d466e11634 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 10 Jan 2025 10:40:26 +0100
+Subject: net: pse-pd: Use power limit at driver side instead of current limit
+
+From: Kory Maincent <kory.maincent@bootlin.com>
+
+[ Upstream commit e0a5e2bba38aa61a900934b45d6e846e0a6d7524 ]
+
+The regulator framework uses current limits, but the PSE standard and
+known PSE controllers rely on power limits. Instead of converting
+current to power within each driver, perform the conversion in the PSE
+core. This avoids redundancy in driver implementation and aligns better
+with the standard, simplifying driver development.
+
+At the same time, remove the _pse_ethtool_get_status() function, which
+is no longer needed.
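+
+The conversion now done in the core, as a worked example (illustrative
+values): with a 54 V port (uV = 54000000) and a regulator request of
+max_uA = 500000, the limit handed to the driver is
+
+	/* mW = uA * uV / 1000000000 */
+	mW = DIV_ROUND_CLOSEST_ULL((s64)500000 * 54000000, 1000000000);
+	/* = 27000 mW, i.e. 27 W at 54 V and 0.5 A */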
+
+Acked-by: Oleksij Rempel <o.rempel@pengutronix.de>
+Signed-off-by: Kory Maincent <kory.maincent@bootlin.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: f6093c5ec74d ("net: pse-pd: pd692x0: Fix power limit retrieval")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/net/pse-pd/pd692x0.c | 45 ++++-------------
+ drivers/net/pse-pd/pse_core.c | 91 ++++++++++++++++-------------------
+ include/linux/pse-pd/pse.h | 16 +++---
+ 3 files changed, 57 insertions(+), 95 deletions(-)
+
+diff --git a/drivers/net/pse-pd/pd692x0.c b/drivers/net/pse-pd/pd692x0.c
+index 0af7db80b2f88..9f00538f7e450 100644
+--- a/drivers/net/pse-pd/pd692x0.c
++++ b/drivers/net/pse-pd/pd692x0.c
+@@ -999,13 +999,12 @@ static int pd692x0_pi_get_voltage(struct pse_controller_dev *pcdev, int id)
+ return (buf.sub[0] << 8 | buf.sub[1]) * 100000;
+ }
+
+-static int pd692x0_pi_get_current_limit(struct pse_controller_dev *pcdev,
+- int id)
++static int pd692x0_pi_get_pw_limit(struct pse_controller_dev *pcdev,
++ int id)
+ {
+ struct pd692x0_priv *priv = to_pd692x0_priv(pcdev);
+ struct pd692x0_msg msg, buf = {0};
+- int mW, uV, uA, ret;
+- s64 tmp_64;
++ int ret;
+
+ msg = pd692x0_msg_template_list[PD692X0_MSG_GET_PORT_PARAM];
+ msg.sub[2] = id;
+@@ -1013,48 +1012,24 @@ static int pd692x0_pi_get_current_limit(struct pse_controller_dev *pcdev,
+ if (ret < 0)
+ return ret;
+
+- ret = pd692x0_pi_get_pw_from_table(buf.data[2], buf.data[3]);
+- if (ret < 0)
+- return ret;
+- mW = ret;
+-
+- ret = pd692x0_pi_get_voltage(pcdev, id);
+- if (ret < 0)
+- return ret;
+- uV = ret;
+-
+- tmp_64 = mW;
+- tmp_64 *= 1000000000ull;
+- /* uA = mW * 1000000000 / uV */
+- uA = DIV_ROUND_CLOSEST_ULL(tmp_64, uV);
+- return uA;
++ return pd692x0_pi_get_pw_from_table(buf.data[2], buf.data[3]);
+ }
+
+-static int pd692x0_pi_set_current_limit(struct pse_controller_dev *pcdev,
+- int id, int max_uA)
++static int pd692x0_pi_set_pw_limit(struct pse_controller_dev *pcdev,
++ int id, int max_mW)
+ {
+ struct pd692x0_priv *priv = to_pd692x0_priv(pcdev);
+ struct device *dev = &priv->client->dev;
+ struct pd692x0_msg msg, buf = {0};
+- int uV, ret, mW;
+- s64 tmp_64;
++ int ret;
+
+ ret = pd692x0_fw_unavailable(priv);
+ if (ret)
+ return ret;
+
+- ret = pd692x0_pi_get_voltage(pcdev, id);
+- if (ret < 0)
+- return ret;
+- uV = ret;
+-
+ msg = pd692x0_msg_template_list[PD692X0_MSG_SET_PORT_PARAM];
+ msg.sub[2] = id;
+- tmp_64 = uV;
+- tmp_64 *= max_uA;
+- /* mW = uV * uA / 1000000000 */
+- mW = DIV_ROUND_CLOSEST_ULL(tmp_64, 1000000000);
+- ret = pd692x0_pi_set_pw_from_table(dev, &msg, mW);
++ ret = pd692x0_pi_set_pw_from_table(dev, &msg, max_mW);
+ if (ret)
+ return ret;
+
+@@ -1068,8 +1043,8 @@ static const struct pse_controller_ops pd692x0_ops = {
+ .pi_disable = pd692x0_pi_disable,
+ .pi_is_enabled = pd692x0_pi_is_enabled,
+ .pi_get_voltage = pd692x0_pi_get_voltage,
+- .pi_get_current_limit = pd692x0_pi_get_current_limit,
+- .pi_set_current_limit = pd692x0_pi_set_current_limit,
++ .pi_get_pw_limit = pd692x0_pi_get_pw_limit,
++ .pi_set_pw_limit = pd692x0_pi_set_pw_limit,
+ };
+
+ #define PD692X0_FW_LINE_MAX_SZ 0xff
+diff --git a/drivers/net/pse-pd/pse_core.c b/drivers/net/pse-pd/pse_core.c
+index 9fee4dd53515a..4c5abef9e94ee 100644
+--- a/drivers/net/pse-pd/pse_core.c
++++ b/drivers/net/pse-pd/pse_core.c
+@@ -291,33 +291,25 @@ static int pse_pi_get_voltage(struct regulator_dev *rdev)
+ return ret;
+ }
+
+-static int _pse_ethtool_get_status(struct pse_controller_dev *pcdev,
+- int id,
+- struct netlink_ext_ack *extack,
+- struct pse_control_status *status);
+-
+ static int pse_pi_get_current_limit(struct regulator_dev *rdev)
+ {
+ struct pse_controller_dev *pcdev = rdev_get_drvdata(rdev);
+ const struct pse_controller_ops *ops;
+- struct netlink_ext_ack extack = {};
+- struct pse_control_status st = {};
+- int id, uV, ret;
++ int id, uV, mW, ret;
+ s64 tmp_64;
+
+ ops = pcdev->ops;
+ id = rdev_get_id(rdev);
++ if (!ops->pi_get_pw_limit || !ops->pi_get_voltage)
++ return -EOPNOTSUPP;
++
+ mutex_lock(&pcdev->lock);
+- if (ops->pi_get_current_limit) {
+- ret = ops->pi_get_current_limit(pcdev, id);
++ ret = ops->pi_get_pw_limit(pcdev, id);
++ if (ret < 0)
+ goto out;
+- }
++ mW = ret;
+
+- /* If pi_get_current_limit() callback not populated get voltage
+- * from pi_get_voltage() and power limit from ethtool_get_status()
+- * to calculate current limit.
+- */
+- ret = _pse_pi_get_voltage(rdev);
++ ret = pse_pi_get_voltage(rdev);
+ if (!ret) {
+ dev_err(pcdev->dev, "Voltage null\n");
+ ret = -ERANGE;
+@@ -327,16 +319,7 @@ static int pse_pi_get_current_limit(struct regulator_dev *rdev)
+ goto out;
+ uV = ret;
+
+- ret = _pse_ethtool_get_status(pcdev, id, &extack, &st);
+- if (ret)
+- goto out;
+-
+- if (!st.c33_avail_pw_limit) {
+- ret = -ENODATA;
+- goto out;
+- }
+-
+- tmp_64 = st.c33_avail_pw_limit;
++ tmp_64 = mW;
+ tmp_64 *= 1000000000ull;
+ /* uA = mW * 1000000000 / uV */
+ ret = DIV_ROUND_CLOSEST_ULL(tmp_64, uV);
+@@ -351,10 +334,11 @@ static int pse_pi_set_current_limit(struct regulator_dev *rdev, int min_uA,
+ {
+ struct pse_controller_dev *pcdev = rdev_get_drvdata(rdev);
+ const struct pse_controller_ops *ops;
+- int id, ret;
++ int id, mW, ret;
++ s64 tmp_64;
+
+ ops = pcdev->ops;
+- if (!ops->pi_set_current_limit)
++ if (!ops->pi_set_pw_limit || !ops->pi_get_voltage)
+ return -EOPNOTSUPP;
+
+ if (max_uA > MAX_PI_CURRENT)
+@@ -362,7 +346,21 @@ static int pse_pi_set_current_limit(struct regulator_dev *rdev, int min_uA,
+
+ id = rdev_get_id(rdev);
+ mutex_lock(&pcdev->lock);
+- ret = ops->pi_set_current_limit(pcdev, id, max_uA);
++ ret = pse_pi_get_voltage(rdev);
++ if (!ret) {
++ dev_err(pcdev->dev, "Voltage null\n");
++ ret = -ERANGE;
++ goto out;
++ }
++ if (ret < 0)
++ goto out;
++
++ tmp_64 = ret;
++ tmp_64 *= max_uA;
++ /* mW = uA * uV / 1000000000 */
++ mW = DIV_ROUND_CLOSEST_ULL(tmp_64, 1000000000);
++ ret = ops->pi_set_pw_limit(pcdev, id, mW);
++out:
+ mutex_unlock(&pcdev->lock);
+
+ return ret;
+@@ -406,7 +404,7 @@ devm_pse_pi_regulator_register(struct pse_controller_dev *pcdev,
+
+ rinit_data->constraints.valid_ops_mask = REGULATOR_CHANGE_STATUS;
+
+- if (pcdev->ops->pi_set_current_limit)
++ if (pcdev->ops->pi_set_pw_limit)
+ rinit_data->constraints.valid_ops_mask |=
+ REGULATOR_CHANGE_CURRENT;
+
+@@ -737,23 +735,6 @@ struct pse_control *of_pse_control_get(struct device_node *node)
+ }
+ EXPORT_SYMBOL_GPL(of_pse_control_get);
+
+-static int _pse_ethtool_get_status(struct pse_controller_dev *pcdev,
+- int id,
+- struct netlink_ext_ack *extack,
+- struct pse_control_status *status)
+-{
+- const struct pse_controller_ops *ops;
+-
+- ops = pcdev->ops;
+- if (!ops->ethtool_get_status) {
+- NL_SET_ERR_MSG(extack,
+- "PSE driver does not support status report");
+- return -EOPNOTSUPP;
+- }
+-
+- return ops->ethtool_get_status(pcdev, id, extack, status);
+-}
+-
+ /**
+ * pse_ethtool_get_status - get status of PSE control
+ * @psec: PSE control pointer
+@@ -766,11 +747,21 @@ int pse_ethtool_get_status(struct pse_control *psec,
+ struct netlink_ext_ack *extack,
+ struct pse_control_status *status)
+ {
++ const struct pse_controller_ops *ops;
++ struct pse_controller_dev *pcdev;
+ int err;
+
+- mutex_lock(&psec->pcdev->lock);
+- err = _pse_ethtool_get_status(psec->pcdev, psec->id, extack, status);
+- mutex_unlock(&psec->pcdev->lock);
++ pcdev = psec->pcdev;
++ ops = pcdev->ops;
++ if (!ops->ethtool_get_status) {
++ NL_SET_ERR_MSG(extack,
++ "PSE driver does not support status report");
++ return -EOPNOTSUPP;
++ }
++
++ mutex_lock(&pcdev->lock);
++ err = ops->ethtool_get_status(pcdev, psec->id, extack, status);
++ mutex_unlock(&pcdev->lock);
+
+ return err;
+ }
+diff --git a/include/linux/pse-pd/pse.h b/include/linux/pse-pd/pse.h
+index 591a53e082e65..df1592022d938 100644
+--- a/include/linux/pse-pd/pse.h
++++ b/include/linux/pse-pd/pse.h
+@@ -75,12 +75,8 @@ struct pse_control_status {
+ * @pi_disable: Configure the PSE PI as disabled.
+ * @pi_get_voltage: Return voltage similarly to get_voltage regulator
+ * callback.
+- * @pi_get_current_limit: Get the configured current limit similarly to
+- * get_current_limit regulator callback.
+- * @pi_set_current_limit: Configure the current limit similarly to
+- * set_current_limit regulator callback.
+- * Should not return an error in case of MAX_PI_CURRENT
+- * current value set.
++ * @pi_get_pw_limit: Get the configured power limit of the PSE PI.
++ * @pi_set_pw_limit: Configure the power limit of the PSE PI.
+ */
+ struct pse_controller_ops {
+ int (*ethtool_get_status)(struct pse_controller_dev *pcdev,
+@@ -91,10 +87,10 @@ struct pse_controller_ops {
+ int (*pi_enable)(struct pse_controller_dev *pcdev, int id);
+ int (*pi_disable)(struct pse_controller_dev *pcdev, int id);
+ int (*pi_get_voltage)(struct pse_controller_dev *pcdev, int id);
+- int (*pi_get_current_limit)(struct pse_controller_dev *pcdev,
+- int id);
+- int (*pi_set_current_limit)(struct pse_controller_dev *pcdev,
+- int id, int max_uA);
++ int (*pi_get_pw_limit)(struct pse_controller_dev *pcdev,
++ int id);
++ int (*pi_set_pw_limit)(struct pse_controller_dev *pcdev,
++ int id, int max_mW);
+ };
+
+ struct module;
+--
+2.39.5
+
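The two conversions this patch standardizes on are mW = uV * uA / 1000000000 (the set path) and uA = mW * 1000000000 / uV (the get path), both computed in a 64-bit intermediate to avoid overflow. Below is a minimal userspace sketch of the same arithmetic; div_round_closest() stands in for the kernel's DIV_ROUND_CLOSEST_ULL(), and the 54 V / 600 mA figures are invented for illustration.

#include <stdint.h>
#include <stdio.h>

/* mirrors the kernel's DIV_ROUND_CLOSEST_ULL() */
static uint64_t div_round_closest(uint64_t x, uint64_t d)
{
	return (x + d / 2) / d;
}

int main(void)
{
	uint64_t uV = 54000000;    /* 54 V port voltage */
	uint64_t max_uA = 600000;  /* 600 mA current limit */

	/* set path: mW = uV * uA / 1000000000 (64-bit product) */
	uint64_t mW = div_round_closest(uV * max_uA, 1000000000ULL);

	/* get path: uA = mW * 1000000000 / uV */
	uint64_t uA = div_round_closest(mW * 1000000000ULL, uV);

	printf("%llu mW, round-trip %llu uA\n",
	       (unsigned long long)mW, (unsigned long long)uA);
	return 0;
}
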
--- /dev/null
+From 8c05fb41896c01561c3ed88202f834a3e15b3f77 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Feb 2025 23:36:10 +0100
+Subject: net/sched: cls_api: fix error handling causing NULL dereference
+
+From: Pierre Riteau <pierre@stackhpc.com>
+
+[ Upstream commit 071ed42cff4fcdd89025d966d48eabef59913bf2 ]
+
+tcf_exts_miss_cookie_base_alloc() calls xa_alloc_cyclic() which can
+return 1 if the allocation succeeded after wrapping. This was treated as
+an error, with value 1 returned to caller tcf_exts_init_ex() which sets
+exts->actions to NULL and returns 1 to caller fl_change().
+
+fl_change() treats err == 1 as success, calling tcf_exts_validate_ex()
+which calls tcf_action_init() with exts->actions as argument, where it
+is dereferenced.
+
+Example trace:
+
+BUG: kernel NULL pointer dereference, address: 0000000000000000
+CPU: 114 PID: 16151 Comm: handler114 Kdump: loaded Not tainted 5.14.0-503.16.1.el9_5.x86_64 #1
+RIP: 0010:tcf_action_init+0x1f8/0x2c0
+Call Trace:
+ tcf_action_init+0x1f8/0x2c0
+ tcf_exts_validate_ex+0x175/0x190
+ fl_change+0x537/0x1120 [cls_flower]
+
+Fixes: 80cd22c35c90 ("net/sched: cls_api: Support hardware miss to tc action")
+Signed-off-by: Pierre Riteau <pierre@stackhpc.com>
+Reviewed-by: Michal Swiatkowski <michal.swiatkowski@linux.intel.com>
+Link: https://patch.msgid.link/20250213223610.320278-1-pierre@stackhpc.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/sched/cls_api.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
+index 8e47e5355be61..4f648af8cfaaf 100644
+--- a/net/sched/cls_api.c
++++ b/net/sched/cls_api.c
+@@ -97,7 +97,7 @@ tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp,
+
+ err = xa_alloc_cyclic(&tcf_exts_miss_cookies_xa, &n->miss_cookie_base,
+ n, xa_limit_32b, &next, GFP_KERNEL);
+- if (err)
++ if (err < 0)
+ goto err_xa_alloc;
+
+ exts->miss_cookie_node = n;
+--
+2.39.5
+
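The contract this fix depends on: xa_alloc_cyclic() returns 0 on plain success, 1 when the entry was stored but the cyclic counter wrapped, and a negative errno on failure, so only err < 0 is a real error. A hedged sketch of correct usage (the example_* names are invented, not from the patched file):

#include <linux/xarray.h>

static DEFINE_XARRAY_ALLOC(example_xa);
static u32 example_next;

static int example_store(void *entry, u32 *idp)
{
	int err;

	err = xa_alloc_cyclic(&example_xa, idp, entry, xa_limit_32b,
			      &example_next, GFP_KERNEL);
	if (err < 0)		/* -ENOMEM, -EBUSY, ...: real failures */
		return err;

	return 0;		/* 0 and 1 both mean the entry is stored */
}
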
--- /dev/null
+From d844e73c59c4f43a744897c4d7ee97d57ef6f27e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Dec 2024 14:06:23 +0100
+Subject: PCI: Export pci_intx_unmanaged() and pcim_intx()
+
+From: Philipp Stanner <pstanner@redhat.com>
+
+[ Upstream commit f546e8033d8f3e45d49622f04ca2fde650b80f6d ]
+
+pci_intx() is a hybrid function which sometimes performs devres operations,
+depending on whether pcim_enable_device() has been used to enable the
+pci_dev. This sometimes-managed nature of the function is problematic.
+Notably, it causes the function to allocate under some circumstances which
+makes it unusable from interrupt context.
+
+Export pcim_intx() (which is always managed) and rename __pcim_intx()
+(which is never managed) to pci_intx_unmanaged() and export it as well.
+
+Then all callers of pci_intx() can be ported to the version they need,
+depending on whether they use pci_enable_device() or pcim_enable_device().
+
+Link: https://lore.kernel.org/r/20241209130632.132074-3-pstanner@redhat.com
+Signed-off-by: Philipp Stanner <pstanner@redhat.com>
+[bhelgaas: commit log]
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Damien Le Moal <dlemoal@kernel.org>
+Stable-dep-of: d555ed45a5a1 ("PCI: Restore original INTX_DISABLE bit by pcim_intx()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pci/devres.c | 24 +++---------------------
+ drivers/pci/pci.c | 29 +++++++++++++++++++++++++++++
+ include/linux/pci.h | 2 ++
+ 3 files changed, 34 insertions(+), 21 deletions(-)
+
+diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c
+index 3b59a86a764b1..3594eea379931 100644
+--- a/drivers/pci/devres.c
++++ b/drivers/pci/devres.c
+@@ -411,31 +411,12 @@ static inline bool mask_contains_bar(int mask, int bar)
+ return mask & BIT(bar);
+ }
+
+-/*
+- * This is a copy of pci_intx() used to bypass the problem of recursive
+- * function calls due to the hybrid nature of pci_intx().
+- */
+-static void __pcim_intx(struct pci_dev *pdev, int enable)
+-{
+- u16 pci_command, new;
+-
+- pci_read_config_word(pdev, PCI_COMMAND, &pci_command);
+-
+- if (enable)
+- new = pci_command & ~PCI_COMMAND_INTX_DISABLE;
+- else
+- new = pci_command | PCI_COMMAND_INTX_DISABLE;
+-
+- if (new != pci_command)
+- pci_write_config_word(pdev, PCI_COMMAND, new);
+-}
+-
+ static void pcim_intx_restore(struct device *dev, void *data)
+ {
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct pcim_intx_devres *res = data;
+
+- __pcim_intx(pdev, res->orig_intx);
++ pci_intx_unmanaged(pdev, res->orig_intx);
+ }
+
+ static struct pcim_intx_devres *get_or_create_intx_devres(struct device *dev)
+@@ -472,10 +453,11 @@ int pcim_intx(struct pci_dev *pdev, int enable)
+ return -ENOMEM;
+
+ res->orig_intx = !enable;
+- __pcim_intx(pdev, enable);
++ pci_intx_unmanaged(pdev, enable);
+
+ return 0;
+ }
++EXPORT_SYMBOL_GPL(pcim_intx);
+
+ static void pcim_disable_device(void *pdev_raw)
+ {
+diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
+index 661f98c6c63a3..d3c3425f7bc57 100644
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -4482,6 +4482,35 @@ void pci_disable_parity(struct pci_dev *dev)
+ }
+ }
+
++/**
++ * pci_intx_unmanaged - enables/disables PCI INTx for device dev,
++ * unmanaged version
++ * @pdev: the PCI device to operate on
++ * @enable: boolean: whether to enable or disable PCI INTx
++ *
++ * Enables/disables PCI INTx for device @pdev
++ *
++ * This function behaves identically to pci_intx(), but is never managed with
++ * devres.
++ */
++void pci_intx_unmanaged(struct pci_dev *pdev, int enable)
++{
++ u16 pci_command, new;
++
++ pci_read_config_word(pdev, PCI_COMMAND, &pci_command);
++
++ if (enable)
++ new = pci_command & ~PCI_COMMAND_INTX_DISABLE;
++ else
++ new = pci_command | PCI_COMMAND_INTX_DISABLE;
++
++ if (new == pci_command)
++ return;
++
++ pci_write_config_word(pdev, PCI_COMMAND, new);
++}
++EXPORT_SYMBOL_GPL(pci_intx_unmanaged);
++
+ /**
+ * pci_intx - enables/disables PCI INTx for device dev
+ * @pdev: the PCI device to operate on
+diff --git a/include/linux/pci.h b/include/linux/pci.h
+index db9b47ce3eefd..b5eb8bda655dc 100644
+--- a/include/linux/pci.h
++++ b/include/linux/pci.h
+@@ -1350,6 +1350,7 @@ int __must_check pcim_set_mwi(struct pci_dev *dev);
+ int pci_try_set_mwi(struct pci_dev *dev);
+ void pci_clear_mwi(struct pci_dev *dev);
+ void pci_disable_parity(struct pci_dev *dev);
++void pci_intx_unmanaged(struct pci_dev *pdev, int enable);
+ void pci_intx(struct pci_dev *dev, int enable);
+ bool pci_check_and_mask_intx(struct pci_dev *dev);
+ bool pci_check_and_unmask_intx(struct pci_dev *dev);
+@@ -2297,6 +2298,7 @@ static inline void pci_fixup_device(enum pci_fixup_pass pass,
+ struct pci_dev *dev) { }
+ #endif
+
++int pcim_intx(struct pci_dev *pdev, int enabled);
+ int pcim_request_all_regions(struct pci_dev *pdev, const char *name);
+ void __iomem *pcim_iomap(struct pci_dev *pdev, int bar, unsigned long maxlen);
+ void __iomem *pcim_iomap_region(struct pci_dev *pdev, int bar,
+--
+2.39.5
+
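The porting rule from the commit message, sketched under invented names: a driver that enables the device with the managed API should pair it with the managed INTx helper, while pci_enable_device() users call pci_intx_unmanaged() directly.

#include <linux/pci.h>

static int example_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	int ret;

	ret = pcim_enable_device(pdev);		/* managed enable... */
	if (ret)
		return ret;

	ret = pcim_intx(pdev, 1);		/* ...so managed INTx too */
	if (ret)
		return ret;

	/* a pci_enable_device() driver would instead call
	 * pci_intx_unmanaged(pdev, 1) here */
	return 0;
}
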
--- /dev/null
+From 5febb0c979ecf7e4078f8b36c577b2fe7efaf3fa Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 9 Dec 2024 14:06:33 +0100
+Subject: PCI: Remove devres from pci_intx()
+
+From: Philipp Stanner <pstanner@redhat.com>
+
+[ Upstream commit dfa2f4d5f9e5d757700cefa8ee480099889f1c69 ]
+
+pci_intx() is a hybrid function which can sometimes be managed through
+devres. This hybrid nature is undesirable.
+
+Since all users of pci_intx() have by now been ported either to
+always-managed pcim_intx() or never-managed pci_intx_unmanaged(), the
+devres functionality can be removed from pci_intx().
+
+Consequently, pci_intx_unmanaged() is now redundant, because pci_intx()
+itself is now unmanaged.
+
+Remove the devres functionality from pci_intx(). Have all users of
+pci_intx_unmanaged() call pci_intx(). Remove pci_intx_unmanaged().
+
+Link: https://lore.kernel.org/r/20241209130632.132074-13-pstanner@redhat.com
+Signed-off-by: Philipp Stanner <pstanner@redhat.com>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Acked-by: Paolo Abeni <pabeni@redhat.com>
+Stable-dep-of: d555ed45a5a1 ("PCI: Restore original INTX_DISABLE bit by pcim_intx()")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pci/devres.c | 4 ++--
+ drivers/pci/pci.c | 43 ++-----------------------------------------
+ include/linux/pci.h | 1 -
+ 3 files changed, 4 insertions(+), 44 deletions(-)
+
+diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c
+index 3594eea379931..cc31951347210 100644
+--- a/drivers/pci/devres.c
++++ b/drivers/pci/devres.c
+@@ -416,7 +416,7 @@ static void pcim_intx_restore(struct device *dev, void *data)
+ struct pci_dev *pdev = to_pci_dev(dev);
+ struct pcim_intx_devres *res = data;
+
+- pci_intx_unmanaged(pdev, res->orig_intx);
++ pci_intx(pdev, res->orig_intx);
+ }
+
+ static struct pcim_intx_devres *get_or_create_intx_devres(struct device *dev)
+@@ -453,7 +453,7 @@ int pcim_intx(struct pci_dev *pdev, int enable)
+ return -ENOMEM;
+
+ res->orig_intx = !enable;
+- pci_intx_unmanaged(pdev, enable);
++ pci_intx(pdev, enable);
+
+ return 0;
+ }
+diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
+index d3c3425f7bc57..b0ae4bc1a1bee 100644
+--- a/drivers/pci/pci.c
++++ b/drivers/pci/pci.c
+@@ -4483,17 +4483,13 @@ void pci_disable_parity(struct pci_dev *dev)
+ }
+
+ /**
+- * pci_intx_unmanaged - enables/disables PCI INTx for device dev,
+- * unmanaged version
++ * pci_intx - enables/disables PCI INTx for device dev
+ * @pdev: the PCI device to operate on
+ * @enable: boolean: whether to enable or disable PCI INTx
+ *
+ * Enables/disables PCI INTx for device @pdev
+- *
+- * This function behaves identically to pci_intx(), but is never managed with
+- * devres.
+ */
+-void pci_intx_unmanaged(struct pci_dev *pdev, int enable)
++void pci_intx(struct pci_dev *pdev, int enable)
+ {
+ u16 pci_command, new;
+
+@@ -4509,41 +4505,6 @@ void pci_intx_unmanaged(struct pci_dev *pdev, int enable)
+
+ pci_write_config_word(pdev, PCI_COMMAND, new);
+ }
+-EXPORT_SYMBOL_GPL(pci_intx_unmanaged);
+-
+-/**
+- * pci_intx - enables/disables PCI INTx for device dev
+- * @pdev: the PCI device to operate on
+- * @enable: boolean: whether to enable or disable PCI INTx
+- *
+- * Enables/disables PCI INTx for device @pdev
+- *
+- * NOTE:
+- * This is a "hybrid" function: It's normally unmanaged, but becomes managed
+- * when pcim_enable_device() has been called in advance. This hybrid feature is
+- * DEPRECATED! If you want managed cleanup, use pcim_intx() instead.
+- */
+-void pci_intx(struct pci_dev *pdev, int enable)
+-{
+- u16 pci_command, new;
+-
+- pci_read_config_word(pdev, PCI_COMMAND, &pci_command);
+-
+- if (enable)
+- new = pci_command & ~PCI_COMMAND_INTX_DISABLE;
+- else
+- new = pci_command | PCI_COMMAND_INTX_DISABLE;
+-
+- if (new != pci_command) {
+- /* Preserve the "hybrid" behavior for backwards compatibility */
+- if (pci_is_managed(pdev)) {
+- WARN_ON_ONCE(pcim_intx(pdev, enable) != 0);
+- return;
+- }
+-
+- pci_write_config_word(pdev, PCI_COMMAND, new);
+- }
+-}
+ EXPORT_SYMBOL_GPL(pci_intx);
+
+ /**
+diff --git a/include/linux/pci.h b/include/linux/pci.h
+index b5eb8bda655dc..f05903dd7695e 100644
+--- a/include/linux/pci.h
++++ b/include/linux/pci.h
+@@ -1350,7 +1350,6 @@ int __must_check pcim_set_mwi(struct pci_dev *dev);
+ int pci_try_set_mwi(struct pci_dev *dev);
+ void pci_clear_mwi(struct pci_dev *dev);
+ void pci_disable_parity(struct pci_dev *dev);
+-void pci_intx_unmanaged(struct pci_dev *pdev, int enable);
+ void pci_intx(struct pci_dev *dev, int enable);
+ bool pci_check_and_mask_intx(struct pci_dev *dev);
+ bool pci_check_and_unmask_intx(struct pci_dev *dev);
+--
+2.39.5
+
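For reference, after this patch pci_intx() reduces to the plain read-modify-write below. This is a paraphrase of the resulting function under an invented name, not new behaviour: flip PCI_COMMAND_INTX_DISABLE and write PCI_COMMAND only when the value actually changes.

#include <linux/pci.h>

static void example_intx(struct pci_dev *pdev, int enable)
{
	u16 cmd, new;

	pci_read_config_word(pdev, PCI_COMMAND, &cmd);
	new = enable ? cmd & ~PCI_COMMAND_INTX_DISABLE
		     : cmd | PCI_COMMAND_INTX_DISABLE;
	if (new != cmd)		/* skip the config write when nothing changes */
		pci_write_config_word(pdev, PCI_COMMAND, new);
}
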
--- /dev/null
+From a2b38b54acdf05db9ad39f475292ffc647632b0e Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 31 Oct 2024 14:42:56 +0100
+Subject: PCI: Restore original INTX_DISABLE bit by pcim_intx()
+
+From: Takashi Iwai <tiwai@suse.de>
+
+[ Upstream commit d555ed45a5a10a813528c7685f432369d536ae3d ]
+
+pcim_intx() tries to restore the INTx bit at removal via devres, but there
+is a chance that it restores a wrong value.
+
+Because the value to be restored is blindly assumed to be the negative of
+the enable argument, when a driver calls pcim_intx() unnecessarily for the
+already enabled state, it'll restore to the disabled state in turn. That
+is, the function assumes the case like:
+
+ // INTx == 1
+ pcim_intx(pdev, 0); // old INTx value assumed to be 1 -> correct
+
+but it might be like the following, too:
+
+ // INTx == 0
+ pcim_intx(pdev, 0); // old INTx value assumed to be 1 -> wrong
+
+Also, when a driver calls pcim_intx() multiple times with different enable
+argument values, the last one will win no matter what value it is. This
+can lead to inconsistency, e.g.
+
+ // INTx == 1
+ pcim_intx(pdev, 0); // OK
+ ...
+ pcim_intx(pdev, 1); // now old INTx wrongly assumed to be 0
+
+This patch addresses those inconsistencies by saving the original INTx
+state at the first pcim_intx() call. For that, get_or_create_intx_devres()
+is folded into pcim_intx() caller side; it allows us to simply check the
+already allocated devres and record the original INTx along with the
+devres_alloc() call.
+
+Link: https://lore.kernel.org/r/20241031134300.10296-1-tiwai@suse.de
+Fixes: 25216afc9db5 ("PCI: Add managed pcim_intx()")
+Link: https://lore.kernel.org/87v7xk2ps5.wl-tiwai@suse.de
+Signed-off-by: Takashi Iwai <tiwai@suse.de>
+Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
+Reviewed-by: Philipp Stanner <pstanner@redhat.com>
+Cc: stable@vger.kernel.org # v6.11+
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/pci/devres.c | 34 +++++++++++++++++++---------------
+ 1 file changed, 19 insertions(+), 15 deletions(-)
+
+diff --git a/drivers/pci/devres.c b/drivers/pci/devres.c
+index cc31951347210..1adebcb263bd0 100644
+--- a/drivers/pci/devres.c
++++ b/drivers/pci/devres.c
+@@ -419,19 +419,12 @@ static void pcim_intx_restore(struct device *dev, void *data)
+ pci_intx(pdev, res->orig_intx);
+ }
+
+-static struct pcim_intx_devres *get_or_create_intx_devres(struct device *dev)
++static void save_orig_intx(struct pci_dev *pdev, struct pcim_intx_devres *res)
+ {
+- struct pcim_intx_devres *res;
+-
+- res = devres_find(dev, pcim_intx_restore, NULL, NULL);
+- if (res)
+- return res;
++ u16 pci_command;
+
+- res = devres_alloc(pcim_intx_restore, sizeof(*res), GFP_KERNEL);
+- if (res)
+- devres_add(dev, res);
+-
+- return res;
++ pci_read_config_word(pdev, PCI_COMMAND, &pci_command);
++ res->orig_intx = !(pci_command & PCI_COMMAND_INTX_DISABLE);
+ }
+
+ /**
+@@ -447,12 +440,23 @@ static struct pcim_intx_devres *get_or_create_intx_devres(struct device *dev)
+ int pcim_intx(struct pci_dev *pdev, int enable)
+ {
+ struct pcim_intx_devres *res;
++ struct device *dev = &pdev->dev;
+
+- res = get_or_create_intx_devres(&pdev->dev);
+- if (!res)
+- return -ENOMEM;
++ /*
++ * pcim_intx() must only restore the INTx value that existed before the
++ * driver was loaded, i.e., before it called pcim_intx() for the
++ * first time.
++ */
++ res = devres_find(dev, pcim_intx_restore, NULL, NULL);
++ if (!res) {
++ res = devres_alloc(pcim_intx_restore, sizeof(*res), GFP_KERNEL);
++ if (!res)
++ return -ENOMEM;
++
++ save_orig_intx(pdev, res);
++ devres_add(dev, res);
++ }
+
+- res->orig_intx = !enable;
+ pci_intx(pdev, enable);
+
+ return 0;
+--
+2.39.5
+
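The shape of the fix, generalized: look up the devres entry first, and capture the original hardware state only when allocating it for the first time, so later calls can never overwrite the value the release path will restore. A sketch with invented example_* names; the pr_info() stands in for the real restore write.

#include <linux/device.h>
#include <linux/slab.h>

struct example_state {
	bool orig;			/* hardware state at first call */
};

static void example_restore(struct device *dev, void *data)
{
	struct example_state *st = data;

	pr_info("would restore original state: %d\n", st->orig);
}

static int example_save_once(struct device *dev, bool current_hw_state)
{
	struct example_state *st;

	st = devres_find(dev, example_restore, NULL, NULL);
	if (!st) {
		st = devres_alloc(example_restore, sizeof(*st), GFP_KERNEL);
		if (!st)
			return -ENOMEM;
		st->orig = current_hw_state;	/* captured exactly once */
		devres_add(dev, st);
	}
	return 0;
}
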
--- /dev/null
+From 0ef7976f9aa24afb35b02707d1dba06610db63cc Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Sun, 12 Jan 2025 19:24:46 +0100
+Subject: powerpc/64s: Rewrite __real_pte() and __rpte_to_hidx() as static
+ inline
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+[ Upstream commit 61bcc752d1b81fde3cae454ff20c1d3c359df500 ]
+
+Rewrite __real_pte() and __rpte_to_hidx() as static inline in order to
+avoid the following warnings/errors when building with 4k page size:
+
+ CC arch/powerpc/mm/book3s64/hash_tlb.o
+ arch/powerpc/mm/book3s64/hash_tlb.c: In function 'hpte_need_flush':
+ arch/powerpc/mm/book3s64/hash_tlb.c:49:16: error: variable 'offset' set but not used [-Werror=unused-but-set-variable]
+ 49 | int i, offset;
+ | ^~~~~~
+
+ CC arch/powerpc/mm/book3s64/hash_native.o
+ arch/powerpc/mm/book3s64/hash_native.c: In function 'native_flush_hash_range':
+ arch/powerpc/mm/book3s64/hash_native.c:782:29: error: variable 'index' set but not used [-Werror=unused-but-set-variable]
+ 782 | unsigned long hash, index, hidx, shift, slot;
+ | ^~~~~
+
+Reported-by: kernel test robot <lkp@intel.com>
+Closes: https://lore.kernel.org/oe-kbuild-all/202501081741.AYFwybsq-lkp@intel.com/
+Fixes: ff31e105464d ("powerpc/mm/hash64: Store the slot information at the right offset for hugetlb")
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Reviewed-by: Ritesh Harjani (IBM) <ritesh.list@gmail.com>
+Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
+Link: https://patch.msgid.link/e0d340a5b7bd478ecbf245d826e6ab2778b74e06.1736706263.git.christophe.leroy@csgroup.eu
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/include/asm/book3s/64/hash-4k.h | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/arch/powerpc/include/asm/book3s/64/hash-4k.h b/arch/powerpc/include/asm/book3s/64/hash-4k.h
+index c3efacab4b941..aa90a048f319a 100644
+--- a/arch/powerpc/include/asm/book3s/64/hash-4k.h
++++ b/arch/powerpc/include/asm/book3s/64/hash-4k.h
+@@ -77,9 +77,17 @@
+ /*
+ * With 4K page size the real_pte machinery is all nops.
+ */
+-#define __real_pte(e, p, o) ((real_pte_t){(e)})
++static inline real_pte_t __real_pte(pte_t pte, pte_t *ptep, int offset)
++{
++ return (real_pte_t){pte};
++}
++
+ #define __rpte_to_pte(r) ((r).pte)
+-#define __rpte_to_hidx(r,index) (pte_val(__rpte_to_pte(r)) >> H_PAGE_F_GIX_SHIFT)
++
++static inline unsigned long __rpte_to_hidx(real_pte_t rpte, unsigned long index)
++{
++ return pte_val(__rpte_to_pte(rpte)) >> H_PAGE_F_GIX_SHIFT;
++}
+
+ #define pte_iterate_hashed_subpages(rpte, psize, va, index, shift) \
+ do { \
+--
+2.39.5
+
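Why the static inline form silences the warning: a macro that expands to only one of its arguments leaves the caller's other variables textually unused, whereas a static inline consumes every argument at the call site. A hypothetical reduction of the warning the patch fixes:

#define noop_macro(e, p, o)	(e)	/* drops p and o at preprocessing */

static inline int noop_inline(int e, int *p, int o)
{
	return e;			/* p and o are "used" as arguments */
}

int example_caller(int e, int *p)
{
	int offset;

	offset = 4;
	/* expands to just (e): 'offset' is set but never read -> warning */
	return noop_macro(e, p, offset);
	/* return noop_inline(e, p, offset); -- compiles warning-free */
}
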
--- /dev/null
+From 9720b897f68663d403633c3d65b9ba079d368425 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 3 Feb 2025 11:14:57 +0100
+Subject: powerpc/code-patching: Disable KASAN report during patching via
+ temporary mm
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+[ Upstream commit dc9c5166c3cb044f8a001e397195242fd6796eee ]
+
+Erhard reports the following KASAN hit on Talos II (power9) with kernel 6.13:
+
+[ 12.028126] ==================================================================
+[ 12.028198] BUG: KASAN: user-memory-access in copy_to_kernel_nofault+0x8c/0x1a0
+[ 12.028260] Write of size 8 at addr 0000187e458f2000 by task systemd/1
+
+[ 12.028346] CPU: 87 UID: 0 PID: 1 Comm: systemd Tainted: G T 6.13.0-P9-dirty #3
+[ 12.028408] Tainted: [T]=RANDSTRUCT
+[ 12.028446] Hardware name: T2P9D01 REV 1.01 POWER9 0x4e1202 opal:skiboot-bc106a0 PowerNV
+[ 12.028500] Call Trace:
+[ 12.028536] [c000000008dbf3b0] [c000000001656a48] dump_stack_lvl+0xbc/0x110 (unreliable)
+[ 12.028609] [c000000008dbf3f0] [c0000000006e2fc8] print_report+0x6b0/0x708
+[ 12.028666] [c000000008dbf4e0] [c0000000006e2454] kasan_report+0x164/0x300
+[ 12.028725] [c000000008dbf600] [c0000000006e54d4] kasan_check_range+0x314/0x370
+[ 12.028784] [c000000008dbf640] [c0000000006e6310] __kasan_check_write+0x20/0x40
+[ 12.028842] [c000000008dbf660] [c000000000578e8c] copy_to_kernel_nofault+0x8c/0x1a0
+[ 12.028902] [c000000008dbf6a0] [c0000000000acfe4] __patch_instructions+0x194/0x210
+[ 12.028965] [c000000008dbf6e0] [c0000000000ade80] patch_instructions+0x150/0x590
+[ 12.029026] [c000000008dbf7c0] [c0000000001159bc] bpf_arch_text_copy+0x6c/0xe0
+[ 12.029085] [c000000008dbf800] [c000000000424250] bpf_jit_binary_pack_finalize+0x40/0xc0
+[ 12.029147] [c000000008dbf830] [c000000000115dec] bpf_int_jit_compile+0x3bc/0x930
+[ 12.029206] [c000000008dbf990] [c000000000423720] bpf_prog_select_runtime+0x1f0/0x280
+[ 12.029266] [c000000008dbfa00] [c000000000434b18] bpf_prog_load+0xbb8/0x1370
+[ 12.029324] [c000000008dbfb70] [c000000000436ebc] __sys_bpf+0x5ac/0x2e00
+[ 12.029379] [c000000008dbfd00] [c00000000043a228] sys_bpf+0x28/0x40
+[ 12.029435] [c000000008dbfd20] [c000000000038eb4] system_call_exception+0x334/0x610
+[ 12.029497] [c000000008dbfe50] [c00000000000c270] system_call_vectored_common+0xf0/0x280
+[ 12.029561] --- interrupt: 3000 at 0x3fff82f5cfa8
+[ 12.029608] NIP: 00003fff82f5cfa8 LR: 00003fff82f5cfa8 CTR: 0000000000000000
+[ 12.029660] REGS: c000000008dbfe80 TRAP: 3000 Tainted: G T (6.13.0-P9-dirty)
+[ 12.029735] MSR: 900000000280f032 <SF,HV,VEC,VSX,EE,PR,FP,ME,IR,DR,RI> CR: 42004848 XER: 00000000
+[ 12.029855] IRQMASK: 0
+ GPR00: 0000000000000169 00003fffdcf789a0 00003fff83067100 0000000000000005
+ GPR04: 00003fffdcf78a98 0000000000000090 0000000000000000 0000000000000008
+ GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000000000
+ GPR12: 0000000000000000 00003fff836ff7e0 c000000000010678 0000000000000000
+ GPR16: 0000000000000000 0000000000000000 00003fffdcf78f28 00003fffdcf78f90
+ GPR20: 0000000000000000 0000000000000000 0000000000000000 00003fffdcf78f80
+ GPR24: 00003fffdcf78f70 00003fffdcf78d10 00003fff835c7239 00003fffdcf78bd8
+ GPR28: 00003fffdcf78a98 0000000000000000 0000000000000000 000000011f547580
+[ 12.030316] NIP [00003fff82f5cfa8] 0x3fff82f5cfa8
+[ 12.030361] LR [00003fff82f5cfa8] 0x3fff82f5cfa8
+[ 12.030405] --- interrupt: 3000
+[ 12.030444] ==================================================================
+
+Commit c28c15b6d28a ("powerpc/code-patching: Use temporary mm for
+Radix MMU") is inspired from x86 but unlike x86 is doesn't disable
+KASAN reports during patching. This wasn't a problem at the begining
+because __patch_mem() is not instrumented.
+
+Commit 465cabc97b42 ("powerpc/code-patching: introduce
+patch_instructions()") use copy_to_kernel_nofault() to copy several
+instructions at once. But when using temporary mm the destination is
+not regular kernel memory but a kind of kernel-like memory located
+in user address space. Because it is not in kernel address space it is
+not covered by KASAN shadow memory. Since commit e4137f08816b ("mm,
+kasan, kmsan: instrument copy_from/to_kernel_nofault") KASAN reports
+bad accesses from copy_to_kernel_nofault(). Here a bad access to user
+memory is reported because KASAN detects the lack of shadow memory and
+the address is below TASK_SIZE.
+
+Do like x86 in commit b3fd8e83ada0 ("x86/alternatives: Use temporary
+mm for text poking") and disable KASAN reports during patching when
+using temporary mm.
+
+Reported-by: Erhard Furtner <erhard_f@mailbox.org>
+Closes: https://lore.kernel.org/all/20250201151435.48400261@yea/
+Fixes: 465cabc97b42 ("powerpc/code-patching: introduce patch_instructions()")
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Acked-by: Michael Ellerman <mpe@ellerman.id.au>
+Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
+Link: https://patch.msgid.link/1c05b2a1b02ad75b981cfc45927e0b4a90441046.1738577687.git.christophe.leroy@csgroup.eu
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/lib/code-patching.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
+index af97fbb3c257e..81c0f673eb252 100644
+--- a/arch/powerpc/lib/code-patching.c
++++ b/arch/powerpc/lib/code-patching.c
+@@ -493,7 +493,9 @@ static int __do_patch_instructions_mm(u32 *addr, u32 *code, size_t len, bool rep
+
+ orig_mm = start_using_temp_mm(patching_mm);
+
++ kasan_disable_current();
+ err = __patch_instructions(patch_addr, code, len, repeat_instr);
++ kasan_enable_current();
+
+ /* context synchronisation performed by __patch_instructions */
+ stop_using_temp_mm(patching_mm, orig_mm);
+--
+2.39.5
+
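The bracket the patch adds, in isolation: suppress KASAN reports around a write through an alias that has no shadow memory. A sketch only; the destination is assumed to be a temporary-mm patching address, and the function name is invented.

#include <linux/kasan.h>
#include <linux/uaccess.h>

static long example_poke(void *patch_addr, const void *code, size_t len)
{
	long err;

	kasan_disable_current();	/* alias has no KASAN shadow */
	err = copy_to_kernel_nofault(patch_addr, code, len);
	kasan_enable_current();

	return err;
}
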
--- /dev/null
+From 6d15bad8bf45e925bccfff3e5025191af67f1a1f Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Wed, 12 Feb 2025 07:46:28 +0100
+Subject: powerpc/code-patching: Fix KASAN hit by not flagging text patching
+ area as VM_ALLOC
+
+From: Christophe Leroy <christophe.leroy@csgroup.eu>
+
+[ Upstream commit d262a192d38e527faa5984629aabda2e0d1c4f54 ]
+
+Erhard reported the following KASAN hit while booting his PowerMac G4
+with a KASAN-enabled kernel 6.13-rc6:
+
+ BUG: KASAN: vmalloc-out-of-bounds in copy_to_kernel_nofault+0xd8/0x1c8
+ Write of size 8 at addr f1000000 by task chronyd/1293
+
+ CPU: 0 UID: 123 PID: 1293 Comm: chronyd Tainted: G W 6.13.0-rc6-PMacG4 #2
+ Tainted: [W]=WARN
+ Hardware name: PowerMac3,6 7455 0x80010303 PowerMac
+ Call Trace:
+ [c2437590] [c1631a84] dump_stack_lvl+0x70/0x8c (unreliable)
+ [c24375b0] [c0504998] print_report+0xdc/0x504
+ [c2437610] [c050475c] kasan_report+0xf8/0x108
+ [c2437690] [c0505a3c] kasan_check_range+0x24/0x18c
+ [c24376a0] [c03fb5e4] copy_to_kernel_nofault+0xd8/0x1c8
+ [c24376c0] [c004c014] patch_instructions+0x15c/0x16c
+ [c2437710] [c00731a8] bpf_arch_text_copy+0x60/0x7c
+ [c2437730] [c0281168] bpf_jit_binary_pack_finalize+0x50/0xac
+ [c2437750] [c0073cf4] bpf_int_jit_compile+0xb30/0xdec
+ [c2437880] [c0280394] bpf_prog_select_runtime+0x15c/0x478
+ [c24378d0] [c1263428] bpf_prepare_filter+0xbf8/0xc14
+ [c2437990] [c12677ec] bpf_prog_create_from_user+0x258/0x2b4
+ [c24379d0] [c027111c] do_seccomp+0x3dc/0x1890
+ [c2437ac0] [c001d8e0] system_call_exception+0x2dc/0x420
+ [c2437f30] [c00281ac] ret_from_syscall+0x0/0x2c
+ --- interrupt: c00 at 0x5a1274
+ NIP: 005a1274 LR: 006a3b3c CTR: 005296c8
+ REGS: c2437f40 TRAP: 0c00 Tainted: G W (6.13.0-rc6-PMacG4)
+ MSR: 0200f932 <VEC,EE,PR,FP,ME,IR,DR,RI> CR: 24004422 XER: 00000000
+
+ GPR00: 00000166 af8f3fa0 a7ee3540 00000001 00000000 013b6500 005a5858 0200f932
+ GPR08: 00000000 00001fe9 013d5fc8 005296c8 2822244c 00b2fcd8 00000000 af8f4b57
+ GPR16: 00000000 00000001 00000000 00000000 00000000 00000001 00000000 00000002
+ GPR24: 00afdbb0 00000000 00000000 00000000 006e0004 013ce060 006e7c1c 00000001
+ NIP [005a1274] 0x5a1274
+ LR [006a3b3c] 0x6a3b3c
+ --- interrupt: c00
+
+ The buggy address belongs to the virtual mapping at
+ [f1000000, f1002000) created by:
+ text_area_cpu_up+0x20/0x190
+
+ The buggy address belongs to the physical page:
+ page: refcount:1 mapcount:0 mapping:00000000 index:0x0 pfn:0x76e30
+ flags: 0x80000000(zone=2)
+ raw: 80000000 00000000 00000122 00000000 00000000 00000000 ffffffff 00000001
+ raw: 00000000
+ page dumped because: kasan: bad access detected
+
+ Memory state around the buggy address:
+ f0ffff00: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ f0ffff80: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
+ >f1000000: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
+ ^
+ f1000080: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
+ f1000100: f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8 f8
+ ==================================================================
+
+f8 corresponds to KASAN_VMALLOC_INVALID which means the area is not
+initialised hence not supposed to be used yet.
+
+Powerpc text patching infrastructure allocates a virtual memory area
+using get_vm_area() and flags it as VM_ALLOC. But that flag is meant
+to be used for vmalloc() and vmalloc() allocated memory is not
+supposed to be used before a call to __vmalloc_node_range() which is
+never called for that area.
+
+That went undetected until commit e4137f08816b ("mm, kasan, kmsan:
+instrument copy_from/to_kernel_nofault")
+
+The area allocated by text_area_cpu_up() is not vmalloc memory, it is
+mapped directly on demand when needed by map_kernel_page(). There is
+no VM flag corresponding to such usage, so just pass no flag. That way
+the area will be unpoisoned and usable immediately.
+
+Reported-by: Erhard Furtner <erhard_f@mailbox.org>
+Closes: https://lore.kernel.org/all/20250112135832.57c92322@yea/
+Fixes: 37bc3e5fd764 ("powerpc/lib/code-patching: Use alternate map for patch_instruction()")
+Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
+Signed-off-by: Madhavan Srinivasan <maddy@linux.ibm.com>
+Link: https://patch.msgid.link/06621423da339b374f48c0886e3a5db18e896be8.1739342693.git.christophe.leroy@csgroup.eu
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ arch/powerpc/lib/code-patching.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
+index 81c0f673eb252..f84e0337cc029 100644
+--- a/arch/powerpc/lib/code-patching.c
++++ b/arch/powerpc/lib/code-patching.c
+@@ -108,7 +108,7 @@ static int text_area_cpu_up(unsigned int cpu)
+ unsigned long addr;
+ int err;
+
+- area = get_vm_area(PAGE_SIZE, VM_ALLOC);
++ area = get_vm_area(PAGE_SIZE, 0);
+ if (!area) {
+ WARN_ONCE(1, "Failed to create text area for cpu %d\n",
+ cpu);
+--
+2.39.5
+
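The corrected allocation, restated as a sketch: a range reserved with get_vm_area() and populated later by map_kernel_page() is not vmalloc-backed, so it must carry no VM flags; VM_ALLOC would leave it KASAN-poisoned until a __vmalloc_node_range() call that never happens here. The function name below is invented.

#include <linux/vmalloc.h>

static struct vm_struct *example_reserve(void)
{
	/* flags == 0: reserve address space only; pages are mapped on
	 * demand later, so the area must not pretend to be vmalloc
	 * memory */
	return get_vm_area(PAGE_SIZE, 0);
}
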
--- /dev/null
+From fa448d203fb13c76dc22efb5e7521731b7d0c879 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 18 Feb 2025 19:29:40 +0100
+Subject: Revert "net: skb: introduce and use a single page frag cache"
+
+From: Paolo Abeni <pabeni@redhat.com>
+
+[ Upstream commit 011b0335903832facca86cd8ed05d7d8d94c9c76 ]
+
+This reverts commit dbae2b062824 ("net: skb: introduce and use a single
+page frag cache"). The intended goal of such change was to counter a
+performance regression introduced by commit 3226b158e67c ("net: avoid
+32 x truesize under-estimation for tiny skbs").
+
+Unfortunately, the blamed commit introduces another regression for the
+virtio_net driver. Such a driver calls napi_alloc_skb() with a tiny
+size, so that the whole head frag can fit in a 512-byte block.
+
+The single page frag cache uses a 1K fragment for such allocation, and
+the additional overhead, under a flood of small UDP packets, makes the page
+allocator a bottleneck.
+
+Thanks to commit bf9f1baa279f ("net: add dedicated kmem_cache for
+typical/small skb->head"), this revert does not re-introduce the
+original regression. Actually, in the relevant test on top of this
+revert, I measure a small but noticeable positive delta, just above
+noise level.
+
+The revert itself required some additional mangling due to the
+introduction of the SKB_HEAD_ALIGN() helper and local lock infra in the
+affected code.
+
+Suggested-by: Eric Dumazet <edumazet@google.com>
+Fixes: dbae2b062824 ("net: skb: introduce and use a single page frag cache")
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Link: https://patch.msgid.link/e649212fde9f0fdee23909ca0d14158d32bb7425.1738877290.git.pabeni@redhat.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Stable-dep-of: 6bc7e4eb0499 ("Revert "net: skb: introduce and use a single page frag cache"")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/linux/netdevice.h | 1 -
+ net/core/dev.c | 17 +++++++
+ net/core/skbuff.c | 104 ++------------------------------------
+ 3 files changed, 22 insertions(+), 100 deletions(-)
+
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index db4facb384684..48437fd44e32c 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -4050,7 +4050,6 @@ void netif_receive_skb_list(struct list_head *head);
+ gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb);
+ void napi_gro_flush(struct napi_struct *napi, bool flush_old);
+ struct sk_buff *napi_get_frags(struct napi_struct *napi);
+-void napi_get_frags_check(struct napi_struct *napi);
+ gro_result_t napi_gro_frags(struct napi_struct *napi);
+
+ static inline void napi_free_frags(struct napi_struct *napi)
+diff --git a/net/core/dev.c b/net/core/dev.c
+index 6dfed2746c528..5e3a82eba041a 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -6814,6 +6814,23 @@ netif_napi_dev_list_add(struct net_device *dev, struct napi_struct *napi)
+ list_add_rcu(&napi->dev_list, higher); /* adds after higher */
+ }
+
++/* Double check that napi_get_frags() allocates skbs with
++ * skb->head being backed by slab, not a page fragment.
++ * This is to make sure bug fixed in 3226b158e67c
++ * ("net: avoid 32 x truesize under-estimation for tiny skbs")
++ * does not accidentally come back.
++ */
++static void napi_get_frags_check(struct napi_struct *napi)
++{
++ struct sk_buff *skb;
++
++ local_bh_disable();
++ skb = napi_get_frags(napi);
++ WARN_ON_ONCE(skb && skb->head_frag);
++ napi_free_frags(napi);
++ local_bh_enable();
++}
++
+ void netif_napi_add_weight_locked(struct net_device *dev,
+ struct napi_struct *napi,
+ int (*poll)(struct napi_struct *, int),
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index f251a99f8d421..d2697211e00a0 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -223,67 +223,9 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr)
+ #define NAPI_SKB_CACHE_BULK 16
+ #define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2)
+
+-#if PAGE_SIZE == SZ_4K
+-
+-#define NAPI_HAS_SMALL_PAGE_FRAG 1
+-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc)
+-
+-/* specialized page frag allocator using a single order 0 page
+- * and slicing it into 1K sized fragment. Constrained to systems
+- * with a very limited amount of 1K fragments fitting a single
+- * page - to avoid excessive truesize underestimation
+- */
+-
+-struct page_frag_1k {
+- void *va;
+- u16 offset;
+- bool pfmemalloc;
+-};
+-
+-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp)
+-{
+- struct page *page;
+- int offset;
+-
+- offset = nc->offset - SZ_1K;
+- if (likely(offset >= 0))
+- goto use_frag;
+-
+- page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
+- if (!page)
+- return NULL;
+-
+- nc->va = page_address(page);
+- nc->pfmemalloc = page_is_pfmemalloc(page);
+- offset = PAGE_SIZE - SZ_1K;
+- page_ref_add(page, offset / SZ_1K);
+-
+-use_frag:
+- nc->offset = offset;
+- return nc->va + offset;
+-}
+-#else
+-
+-/* the small page is actually unused in this build; add dummy helpers
+- * to please the compiler and avoid later preprocessor's conditionals
+- */
+-#define NAPI_HAS_SMALL_PAGE_FRAG 0
+-#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false
+-
+-struct page_frag_1k {
+-};
+-
+-static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask)
+-{
+- return NULL;
+-}
+-
+-#endif
+-
+ struct napi_alloc_cache {
+ local_lock_t bh_lock;
+ struct page_frag_cache page;
+- struct page_frag_1k page_small;
+ unsigned int skb_count;
+ void *skb_cache[NAPI_SKB_CACHE_SIZE];
+ };
+@@ -293,23 +235,6 @@ static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache) = {
+ .bh_lock = INIT_LOCAL_LOCK(bh_lock),
+ };
+
+-/* Double check that napi_get_frags() allocates skbs with
+- * skb->head being backed by slab, not a page fragment.
+- * This is to make sure bug fixed in 3226b158e67c
+- * ("net: avoid 32 x truesize under-estimation for tiny skbs")
+- * does not accidentally come back.
+- */
+-void napi_get_frags_check(struct napi_struct *napi)
+-{
+- struct sk_buff *skb;
+-
+- local_bh_disable();
+- skb = napi_get_frags(napi);
+- WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag);
+- napi_free_frags(napi);
+- local_bh_enable();
+-}
+-
+ void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask)
+ {
+ struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
+@@ -816,11 +741,8 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
+
+ /* If requested length is either too small or too big,
+ * we use kmalloc() for skb->head allocation.
+- * When the small frag allocator is available, prefer it over kmalloc
+- * for small fragments
+ */
+- if ((!NAPI_HAS_SMALL_PAGE_FRAG &&
+- len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)) ||
++ if (len <= SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE) ||
+ len > SKB_WITH_OVERHEAD(PAGE_SIZE) ||
+ (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) {
+ skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI,
+@@ -830,32 +752,16 @@ struct sk_buff *napi_alloc_skb(struct napi_struct *napi, unsigned int len)
+ goto skb_success;
+ }
+
++ len = SKB_HEAD_ALIGN(len);
++
+ if (sk_memalloc_socks())
+ gfp_mask |= __GFP_MEMALLOC;
+
+ local_lock_nested_bh(&napi_alloc_cache.bh_lock);
+ nc = this_cpu_ptr(&napi_alloc_cache);
+- if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) {
+- /* we are artificially inflating the allocation size, but
+- * that is not as bad as it may look like, as:
+- * - 'len' less than GRO_MAX_HEAD makes little sense
+- * - On most systems, larger 'len' values lead to fragment
+- * size above 512 bytes
+- * - kmalloc would use the kmalloc-1k slab for such values
+- * - Builds with smaller GRO_MAX_HEAD will very likely do
+- * little networking, as that implies no WiFi and no
+- * tunnels support, and 32 bits arches.
+- */
+- len = SZ_1K;
+
+- data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
+- pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
+- } else {
+- len = SKB_HEAD_ALIGN(len);
+-
+- data = page_frag_alloc(&nc->page, len, gfp_mask);
+- pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
+- }
++ data = page_frag_alloc(&nc->page, len, gfp_mask);
++ pfmemalloc = page_frag_cache_is_pfmemalloc(&nc->page);
+ local_unlock_nested_bh(&napi_alloc_cache.bh_lock);
+
+ if (unlikely(!data))
+--
+2.39.5
+
--- /dev/null
+From 750c8e7fd63383c41ca30dd629828ea87d0ec2d8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Feb 2025 13:01:37 +0100
+Subject: s390/ism: add release function for struct device
+
+From: Julian Ruess <julianr@linux.ibm.com>
+
+[ Upstream commit 915e34d5ad35a6a9e56113f852ade4a730fb88f0 ]
+
+According to device_release() in /drivers/base/core.c,
+a device without a release function is a broken device
+and must be fixed.
+
+The current code directly frees the device after calling device_add()
+without waiting for other kernel parts to release their references.
+Thus, a reference could still be held to a struct device,
+e.g., by sysfs, leading to potential use-after-free
+issues if a proper release function is not set.
+
+Fixes: 8c81ba20349d ("net/smc: De-tangle ism and smc device initialization")
+Reviewed-by: Alexandra Winter <wintera@linux.ibm.com>
+Reviewed-by: Wenjia Zhang <wenjia@linux.ibm.com>
+Signed-off-by: Julian Ruess <julianr@linux.ibm.com>
+Signed-off-by: Alexandra Winter <wintera@linux.ibm.com>
+Reviewed-by: Simon Horman <horms@kernel.org>
+Link: https://patch.msgid.link/20250214120137.563409-1-wintera@linux.ibm.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/s390/net/ism_drv.c | 14 ++++++++++++--
+ 1 file changed, 12 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/s390/net/ism_drv.c b/drivers/s390/net/ism_drv.c
+index e36e3ea165d3b..2f34761e64135 100644
+--- a/drivers/s390/net/ism_drv.c
++++ b/drivers/s390/net/ism_drv.c
+@@ -588,6 +588,15 @@ static int ism_dev_init(struct ism_dev *ism)
+ return ret;
+ }
+
++static void ism_dev_release(struct device *dev)
++{
++ struct ism_dev *ism;
++
++ ism = container_of(dev, struct ism_dev, dev);
++
++ kfree(ism);
++}
++
+ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ {
+ struct ism_dev *ism;
+@@ -601,6 +610,7 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ dev_set_drvdata(&pdev->dev, ism);
+ ism->pdev = pdev;
+ ism->dev.parent = &pdev->dev;
++ ism->dev.release = ism_dev_release;
+ device_initialize(&ism->dev);
+ dev_set_name(&ism->dev, dev_name(&pdev->dev));
+ ret = device_add(&ism->dev);
+@@ -637,7 +647,7 @@ static int ism_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+ device_del(&ism->dev);
+ err_dev:
+ dev_set_drvdata(&pdev->dev, NULL);
+- kfree(ism);
++ put_device(&ism->dev);
+
+ return ret;
+ }
+@@ -682,7 +692,7 @@ static void ism_remove(struct pci_dev *pdev)
+ pci_disable_device(pdev);
+ device_del(&ism->dev);
+ dev_set_drvdata(&pdev->dev, NULL);
+- kfree(ism);
++ put_device(&ism->dev);
+ }
+
+ static struct pci_driver ism_driver = {
+--
+2.39.5
+
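The general pattern the fix applies: once device_initialize() has run, the embedding structure is owned by the device refcount and must be freed from the ->release() callback via put_device(), never kfree()'d directly. A sketch with invented names:

#include <linux/device.h>
#include <linux/slab.h>

struct example_dev {
	struct device dev;
};

static void example_release(struct device *dev)
{
	kfree(container_of(dev, struct example_dev, dev));
}

static struct example_dev *example_create(struct device *parent)
{
	struct example_dev *ed = kzalloc(sizeof(*ed), GFP_KERNEL);

	if (!ed)
		return NULL;

	ed->dev.parent = parent;
	ed->dev.release = example_release;	/* owns the final kfree() */
	device_initialize(&ed->dev);

	/* from here on, error paths must use put_device(&ed->dev),
	 * never kfree(ed) */
	return ed;
}
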
--- /dev/null
+From 47a75a971c13026bde21c8ac9fd93f4684dc1d54 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Jan 2025 20:22:48 +0200
+Subject: serial: sh-sci: Clean sci_ports[0] at earlycon exit
+
+From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
+
+[ Upstream commit 5f1017069933489add0c08659673443c9905659e ]
+
+The early_console_setup() function initializes sci_ports[0].port with an
+object of type struct uart_port obtained from the struct earlycon_device
+passed as an argument to early_console_setup().
+
+Later, during serial port probing, the serial port used as earlycon
+(e.g., port A) might be remapped to a different position in the sci_ports[]
+array, and a different serial port (e.g., port B) might be assigned to slot
+0. For example:
+
+sci_ports[0] = port B
+sci_ports[X] = port A
+
+In this scenario, the new port mapped at index zero (port B) retains the
+data associated with the earlycon configuration. Consequently, after the
+Linux boot process, any access to the serial port now mapped to
+sci_ports[0] (port B) will block the original earlycon port (port A).
+
+To address this, introduce an early_console_exit() function to clean up
+sci_ports[0] when earlycon is exited.
+
+To prevent the cleanup of sci_ports[0] while the serial device is still
+being used by earlycon, introduce the struct sci_port::probing flag and
+account for it in early_console_exit().
+
+Fixes: 0b0cced19ab1 ("serial: sh-sci: Add CONFIG_SERIAL_EARLYCON support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
+Link: https://lore.kernel.org/r/20250116182249.3828577-5-claudiu.beznea.uj@bp.renesas.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 651dee03696e ("serial: sh-sci: Increment the runtime usage counter for the earlycon device")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/sh-sci.c | 32 ++++++++++++++++++++++++++++++--
+ 1 file changed, 30 insertions(+), 2 deletions(-)
+
+diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
+index fece52c7f8976..12215d4107d1c 100644
+--- a/drivers/tty/serial/sh-sci.c
++++ b/drivers/tty/serial/sh-sci.c
+@@ -166,6 +166,7 @@ static struct sci_port sci_ports[SCI_NPORTS];
+ static unsigned long sci_ports_in_use;
+ static struct uart_driver sci_uart_driver;
+ static bool sci_uart_earlycon;
++static bool sci_uart_earlycon_dev_probing;
+
+ static inline struct sci_port *
+ to_sci_port(struct uart_port *uart)
+@@ -3386,7 +3387,8 @@ static struct plat_sci_port *sci_parse_dt(struct platform_device *pdev,
+ static int sci_probe_single(struct platform_device *dev,
+ unsigned int index,
+ struct plat_sci_port *p,
+- struct sci_port *sciport)
++ struct sci_port *sciport,
++ struct resource *sci_res)
+ {
+ int ret;
+
+@@ -3433,6 +3435,14 @@ static int sci_probe_single(struct platform_device *dev,
+ sciport->port.flags |= UPF_HARD_FLOW;
+ }
+
++ if (sci_uart_earlycon && sci_ports[0].port.mapbase == sci_res->start) {
++ /*
++		 * Skip cleaning up sci_ports[0] in early_console_exit(); this
++ * port is the same as the earlycon one.
++ */
++ sci_uart_earlycon_dev_probing = true;
++ }
++
+ return uart_add_one_port(&sci_uart_driver, &sciport->port);
+ }
+
+@@ -3491,7 +3501,7 @@ static int sci_probe(struct platform_device *dev)
+
+ platform_set_drvdata(dev, sp);
+
+- ret = sci_probe_single(dev, dev_id, p, sp);
++ ret = sci_probe_single(dev, dev_id, p, sp, res);
+ if (ret)
+ return ret;
+
+@@ -3574,6 +3584,22 @@ sh_early_platform_init_buffer("earlyprintk", &sci_driver,
+ #ifdef CONFIG_SERIAL_SH_SCI_EARLYCON
+ static struct plat_sci_port port_cfg;
+
++static int early_console_exit(struct console *co)
++{
++ struct sci_port *sci_port = &sci_ports[0];
++
++ /*
++ * Clean the slot used by earlycon. A new SCI device might
++ * map to this slot.
++ */
++ if (!sci_uart_earlycon_dev_probing) {
++ memset(sci_port, 0, sizeof(*sci_port));
++ sci_uart_earlycon = false;
++ }
++
++ return 0;
++}
++
+ static int __init early_console_setup(struct earlycon_device *device,
+ int type)
+ {
+@@ -3591,6 +3617,8 @@ static int __init early_console_setup(struct earlycon_device *device,
+ SCSCR_RE | SCSCR_TE | port_cfg.scscr);
+
+ device->con->write = serial_console_write;
++ device->con->exit = early_console_exit;
++
+ return 0;
+ }
+ static int __init sci_early_console_setup(struct earlycon_device *device,
+--
+2.39.5
+
--- /dev/null
+From fc8b9cd788142d5c16a6067f93bba3a3406c2c51 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Jan 2025 20:22:49 +0200
+Subject: serial: sh-sci: Increment the runtime usage counter for the earlycon
+ device
+
+From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
+
+[ Upstream commit 651dee03696e1dfde6d9a7e8664bbdcd9a10ea7f ]
+
+In the sh-sci driver, serial ports are mapped to the sci_ports[] array,
+with earlycon mapped at index zero.
+
+The uart_add_one_port() function eventually calls __device_attach(),
+which, in turn, calls pm_request_idle(). The identified code path is as
+follows:
+
+uart_add_one_port() ->
+ serial_ctrl_register_port() ->
+ serial_core_register_port() ->
+ serial_core_port_device_add() ->
+ serial_base_port_add() ->
+ device_add() ->
+ bus_probe_device() ->
+ device_initial_probe() ->
+ __device_attach() ->
+ // ...
+ if (dev->p->dead) {
+ // ...
+ } else if (dev->driver) {
+ // ...
+ } else {
+ // ...
+ pm_request_idle(dev);
+ // ...
+ }
+
+The earlycon device clocks are enabled by the bootloader. However, the
+pm_request_idle() call in __device_attach() disables the SCI port clocks
+while earlycon is still active.
+
+The earlycon write function, serial_console_write(), calls
+sci_poll_put_char() via serial_console_putchar(). If the SCI port clocks
+are disabled, writing to earlycon may sometimes cause the SR.TDFE bit to
+remain unset indefinitely, causing the while loop in sci_poll_put_char()
+to never exit. On single-core SoCs, this can result in the system being
+blocked during boot when this issue occurs.
+
+To resolve this, increment the runtime PM usage counter for the earlycon
+SCI device before registering the UART port.
+
+Fixes: 0b0cced19ab1 ("serial: sh-sci: Add CONFIG_SERIAL_EARLYCON support")
+Cc: stable@vger.kernel.org
+Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
+Link: https://lore.kernel.org/r/20250116182249.3828577-6-claudiu.beznea.uj@bp.renesas.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/sh-sci.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
+index 12215d4107d1c..1a050ec9912cb 100644
+--- a/drivers/tty/serial/sh-sci.c
++++ b/drivers/tty/serial/sh-sci.c
+@@ -3436,6 +3436,22 @@ static int sci_probe_single(struct platform_device *dev,
+ }
+
+ if (sci_uart_earlycon && sci_ports[0].port.mapbase == sci_res->start) {
++ /*
++ * In case:
++ * - this is the earlycon port (mapped on index 0 in sci_ports[]) and
++ * - it now maps to an alias other than zero and
++ * - the earlycon is still alive (e.g., "earlycon keep_bootcon" is
++ * available in bootargs)
++ *
++ * we need to avoid disabling clocks and PM domains through the runtime
++ * PM APIs called in __device_attach(). For this, increment the runtime
++ * PM reference counter (the clocks and PM domains were already enabled
++ * by the bootloader). Otherwise the earlycon may access the HW when it
++ * has no clocks enabled leading to failures (infinite loop in
++ * sci_poll_put_char()).
++ */
++ pm_runtime_get_noresume(&dev->dev);
++
+ /*
+ 		 * Skip cleaning up sci_ports[0] in early_console_exit(); this
+ * port is the same as the earlycon one.
+--
+2.39.5
+
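The core of the fix in isolation: pm_runtime_get_noresume() only increments the usage counter, so it never touches the hardware itself, and it prevents pm_request_idle() in __device_attach() from ever seeing an idle device while earlycon still owns the port. A sketch with an invented name:

#include <linux/pm_runtime.h>

static void example_pin_earlycon(struct device *dev)
{
	/* usage counter++ without running any resume callback; the
	 * clocks were already enabled by the bootloader */
	pm_runtime_get_noresume(dev);

	/* balanced with pm_runtime_put() once earlycon is torn down */
}
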
--- /dev/null
+From 294a344a30655709b4597321b152b86e60ffffa8 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 16 Jan 2025 20:22:46 +0200
+Subject: serial: sh-sci: Move runtime PM enable to sci_probe_single()
+
+From: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
+
+[ Upstream commit 239f11209e5f282e16f5241b99256e25dd0614b6 ]
+
+Relocate the runtime PM enable operation to sci_probe_single(). This change
+prepares the codebase for upcoming fixes.
+
+While at it, replace the existing logic with a direct call to
+devm_pm_runtime_enable() and remove sci_cleanup_single(). The
+devm_pm_runtime_enable() function automatically handles disabling runtime
+PM during driver removal.
+
+Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
+Signed-off-by: Claudiu Beznea <claudiu.beznea.uj@bp.renesas.com>
+Link: https://lore.kernel.org/r/20250116182249.3828577-3-claudiu.beznea.uj@bp.renesas.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Stable-dep-of: 651dee03696e ("serial: sh-sci: Increment the runtime usage counter for the earlycon device")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/tty/serial/sh-sci.c | 24 ++++++------------------
+ 1 file changed, 6 insertions(+), 18 deletions(-)
+
+diff --git a/drivers/tty/serial/sh-sci.c b/drivers/tty/serial/sh-sci.c
+index 0050d6253c05d..fece52c7f8976 100644
+--- a/drivers/tty/serial/sh-sci.c
++++ b/drivers/tty/serial/sh-sci.c
+@@ -3057,10 +3057,6 @@ static int sci_init_single(struct platform_device *dev,
+ ret = sci_init_clocks(sci_port, &dev->dev);
+ if (ret < 0)
+ return ret;
+-
+- port->dev = &dev->dev;
+-
+- pm_runtime_enable(&dev->dev);
+ }
+
+ port->type = p->type;
+@@ -3087,11 +3083,6 @@ static int sci_init_single(struct platform_device *dev,
+ return 0;
+ }
+
+-static void sci_cleanup_single(struct sci_port *port)
+-{
+- pm_runtime_disable(port->port.dev);
+-}
+-
+ #if defined(CONFIG_SERIAL_SH_SCI_CONSOLE) || \
+ defined(CONFIG_SERIAL_SH_SCI_EARLYCON)
+ static void serial_console_putchar(struct uart_port *port, unsigned char ch)
+@@ -3261,8 +3252,6 @@ static void sci_remove(struct platform_device *dev)
+ sci_ports_in_use &= ~BIT(port->port.line);
+ uart_remove_one_port(&sci_uart_driver, &port->port);
+
+- sci_cleanup_single(port);
+-
+ if (port->port.fifosize > 1)
+ device_remove_file(&dev->dev, &dev_attr_rx_fifo_trigger);
+ if (type == PORT_SCIFA || type == PORT_SCIFB || type == PORT_HSCIF)
+@@ -3426,6 +3415,11 @@ static int sci_probe_single(struct platform_device *dev,
+ if (ret)
+ return ret;
+
++ sciport->port.dev = &dev->dev;
++ ret = devm_pm_runtime_enable(&dev->dev);
++ if (ret)
++ return ret;
++
+ sciport->gpios = mctrl_gpio_init(&sciport->port, 0);
+ if (IS_ERR(sciport->gpios))
+ return PTR_ERR(sciport->gpios);
+@@ -3439,13 +3433,7 @@ static int sci_probe_single(struct platform_device *dev,
+ sciport->port.flags |= UPF_HARD_FLOW;
+ }
+
+- ret = uart_add_one_port(&sci_uart_driver, &sciport->port);
+- if (ret) {
+- sci_cleanup_single(sciport);
+- return ret;
+- }
+-
+- return 0;
++ return uart_add_one_port(&sci_uart_driver, &sciport->port);
+ }
+
+ static int sci_probe(struct platform_device *dev)
+--
+2.39.5
+
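devm_pm_runtime_enable() is what lets sci_cleanup_single() go away: the matching pm_runtime_disable() is registered as a devres action and runs automatically when the driver unbinds. A minimal sketch, not the driver's actual probe:

#include <linux/pm_runtime.h>

static int example_probe_one(struct device *dev)
{
	int ret;

	/* the paired pm_runtime_disable() happens automatically at
	 * driver detach, so no explicit cleanup helper is needed */
	ret = devm_pm_runtime_enable(dev);
	if (ret)
		return ret;

	return 0;
}
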
--- /dev/null
+btrfs-use-btrfs_inode-in-extent_writepage.patch
+btrfs-fix-double-accounting-race-when-btrfs_run_dela.patch
+btrfs-fix-double-accounting-race-when-extent_writepa.patch
+drm-amd-display-update-dcn351-used-clock-offset.patch
+drm-amd-display-correct-register-address-in-dcn35.patch
+bluetooth-qca-update-firmware-name-to-support-board-.patch
+bluetooth-qca-fix-poor-rf-performance-for-wcn6855.patch
+serial-sh-sci-move-runtime-pm-enable-to-sci_probe_si.patch
+serial-sh-sci-clean-sci_ports-0-after-at-earlycon-ex.patch
+serial-sh-sci-increment-the-runtime-usage-counter-fo.patch
+pci-export-pci_intx_unmanaged-and-pcim_intx.patch
+pci-remove-devres-from-pci_intx.patch
+pci-restore-original-intx_disable-bit-by-pcim_intx.patch
+drm-amdkfd-move-gfx12-trap-handler-to-separate-file.patch
+drm-amdkfd-ensure-consistent-barrier-state-saved-in-.patch
+tracing-switch-trace.c-code-over-to-use-guard.patch
+tracing-have-the-error-of-__tracing_resize_ring_buff.patch
+usb-gadget-f_midi-f_midi_complete-to-call-queue_work.patch
+asoc-rockchip-i2s-tdm-fix-shift-config-for-snd_soc_d.patch
+asoc-sof-ipc4-topology-harden-loops-for-looking-up-a.patch
+powerpc-code-patching-disable-kasan-report-during-pa.patch
+powerpc-64s-rewrite-__real_pte-and-__rpte_to_hidx-as.patch
+alsa-hda-realtek-fixup-alc225-depop-procedure.patch
+powerpc-code-patching-fix-kasan-hit-by-not-flagging-.patch
+asoc-imx-audmix-remove-cpu_mclk-which-is-from-cpu-da.patch
+vsock-virtio-fix-variables-initialization-during-res.patch
+geneve-fix-use-after-free-in-geneve_find_dev.patch
+alsa-hda-cirrus-correct-the-full-scale-volume-set-lo.patch
+net-sched-cls_api-fix-error-handling-causing-null-de.patch
+alsa-seq-drop-ump-events-when-no-ump-conversion-is-s.patch
+s390-ism-add-release-function-for-struct-device.patch
+ibmvnic-don-t-reference-skb-after-sending-to-vios.patch
+sockmap-vsock-for-connectible-sockets-allow-only-con.patch
+vsock-bpf-warn-on-socket-without-transport.patch
+tcp-adjust-rcvq_space-after-updating-scaling-ratio.patch
+net-pse-pd-avoid-setting-max_ua-in-regulator-constra.patch
+net-pse-pd-use-power-limit-at-driver-side-instead-of.patch
+net-pse-pd-pd692x0-fix-power-limit-retrieval.patch
+gtp-suppress-list-corruption-splat-in-gtp_net_exit_b.patch
+geneve-suppress-list-corruption-splat-in-geneve_dest.patch
+flow_dissector-fix-handling-of-mixed-port-and-port-r.patch
+flow_dissector-fix-port-range-key-handling-in-bpf-co.patch
+net-add-non-rcu-dev_getbyhwaddr-helper.patch
+arp-switch-to-dev_getbyhwaddr-in-arp_req_set_public.patch
+net-axienet-set-mac_managed_pm.patch
+tcp-drop-secpath-at-the-same-time-as-we-currently-dr.patch
+net-allow-small-head-cache-usage-with-large-max_skb_.patch
+net-make-sure-we-retain-napi-ordering-on-netdev-napi.patch
+eth-iavf-extend-the-netdev_lock-usage.patch
+net-add-netdev_lock-netdev_unlock-helpers.patch
+net-make-netdev_lock-protect-netdev-reg_state.patch
+net-add-netdev-up-protected-by-netdev_lock.patch
+net-protect-netdev-napi_list-with-netdev_lock.patch
+revert-net-skb-introduce-and-use-a-single-page-frag-.patch
--- /dev/null
+From a78cba087590ae94207e2a6b5a0de899569abb4b Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Feb 2025 12:58:49 +0100
+Subject: sockmap, vsock: For connectible sockets allow only connected
+
+From: Michal Luczaj <mhal@rbox.co>
+
+[ Upstream commit 8fb5bb169d17cdd12c2dcc2e96830ed487d77a0f ]
+
+sockmap expects all vsocks to have a transport assigned, which is expressed
+in vsock_proto::psock_update_sk_prot(). However, there is an edge case
+where an unconnected (connectible) socket may lose its previously assigned
+transport. This is handled with a NULL check in the vsock/BPF recv path.
+
+Another design detail is that listening vsocks are not supposed to have any
+transport assigned at all, which implies they are not supported by the
+sockmap. But this is complicated by the fact that a socket, before
+switching to TCP_LISTEN, may have had some transport assigned during a
+failed connect() attempt. Hence, we may end up with a listening vsock in a
+sockmap, which blows up quickly:
+
+KASAN: null-ptr-deref in range [0x0000000000000120-0x0000000000000127]
+CPU: 7 UID: 0 PID: 56 Comm: kworker/7:0 Not tainted 6.14.0-rc1+
+Workqueue: vsock-loopback vsock_loopback_work
+RIP: 0010:vsock_read_skb+0x4b/0x90
+Call Trace:
+ sk_psock_verdict_data_ready+0xa4/0x2e0
+ virtio_transport_recv_pkt+0x1ca8/0x2acc
+ vsock_loopback_work+0x27d/0x3f0
+ process_one_work+0x846/0x1420
+ worker_thread+0x5b3/0xf80
+ kthread+0x35a/0x700
+ ret_from_fork+0x2d/0x70
+ ret_from_fork_asm+0x1a/0x30
+
+For connectible sockets, instead of relying solely on the state of
+vsk->transport, tell sockmap to only allow those representing established
+connections. This aligns with the behaviour for AF_INET and AF_UNIX.
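+
+As a hedged illustration of the trigger, a hypothetical user-space
+repro sketch (the peer CID/port and the sockmap fd `map_fd` are
+assumptions, not taken from the report):
+
+	#include <unistd.h>
+	#include <sys/socket.h>
+	#include <linux/vm_sockets.h>
+	#include <bpf/bpf.h>
+
+	int main(void)
+	{
+		int map_fd = /* BPF_MAP_TYPE_SOCKMAP created elsewhere */ -1;
+		int s = socket(AF_VSOCK, SOCK_STREAM, 0);
+		struct sockaddr_vm peer = {
+			.svm_family = AF_VSOCK,
+			.svm_cid = 42,		/* assumed unreachable CID */
+			.svm_port = 1234,
+		};
+		int zero = 0;
+
+		/* A failed connect() may leave a transport assigned. */
+		connect(s, (struct sockaddr *)&peer, sizeof(peer));
+		listen(s, 1);	/* now TCP_LISTEN, transport lingers */
+
+		/* Before this patch the insert below could succeed and
+		 * later blow up in vsock_read_skb(); now it is rejected. */
+		bpf_map_update_elem(map_fd, &zero, &s, BPF_ANY);
+		return 0;
+	}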
+
+Fixes: 634f1a7110b4 ("vsock: support sockmap")
+Signed-off-by: Michal Luczaj <mhal@rbox.co>
+Acked-by: Stefano Garzarella <sgarzare@redhat.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/core/sock_map.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/net/core/sock_map.c b/net/core/sock_map.c
+index f1b9b3958792c..2f1be9baad057 100644
+--- a/net/core/sock_map.c
++++ b/net/core/sock_map.c
+@@ -541,6 +541,9 @@ static bool sock_map_sk_state_allowed(const struct sock *sk)
+ return (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_LISTEN);
+ if (sk_is_stream_unix(sk))
+ return (1 << sk->sk_state) & TCPF_ESTABLISHED;
++ if (sk_is_vsock(sk) &&
++ (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET))
++ return (1 << sk->sk_state) & TCPF_ESTABLISHED;
+ return true;
+ }
+
+--
+2.39.5
+
--- /dev/null
+From d0505d065f6ffa8c9562bda498a219abe8c224df Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Feb 2025 15:29:05 -0800
+Subject: tcp: adjust rcvq_space after updating scaling ratio
+
+From: Jakub Kicinski <kuba@kernel.org>
+
+[ Upstream commit f5da7c45188eea71394bf445655cae2df88a7788 ]
+
+Since the commit under Fixes, we set the window clamp in accordance
+with the newly measured rcvbuf scaling_ratio. If the scaling_ratio
+decreased significantly, we may put ourselves in a situation
+where windows become smaller than rcvq_space, preventing
+tcp_rcv_space_adjust() from increasing rcvbuf.
+
+The significant decrease of scaling_ratio is far more likely
+since commit 697a6c8cec03 ("tcp: increase the default TCP scaling ratio"),
+which increased the "default" scaling ratio from ~30% to 50%.
+
+Hitting the bad condition depends a lot on TCP tuning and the
+drivers at play. One of Meta's workloads hits it reliably
+under the following conditions:
+ - default rcvbuf of 125k
+ - sender MTU 1500, receiver MTU 5000
+ - driver settles on scaling_ratio of 78 for the config above.
+Initial rcvq_space gets calculated as TCP_INIT_CWND * tp->advmss
+(10 * 5k = 50k). Once we find out the true scaling ratio and
+MSS we clamp the windows to 38k. Triggering the condition also
+depends on the message sequence of this workload. I can't repro
+the problem with simple iperf or TCP_RR-style tests.
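+
+As a worked illustration of the numbers above (assuming scaling_ratio
+is expressed as a fraction of 256):
+
+	initial rcvq_space = TCP_INIT_CWND * advmss = 10 * 5000 = 50k
+	window_clamp = rcvbuf * scaling_ratio / 256
+	             = 125000 * 78 / 256 ~= 38k
+
+Since 38k < 50k, tcp_rcv_space_adjust() could never grow rcvbuf; the
+fix caps rcvq_space.space at the new clamp so growth can resume.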
+
+Fixes: a2cbb1603943 ("tcp: Update window clamping condition")
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Reviewed-by: Neal Cardwell <ncardwell@google.com>
+Link: https://patch.msgid.link/20250217232905.3162187-1-kuba@kernel.org
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/ipv4/tcp_input.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 4811727b8a022..4ffebfb503269 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -243,9 +243,15 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
+ do_div(val, skb->truesize);
+ tcp_sk(sk)->scaling_ratio = val ? val : 1;
+
+- if (old_ratio != tcp_sk(sk)->scaling_ratio)
+- WRITE_ONCE(tcp_sk(sk)->window_clamp,
+- tcp_win_from_space(sk, sk->sk_rcvbuf));
++ if (old_ratio != tcp_sk(sk)->scaling_ratio) {
++ struct tcp_sock *tp = tcp_sk(sk);
++
++ val = tcp_win_from_space(sk, sk->sk_rcvbuf);
++ tcp_set_window_clamp(sk, val);
++
++ if (tp->window_clamp < tp->rcvq_space.space)
++ tp->rcvq_space.space = tp->window_clamp;
++ }
+ }
+ icsk->icsk_ack.rcv_mss = min_t(unsigned int, len,
+ tcp_sk(sk)->advmss);
+--
+2.39.5
+
--- /dev/null
+From c2201b313d68e8bb44a8215cafc89c247ec37ced Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Mon, 17 Feb 2025 11:23:35 +0100
+Subject: tcp: drop secpath at the same time as we currently drop dst
+
+From: Sabrina Dubroca <sd@queasysnail.net>
+
+[ Upstream commit 9b6412e6979f6f9e0632075f8f008937b5cd4efd ]
+
+Xiumei reported hitting the WARN in xfrm6_tunnel_net_exit while
+running tests that boil down to:
+ - create a pair of netns
+ - run a basic TCP test over ipcomp6
+ - delete the pair of netns
+
+The xfrm_state found on spi_byaddr was not deleted at the time we
+deleted the netns, because we still have a reference on it. This
+lingering reference comes from a secpath (which holds a ref on the
+xfrm_state), which is still attached to an skb. This skb is not
+leaked, it ends up on sk_receive_queue and then gets defer-free'd by
+skb_attempt_defer_free.
+
+The problem happens when we defer freeing an skb (push it on one CPU's
+defer_list), and don't flush that list before the netns is deleted. In
+that case, we still have a reference on the xfrm_state that we don't
+expect at this point.
+
+We already drop the skb's dst in the TCP receive path when it's no
+longer needed, so let's also drop the secpath. At this point,
+tcp_filter has already called into the LSM hooks that may require the
+secpath, so it should not be needed anymore. However, in some of those
+places, the MPTCP extension has just been attached to the skb, so we
+cannot simply drop all extensions.
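+
+A sketch of the design choice (illustrative, not the exact diff): a
+blanket skb_ext_reset() would also discard the MPTCP extension that
+was just attached, so only the two stale references are dropped:
+
+	/* too broad: would drop the just-attached MPTCP ext */
+	/* skb_ext_reset(skb); */
+
+	/* what the patch does instead, via tcp_cleanup_skb(): */
+	skb_dst_drop(skb);	/* drop the dst reference */
+	secpath_reset(skb);	/* drop the secpath (xfrm_state) reference */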
+
+Fixes: 68822bdf76f1 ("net: generalize skb freeing deferral to per-cpu lists")
+Reported-by: Xiumei Mu <xmu@redhat.com>
+Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
+Reviewed-by: Eric Dumazet <edumazet@google.com>
+Link: https://patch.msgid.link/5055ba8f8f72bdcb602faa299faca73c280b7735.1739743613.git.sd@queasysnail.net
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ include/net/tcp.h | 14 ++++++++++++++
+ net/ipv4/tcp_fastopen.c | 4 ++--
+ net/ipv4/tcp_input.c | 8 ++++----
+ net/ipv4/tcp_ipv4.c | 2 +-
+ 4 files changed, 21 insertions(+), 7 deletions(-)
+
+diff --git a/include/net/tcp.h b/include/net/tcp.h
+index e9b37b76e894b..bc04599547c36 100644
+--- a/include/net/tcp.h
++++ b/include/net/tcp.h
+@@ -41,6 +41,7 @@
+ #include <net/inet_ecn.h>
+ #include <net/dst.h>
+ #include <net/mptcp.h>
++#include <net/xfrm.h>
+
+ #include <linux/seq_file.h>
+ #include <linux/memcontrol.h>
+@@ -683,6 +684,19 @@ void tcp_fin(struct sock *sk);
+ void tcp_check_space(struct sock *sk);
+ void tcp_sack_compress_send_ack(struct sock *sk);
+
++static inline void tcp_cleanup_skb(struct sk_buff *skb)
++{
++ skb_dst_drop(skb);
++ secpath_reset(skb);
++}
++
++static inline void tcp_add_receive_queue(struct sock *sk, struct sk_buff *skb)
++{
++ DEBUG_NET_WARN_ON_ONCE(skb_dst(skb));
++ DEBUG_NET_WARN_ON_ONCE(secpath_exists(skb));
++ __skb_queue_tail(&sk->sk_receive_queue, skb);
++}
++
+ /* tcp_timer.c */
+ void tcp_init_xmit_timers(struct sock *);
+ static inline void tcp_clear_xmit_timers(struct sock *sk)
+diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
+index 0f523cbfe329e..32b28fc21b63c 100644
+--- a/net/ipv4/tcp_fastopen.c
++++ b/net/ipv4/tcp_fastopen.c
+@@ -178,7 +178,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
+ if (!skb)
+ return;
+
+- skb_dst_drop(skb);
++ tcp_cleanup_skb(skb);
+ /* segs_in has been initialized to 1 in tcp_create_openreq_child().
+ * Hence, reset segs_in to 0 before calling tcp_segs_in()
+ * to avoid double counting. Also, tcp_segs_in() expects
+@@ -195,7 +195,7 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb)
+ TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_SYN;
+
+ tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+- __skb_queue_tail(&sk->sk_receive_queue, skb);
++ tcp_add_receive_queue(sk, skb);
+ tp->syn_data_acked = 1;
+
+ /* u64_stats_update_begin(&tp->syncp) not needed here,
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index 4ffebfb503269..0ee22e10fcfae 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -4970,7 +4970,7 @@ static void tcp_ofo_queue(struct sock *sk)
+ tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq);
+ fin = TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN;
+ if (!eaten)
+- __skb_queue_tail(&sk->sk_receive_queue, skb);
++ tcp_add_receive_queue(sk, skb);
+ else
+ kfree_skb_partial(skb, fragstolen);
+
+@@ -5162,7 +5162,7 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb,
+ skb, fragstolen)) ? 1 : 0;
+ tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq);
+ if (!eaten) {
+- __skb_queue_tail(&sk->sk_receive_queue, skb);
++ tcp_add_receive_queue(sk, skb);
+ skb_set_owner_r(skb, sk);
+ }
+ return eaten;
+@@ -5245,7 +5245,7 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
+ __kfree_skb(skb);
+ return;
+ }
+- skb_dst_drop(skb);
++ tcp_cleanup_skb(skb);
+ __skb_pull(skb, tcp_hdr(skb)->doff * 4);
+
+ reason = SKB_DROP_REASON_NOT_SPECIFIED;
+@@ -6214,7 +6214,7 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb)
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS);
+
+ /* Bulk data transfer: receiver */
+- skb_dst_drop(skb);
++ tcp_cleanup_skb(skb);
+ __skb_pull(skb, tcp_header_len);
+ eaten = tcp_queue_rcv(sk, skb, &fragstolen);
+
+diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+index c26f6c4b7bb4a..96d68f9b1bb9d 100644
+--- a/net/ipv4/tcp_ipv4.c
++++ b/net/ipv4/tcp_ipv4.c
+@@ -2025,7 +2025,7 @@ bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb,
+ */
+ skb_condense(skb);
+
+- skb_dst_drop(skb);
++ tcp_cleanup_skb(skb);
+
+ if (unlikely(tcp_checksum_complete(skb))) {
+ bh_unlock_sock(sk);
+--
+2.39.5
+
--- /dev/null
+From de8c677278f326fee07aab5ae333f11b26992b06 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Feb 2025 13:41:32 -0500
+Subject: tracing: Have the error of __tracing_resize_ring_buffer() passed to
+ user
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+[ Upstream commit 60b8f711143de7cd9c0f55be0fe7eb94b19eb5c7 ]
+
+Currently, if __tracing_resize_ring_buffer() returns an error,
+tracing_resize_ring_buffer() returns -ENOMEM. But it may not be a memory
+issue that caused the function to fail. If the ring buffer is memory
+mapped, then the resizing of the ring buffer will be disabled. But if the
+user tries to resize the buffer, it will get -ENOMEM returned, which is
+confusing because there is plenty of memory. The actual error returned was
+-EBUSY, which would make much more sense to the user.
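+
+A hypothetical user-space view of the change (the path is the
+standard tracefs mount; the exact error depends on kernel state):
+
+	#include <fcntl.h>
+	#include <stdio.h>
+	#include <unistd.h>
+
+	int main(void)
+	{
+		int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);
+
+		if (write(fd, "8192", 4) < 0)
+			perror("resize");	/* was always ENOMEM; now e.g.
+						 * EBUSY while memory mapped */
+		close(fd);
+		return 0;
+	}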
+
+Cc: stable@vger.kernel.org
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Vincent Donnefort <vdonnefort@google.com>
+Link: https://lore.kernel.org/20250213134132.7e4505d7@gandalf.local.home
+Fixes: 117c39200d9d7 ("ring-buffer: Introducing ring-buffer mapping functions")
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Reviewed-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace.c | 8 +-------
+ 1 file changed, 1 insertion(+), 7 deletions(-)
+
+diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
+index 69aaa8ed7a047..14179a1ee9cca 100644
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -5975,8 +5975,6 @@ static int __tracing_resize_ring_buffer(struct trace_array *tr,
+ ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
+ unsigned long size, int cpu_id)
+ {
+- int ret;
+-
+ guard(mutex)(&trace_types_lock);
+
+ if (cpu_id != RING_BUFFER_ALL_CPUS) {
+@@ -5985,11 +5983,7 @@ ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
+ return -EINVAL;
+ }
+
+- ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
+- if (ret < 0)
+- ret = -ENOMEM;
+-
+- return ret;
++ return __tracing_resize_ring_buffer(tr, size, cpu_id);
+ }
+
+ static void update_last_data(struct trace_array *tr)
+--
+2.39.5
+
--- /dev/null
+From b5803b612a016f8c736160baf9119a9f458e1045 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 24 Dec 2024 22:14:13 -0500
+Subject: tracing: Switch trace.c code over to use guard()
+
+From: Steven Rostedt <rostedt@goodmis.org>
+
+[ Upstream commit d33b10c0c73adca00f72bf4a153a07b7f5f34715 ]
+
+There are several functions in trace.c that have "goto out;" or
+equivalent on error in order to release locks or free values that were
+allocated. This can be error-prone or simply make the code more
+complex.
+
+Switch every location that ends with unlocking a mutex or freeing on error
+over to using the guard(mutex)() and __free() infrastructure to let the
+compiler worry about releasing locks. This makes the code easier to read
+and understand.
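+
+For reference, a minimal sketch of the two cleanup.h patterns this
+conversion relies on (the demo_* names are hypothetical):
+
+	#include <linux/cleanup.h>
+	#include <linux/mutex.h>
+	#include <linux/slab.h>
+
+	static DEFINE_MUTEX(demo_lock);
+
+	static void demo_consume(void *p) { kfree(p); }	/* hypothetical sink */
+
+	static int demo(void)
+	{
+		guard(mutex)(&demo_lock);	/* unlocked on every return */
+
+		void *buf __free(kfree) = kzalloc(32, GFP_KERNEL);
+		if (!buf)
+			return -ENOMEM;		/* auto-unlock, auto-free */
+
+		demo_consume(no_free_ptr(buf));	/* take ownership: no auto-free */
+		return 0;			/* auto-unlock only */
+	}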
+
+There's one place that should probably return an error but instead returns
+0. This patch does not change that return value, as the only changes here
+perform the conversion without changing the logic. Fixing that location
+will have to come later.
+
+Cc: Mark Rutland <mark.rutland@arm.com>
+Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+Cc: Peter Zijlstra <peterz@infradead.org>
+Cc: Andrew Morton <akpm@linux-foundation.org>
+Acked-by: Masami Hiramatsu (Google) <mhiramat@kernel.org>
+Link: https://lore.kernel.org/20241224221413.7b8c68c3@batman.local.home
+Signed-off-by: Steven Rostedt (Google) <rostedt@goodmis.org>
+Stable-dep-of: 60b8f711143d ("tracing: Have the error of __tracing_resize_ring_buffer() passed to user")
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ kernel/trace/trace.c | 266 +++++++++++++++----------------------------
+ 1 file changed, 94 insertions(+), 172 deletions(-)
+
+diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
+index d2267b4406cd8..69aaa8ed7a047 100644
+--- a/kernel/trace/trace.c
++++ b/kernel/trace/trace.c
+@@ -26,6 +26,7 @@
+ #include <linux/hardirq.h>
+ #include <linux/linkage.h>
+ #include <linux/uaccess.h>
++#include <linux/cleanup.h>
+ #include <linux/vmalloc.h>
+ #include <linux/ftrace.h>
+ #include <linux/module.h>
+@@ -535,19 +536,16 @@ LIST_HEAD(ftrace_trace_arrays);
+ int trace_array_get(struct trace_array *this_tr)
+ {
+ struct trace_array *tr;
+- int ret = -ENODEV;
+
+- mutex_lock(&trace_types_lock);
++ guard(mutex)(&trace_types_lock);
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ if (tr == this_tr) {
+ tr->ref++;
+- ret = 0;
+- break;
++ return 0;
+ }
+ }
+- mutex_unlock(&trace_types_lock);
+
+- return ret;
++ return -ENODEV;
+ }
+
+ static void __trace_array_put(struct trace_array *this_tr)
+@@ -1443,22 +1441,20 @@ EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
+ int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
+ cond_update_fn_t update)
+ {
+- struct cond_snapshot *cond_snapshot;
+- int ret = 0;
++ struct cond_snapshot *cond_snapshot __free(kfree) =
++ kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
++ int ret;
+
+- cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
+ if (!cond_snapshot)
+ return -ENOMEM;
+
+ cond_snapshot->cond_data = cond_data;
+ cond_snapshot->update = update;
+
+- mutex_lock(&trace_types_lock);
++ guard(mutex)(&trace_types_lock);
+
+- if (tr->current_trace->use_max_tr) {
+- ret = -EBUSY;
+- goto fail_unlock;
+- }
++ if (tr->current_trace->use_max_tr)
++ return -EBUSY;
+
+ /*
+ * The cond_snapshot can only change to NULL without the
+@@ -1468,29 +1464,20 @@ int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
+ * do safely with only holding the trace_types_lock and not
+ * having to take the max_lock.
+ */
+- if (tr->cond_snapshot) {
+- ret = -EBUSY;
+- goto fail_unlock;
+- }
++ if (tr->cond_snapshot)
++ return -EBUSY;
+
+ ret = tracing_arm_snapshot_locked(tr);
+ if (ret)
+- goto fail_unlock;
++ return ret;
+
+ local_irq_disable();
+ arch_spin_lock(&tr->max_lock);
+- tr->cond_snapshot = cond_snapshot;
++ tr->cond_snapshot = no_free_ptr(cond_snapshot);
+ arch_spin_unlock(&tr->max_lock);
+ local_irq_enable();
+
+- mutex_unlock(&trace_types_lock);
+-
+- return ret;
+-
+- fail_unlock:
+- mutex_unlock(&trace_types_lock);
+- kfree(cond_snapshot);
+- return ret;
++ return 0;
+ }
+ EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
+
+@@ -2203,10 +2190,10 @@ static __init int init_trace_selftests(void)
+
+ selftests_can_run = true;
+
+- mutex_lock(&trace_types_lock);
++ guard(mutex)(&trace_types_lock);
+
+ if (list_empty(&postponed_selftests))
+- goto out;
++ return 0;
+
+ pr_info("Running postponed tracer tests:\n");
+
+@@ -2235,9 +2222,6 @@ static __init int init_trace_selftests(void)
+ }
+ tracing_selftest_running = false;
+
+- out:
+- mutex_unlock(&trace_types_lock);
+-
+ return 0;
+ }
+ core_initcall(init_trace_selftests);
+@@ -2807,7 +2791,7 @@ int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
+ int save_tracepoint_printk;
+ int ret;
+
+- mutex_lock(&tracepoint_printk_mutex);
++ guard(mutex)(&tracepoint_printk_mutex);
+ save_tracepoint_printk = tracepoint_printk;
+
+ ret = proc_dointvec(table, write, buffer, lenp, ppos);
+@@ -2820,16 +2804,13 @@ int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
+ tracepoint_printk = 0;
+
+ if (save_tracepoint_printk == tracepoint_printk)
+- goto out;
++ return ret;
+
+ if (tracepoint_printk)
+ static_key_enable(&tracepoint_printk_key.key);
+ else
+ static_key_disable(&tracepoint_printk_key.key);
+
+- out:
+- mutex_unlock(&tracepoint_printk_mutex);
+-
+ return ret;
+ }
+
+@@ -5127,7 +5108,8 @@ static int tracing_trace_options_show(struct seq_file *m, void *v)
+ u32 tracer_flags;
+ int i;
+
+- mutex_lock(&trace_types_lock);
++ guard(mutex)(&trace_types_lock);
++
+ tracer_flags = tr->current_trace->flags->val;
+ trace_opts = tr->current_trace->flags->opts;
+
+@@ -5144,7 +5126,6 @@ static int tracing_trace_options_show(struct seq_file *m, void *v)
+ else
+ seq_printf(m, "no%s\n", trace_opts[i].name);
+ }
+- mutex_unlock(&trace_types_lock);
+
+ return 0;
+ }
+@@ -5809,7 +5790,7 @@ trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
+ return;
+ }
+
+- mutex_lock(&trace_eval_mutex);
++ guard(mutex)(&trace_eval_mutex);
+
+ if (!trace_eval_maps)
+ trace_eval_maps = map_array;
+@@ -5833,8 +5814,6 @@ trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
+ map_array++;
+ }
+ memset(map_array, 0, sizeof(*map_array));
+-
+- mutex_unlock(&trace_eval_mutex);
+ }
+
+ static void trace_create_eval_file(struct dentry *d_tracer)
+@@ -5998,23 +5977,18 @@ ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
+ {
+ int ret;
+
+- mutex_lock(&trace_types_lock);
++ guard(mutex)(&trace_types_lock);
+
+ if (cpu_id != RING_BUFFER_ALL_CPUS) {
+ /* make sure, this cpu is enabled in the mask */
+- if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
+- ret = -EINVAL;
+- goto out;
+- }
++ if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
++ return -EINVAL;
+ }
+
+ ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
+ if (ret < 0)
+ ret = -ENOMEM;
+
+-out:
+- mutex_unlock(&trace_types_lock);
+-
+ return ret;
+ }
+
+@@ -6106,9 +6080,9 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
+ #ifdef CONFIG_TRACER_MAX_TRACE
+ bool had_max_tr;
+ #endif
+- int ret = 0;
++ int ret;
+
+- mutex_lock(&trace_types_lock);
++ guard(mutex)(&trace_types_lock);
+
+ update_last_data(tr);
+
+@@ -6116,7 +6090,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
+ ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
+ RING_BUFFER_ALL_CPUS);
+ if (ret < 0)
+- goto out;
++ return ret;
+ ret = 0;
+ }
+
+@@ -6124,12 +6098,11 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
+ if (strcmp(t->name, buf) == 0)
+ break;
+ }
+- if (!t) {
+- ret = -EINVAL;
+- goto out;
+- }
++ if (!t)
++ return -EINVAL;
++
+ if (t == tr->current_trace)
+- goto out;
++ return 0;
+
+ #ifdef CONFIG_TRACER_SNAPSHOT
+ if (t->use_max_tr) {
+@@ -6140,27 +6113,23 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
+ arch_spin_unlock(&tr->max_lock);
+ local_irq_enable();
+ if (ret)
+- goto out;
++ return ret;
+ }
+ #endif
+ /* Some tracers won't work on kernel command line */
+ if (system_state < SYSTEM_RUNNING && t->noboot) {
+ pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
+ t->name);
+- goto out;
++ return 0;
+ }
+
+ /* Some tracers are only allowed for the top level buffer */
+- if (!trace_ok_for_array(t, tr)) {
+- ret = -EINVAL;
+- goto out;
+- }
++ if (!trace_ok_for_array(t, tr))
++ return -EINVAL;
+
+ /* If trace pipe files are being read, we can't change the tracer */
+- if (tr->trace_ref) {
+- ret = -EBUSY;
+- goto out;
+- }
++ if (tr->trace_ref)
++ return -EBUSY;
+
+ trace_branch_disable();
+
+@@ -6191,7 +6160,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
+ if (!had_max_tr && t->use_max_tr) {
+ ret = tracing_arm_snapshot_locked(tr);
+ if (ret)
+- goto out;
++ return ret;
+ }
+ #else
+ tr->current_trace = &nop_trace;
+@@ -6204,17 +6173,15 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)
+ if (t->use_max_tr)
+ tracing_disarm_snapshot(tr);
+ #endif
+- goto out;
++ return ret;
+ }
+ }
+
+ tr->current_trace = t;
+ tr->current_trace->enabled++;
+ trace_branch_enable(tr);
+- out:
+- mutex_unlock(&trace_types_lock);
+
+- return ret;
++ return 0;
+ }
+
+ static ssize_t
+@@ -6292,22 +6259,18 @@ tracing_thresh_write(struct file *filp, const char __user *ubuf,
+ struct trace_array *tr = filp->private_data;
+ int ret;
+
+- mutex_lock(&trace_types_lock);
++ guard(mutex)(&trace_types_lock);
+ ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
+ if (ret < 0)
+- goto out;
++ return ret;
+
+ if (tr->current_trace->update_thresh) {
+ ret = tr->current_trace->update_thresh(tr);
+ if (ret < 0)
+- goto out;
++ return ret;
+ }
+
+- ret = cnt;
+-out:
+- mutex_unlock(&trace_types_lock);
+-
+- return ret;
++ return cnt;
+ }
+
+ #ifdef CONFIG_TRACER_MAX_TRACE
+@@ -6526,31 +6489,29 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
+ * This is just a matter of traces coherency, the ring buffer itself
+ * is protected.
+ */
+- mutex_lock(&iter->mutex);
++ guard(mutex)(&iter->mutex);
+
+ /* return any leftover data */
+ sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
+ if (sret != -EBUSY)
+- goto out;
++ return sret;
+
+ trace_seq_init(&iter->seq);
+
+ if (iter->trace->read) {
+ sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
+ if (sret)
+- goto out;
++ return sret;
+ }
+
+ waitagain:
+ sret = tracing_wait_pipe(filp);
+ if (sret <= 0)
+- goto out;
++ return sret;
+
+ /* stop when tracing is finished */
+- if (trace_empty(iter)) {
+- sret = 0;
+- goto out;
+- }
++ if (trace_empty(iter))
++ return 0;
+
+ if (cnt >= TRACE_SEQ_BUFFER_SIZE)
+ cnt = TRACE_SEQ_BUFFER_SIZE - 1;
+@@ -6614,9 +6575,6 @@ tracing_read_pipe(struct file *filp, char __user *ubuf,
+ if (sret == -EBUSY)
+ goto waitagain;
+
+-out:
+- mutex_unlock(&iter->mutex);
+-
+ return sret;
+ }
+
+@@ -7208,25 +7166,19 @@ u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_eve
+ */
+ int tracing_set_filter_buffering(struct trace_array *tr, bool set)
+ {
+- int ret = 0;
+-
+- mutex_lock(&trace_types_lock);
++ guard(mutex)(&trace_types_lock);
+
+ if (set && tr->no_filter_buffering_ref++)
+- goto out;
++ return 0;
+
+ if (!set) {
+- if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
+- ret = -EINVAL;
+- goto out;
+- }
++ if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
++ return -EINVAL;
+
+ --tr->no_filter_buffering_ref;
+ }
+- out:
+- mutex_unlock(&trace_types_lock);
+
+- return ret;
++ return 0;
+ }
+
+ struct ftrace_buffer_info {
+@@ -7302,12 +7254,10 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
+ if (ret)
+ return ret;
+
+- mutex_lock(&trace_types_lock);
++ guard(mutex)(&trace_types_lock);
+
+- if (tr->current_trace->use_max_tr) {
+- ret = -EBUSY;
+- goto out;
+- }
++ if (tr->current_trace->use_max_tr)
++ return -EBUSY;
+
+ local_irq_disable();
+ arch_spin_lock(&tr->max_lock);
+@@ -7316,24 +7266,20 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
+ arch_spin_unlock(&tr->max_lock);
+ local_irq_enable();
+ if (ret)
+- goto out;
++ return ret;
+
+ switch (val) {
+ case 0:
+- if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
+- ret = -EINVAL;
+- break;
+- }
++ if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
++ return -EINVAL;
+ if (tr->allocated_snapshot)
+ free_snapshot(tr);
+ break;
+ case 1:
+ /* Only allow per-cpu swap if the ring buffer supports it */
+ #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
+- if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
+- ret = -EINVAL;
+- break;
+- }
++ if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
++ return -EINVAL;
+ #endif
+ if (tr->allocated_snapshot)
+ ret = resize_buffer_duplicate_size(&tr->max_buffer,
+@@ -7341,7 +7287,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
+
+ ret = tracing_arm_snapshot_locked(tr);
+ if (ret)
+- break;
++ return ret;
+
+ /* Now, we're going to swap */
+ if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
+@@ -7368,8 +7314,7 @@ tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
+ *ppos += cnt;
+ ret = cnt;
+ }
+-out:
+- mutex_unlock(&trace_types_lock);
++
+ return ret;
+ }
+
+@@ -7755,12 +7700,11 @@ void tracing_log_err(struct trace_array *tr,
+
+ len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
+
+- mutex_lock(&tracing_err_log_lock);
++ guard(mutex)(&tracing_err_log_lock);
++
+ err = get_tracing_log_err(tr, len);
+- if (PTR_ERR(err) == -ENOMEM) {
+- mutex_unlock(&tracing_err_log_lock);
++ if (PTR_ERR(err) == -ENOMEM)
+ return;
+- }
+
+ snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
+ snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
+@@ -7771,7 +7715,6 @@ void tracing_log_err(struct trace_array *tr,
+ err->info.ts = local_clock();
+
+ list_add_tail(&err->list, &tr->err_log);
+- mutex_unlock(&tracing_err_log_lock);
+ }
+
+ static void clear_tracing_err_log(struct trace_array *tr)
+@@ -9519,20 +9462,17 @@ static int instance_mkdir(const char *name)
+ struct trace_array *tr;
+ int ret;
+
+- mutex_lock(&event_mutex);
+- mutex_lock(&trace_types_lock);
++ guard(mutex)(&event_mutex);
++ guard(mutex)(&trace_types_lock);
+
+ ret = -EEXIST;
+ if (trace_array_find(name))
+- goto out_unlock;
++ return -EEXIST;
+
+ tr = trace_array_create(name);
+
+ ret = PTR_ERR_OR_ZERO(tr);
+
+-out_unlock:
+- mutex_unlock(&trace_types_lock);
+- mutex_unlock(&event_mutex);
+ return ret;
+ }
+
+@@ -9582,24 +9522,23 @@ struct trace_array *trace_array_get_by_name(const char *name, const char *system
+ {
+ struct trace_array *tr;
+
+- mutex_lock(&event_mutex);
+- mutex_lock(&trace_types_lock);
++ guard(mutex)(&event_mutex);
++ guard(mutex)(&trace_types_lock);
+
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+- if (tr->name && strcmp(tr->name, name) == 0)
+- goto out_unlock;
++ if (tr->name && strcmp(tr->name, name) == 0) {
++ tr->ref++;
++ return tr;
++ }
+ }
+
+ tr = trace_array_create_systems(name, systems, 0, 0);
+
+ if (IS_ERR(tr))
+ tr = NULL;
+-out_unlock:
+- if (tr)
++ else
+ tr->ref++;
+
+- mutex_unlock(&trace_types_lock);
+- mutex_unlock(&event_mutex);
+ return tr;
+ }
+ EXPORT_SYMBOL_GPL(trace_array_get_by_name);
+@@ -9650,48 +9589,36 @@ static int __remove_instance(struct trace_array *tr)
+ int trace_array_destroy(struct trace_array *this_tr)
+ {
+ struct trace_array *tr;
+- int ret;
+
+ if (!this_tr)
+ return -EINVAL;
+
+- mutex_lock(&event_mutex);
+- mutex_lock(&trace_types_lock);
++ guard(mutex)(&event_mutex);
++ guard(mutex)(&trace_types_lock);
+
+- ret = -ENODEV;
+
+ /* Making sure trace array exists before destroying it. */
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+- if (tr == this_tr) {
+- ret = __remove_instance(tr);
+- break;
+- }
++ if (tr == this_tr)
++ return __remove_instance(tr);
+ }
+
+- mutex_unlock(&trace_types_lock);
+- mutex_unlock(&event_mutex);
+-
+- return ret;
++ return -ENODEV;
+ }
+ EXPORT_SYMBOL_GPL(trace_array_destroy);
+
+ static int instance_rmdir(const char *name)
+ {
+ struct trace_array *tr;
+- int ret;
+
+- mutex_lock(&event_mutex);
+- mutex_lock(&trace_types_lock);
++ guard(mutex)(&event_mutex);
++ guard(mutex)(&trace_types_lock);
+
+- ret = -ENODEV;
+ tr = trace_array_find(name);
+- if (tr)
+- ret = __remove_instance(tr);
+-
+- mutex_unlock(&trace_types_lock);
+- mutex_unlock(&event_mutex);
++ if (!tr)
++ return -ENODEV;
+
+- return ret;
++ return __remove_instance(tr);
+ }
+
+ static __init void create_trace_instances(struct dentry *d_tracer)
+@@ -9704,19 +9631,16 @@ static __init void create_trace_instances(struct dentry *d_tracer)
+ if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
+ return;
+
+- mutex_lock(&event_mutex);
+- mutex_lock(&trace_types_lock);
++ guard(mutex)(&event_mutex);
++ guard(mutex)(&trace_types_lock);
+
+ list_for_each_entry(tr, &ftrace_trace_arrays, list) {
+ if (!tr->name)
+ continue;
+ if (MEM_FAIL(trace_array_create_dir(tr) < 0,
+ "Failed to create instance directory\n"))
+- break;
++ return;
+ }
+-
+- mutex_unlock(&trace_types_lock);
+- mutex_unlock(&event_mutex);
+ }
+
+ static void
+@@ -9930,7 +9854,7 @@ static void trace_module_remove_evals(struct module *mod)
+ if (!mod->num_trace_evals)
+ return;
+
+- mutex_lock(&trace_eval_mutex);
++ guard(mutex)(&trace_eval_mutex);
+
+ map = trace_eval_maps;
+
+@@ -9942,12 +9866,10 @@ static void trace_module_remove_evals(struct module *mod)
+ map = map->tail.next;
+ }
+ if (!map)
+- goto out;
++ return;
+
+ *last = trace_eval_jmp_to_tail(map)->tail.next;
+ kfree(map);
+- out:
+- mutex_unlock(&trace_eval_mutex);
+ }
+ #else
+ static inline void trace_module_remove_evals(struct module *mod) { }
+--
+2.39.5
+
--- /dev/null
+From f4e06e633c39b308da41504775423abf3a1ec2a7 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Tue, 11 Feb 2025 10:48:05 -0700
+Subject: USB: gadget: f_midi: f_midi_complete to call queue_work
+
+From: Jill Donahue <jilliandonahue58@gmail.com>
+
+[ Upstream commit 4ab37fcb42832cdd3e9d5e50653285ca84d6686f ]
+
+When using USB MIDI, the same lock can be acquired twice through a
+re-entrant call to f_midi_transmit(), causing a deadlock.
+
+Fix it by using queue_work() to schedule the inner f_midi_transmit() via
+a high-priority workqueue from the completion handler.
+
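+A hedged sketch of the resulting pattern (the field and handler wiring
+are assumed from the driver; this is not the full function):
+
+	static void f_midi_complete(struct usb_ep *ep, struct usb_request *req)
+	{
+		struct f_midi *midi = ep->driver_data;	/* assumed lookup */
+
+		/* ... */
+		req->length = 0;
+		/* Defer instead of re-entering f_midi_transmit() (and its
+		 * lock) from the completion callback. */
+		queue_work(system_highpri_wq, &midi->work);
+	}
+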
+Link: https://lore.kernel.org/all/CAArt=LjxU0fUZOj06X+5tkeGT+6RbXzpWg1h4t4Fwa_KGVAX6g@mail.gmail.com/
+Fixes: d5daf49b58661 ("USB: gadget: midi: add midi function driver")
+Cc: stable <stable@kernel.org>
+Signed-off-by: Jill Donahue <jilliandonahue58@gmail.com>
+Link: https://lore.kernel.org/r/20250211174805.1369265-1-jdonahue@fender.com
+Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ drivers/usb/gadget/function/f_midi.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/drivers/usb/gadget/function/f_midi.c b/drivers/usb/gadget/function/f_midi.c
+index 47260d65066a8..da82598fcef8a 100644
+--- a/drivers/usb/gadget/function/f_midi.c
++++ b/drivers/usb/gadget/function/f_midi.c
+@@ -283,7 +283,7 @@ f_midi_complete(struct usb_ep *ep, struct usb_request *req)
+ /* Our transmit completed. See if there's more to go.
+ * f_midi_transmit eats req, don't queue it again. */
+ req->length = 0;
+- f_midi_transmit(midi);
++ queue_work(system_highpri_wq, &midi->work);
+ return;
+ }
+ break;
+--
+2.39.5
+
--- /dev/null
+From e7254541cd92b8c893d5cdd2c4adcb3206685050 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Thu, 13 Feb 2025 12:58:50 +0100
+Subject: vsock/bpf: Warn on socket without transport
+
+From: Michal Luczaj <mhal@rbox.co>
+
+[ Upstream commit 857ae05549ee2542317e7084ecaa5f8536634dd9 ]
+
+In the spirit of commit 91751e248256 ("vsock: prevent null-ptr-deref in
+vsock_*[has_data|has_space]"), armorize the "impossible" cases with a
+warning.
+
+Fixes: 634f1a7110b4 ("vsock: support sockmap")
+Signed-off-by: Michal Luczaj <mhal@rbox.co>
+Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
+Signed-off-by: Paolo Abeni <pabeni@redhat.com>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/vmw_vsock/af_vsock.c | 3 +++
+ net/vmw_vsock/vsock_bpf.c | 2 +-
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
+index 53a081d49d28a..7e3db87ae4333 100644
+--- a/net/vmw_vsock/af_vsock.c
++++ b/net/vmw_vsock/af_vsock.c
+@@ -1189,6 +1189,9 @@ static int vsock_read_skb(struct sock *sk, skb_read_actor_t read_actor)
+ {
+ struct vsock_sock *vsk = vsock_sk(sk);
+
++ if (WARN_ON_ONCE(!vsk->transport))
++ return -ENODEV;
++
+ return vsk->transport->read_skb(vsk, read_actor);
+ }
+
+diff --git a/net/vmw_vsock/vsock_bpf.c b/net/vmw_vsock/vsock_bpf.c
+index f201d9eca1df2..07b96d56f3a57 100644
+--- a/net/vmw_vsock/vsock_bpf.c
++++ b/net/vmw_vsock/vsock_bpf.c
+@@ -87,7 +87,7 @@ static int vsock_bpf_recvmsg(struct sock *sk, struct msghdr *msg,
+ lock_sock(sk);
+ vsk = vsock_sk(sk);
+
+- if (!vsk->transport) {
++ if (WARN_ON_ONCE(!vsk->transport)) {
+ copied = -ENODEV;
+ goto out;
+ }
+--
+2.39.5
+
--- /dev/null
+From 16e3002feb31e2d80a59ac26fafd9b5435e73bc1 Mon Sep 17 00:00:00 2001
+From: Sasha Levin <sashal@kernel.org>
+Date: Fri, 14 Feb 2025 09:22:00 +0800
+Subject: vsock/virtio: fix variables initialization during resuming
+
+From: Junnan Wu <junnan01.wu@samsung.com>
+
+[ Upstream commit 55eff109e76a14e5ed10c8c3c3978d20a35e2a4d ]
+
+When suspend-to-RAM is executed twice in a row,
+`rx_buf_nr` and `rx_buf_max_nr` grow to three times vq->num_free.
+Then, after virtqueue_get_buf() runs and `rx_buf_nr` is decremented
+in virtio_transport_rx_work(),
+the condition for refilling the rx buffer
+(rx_buf_nr < rx_buf_max_nr / 2) is never met.
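+
+A worked illustration of the drift, with N = vq->num_free (values
+assumed for clarity):
+
+	after probe:	rx_buf_nr = N	rx_buf_max_nr = N
+	1st resume:	rx_buf_nr = 2N	rx_buf_max_nr = 2N
+	2nd resume:	rx_buf_nr = 3N	rx_buf_max_nr = 3N
+
+Only N buffers can actually sit in the queue, so rx_buf_nr never drops
+below 2N, and the refill threshold rx_buf_max_nr / 2 = 1.5N is never
+crossed.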
+
+This is because `rx_buf_nr` and `rx_buf_max_nr`
+are initialized only in virtio_vsock_probe(),
+but they should be reset whenever the virtqueues are recreated,
+such as after a suspend/resume.
+
+Move the `rx_buf_nr` and `rx_buf_max_nr` initialization in
+virtio_vsock_vqs_init(), so we are sure that they are properly
+initialized, every time we initialize the virtqueues, either when we
+load the driver or after a suspend/resume.
+
+To prevent erroneous atomic loads of `queued_replies`
+in virtio_transport_send_pkt_work(),
+which may disrupt the scheduling of vsock->rx_work
+when transmitting packets that require a reply,
+this atomic variable must likewise be reinitialized
+alongside the two counters above after a suspend/resume.
+
+Fixes: bd50c5dc182b ("vsock/virtio: add support for device suspend/resume")
+Link: https://lore.kernel.org/virtualization/20250207052033.2222629-1-junnan01.wu@samsung.com/
+Co-developed-by: Ying Gao <ying01.gao@samsung.com>
+Signed-off-by: Ying Gao <ying01.gao@samsung.com>
+Signed-off-by: Junnan Wu <junnan01.wu@samsung.com>
+Reviewed-by: Luigi Leonardi <leonardi@redhat.com>
+Acked-by: Michael S. Tsirkin <mst@redhat.com>
+Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
+Link: https://patch.msgid.link/20250214012200.1883896-1-junnan01.wu@samsung.com
+Signed-off-by: Jakub Kicinski <kuba@kernel.org>
+Signed-off-by: Sasha Levin <sashal@kernel.org>
+---
+ net/vmw_vsock/virtio_transport.c | 10 +++++++---
+ 1 file changed, 7 insertions(+), 3 deletions(-)
+
+diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
+index b58c3818f284f..f0e48e6911fc4 100644
+--- a/net/vmw_vsock/virtio_transport.c
++++ b/net/vmw_vsock/virtio_transport.c
+@@ -670,6 +670,13 @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock)
+ };
+ int ret;
+
++ mutex_lock(&vsock->rx_lock);
++ vsock->rx_buf_nr = 0;
++ vsock->rx_buf_max_nr = 0;
++ mutex_unlock(&vsock->rx_lock);
++
++ atomic_set(&vsock->queued_replies, 0);
++
+ ret = virtio_find_vqs(vdev, VSOCK_VQ_MAX, vsock->vqs, vqs_info, NULL);
+ if (ret < 0)
+ return ret;
+@@ -779,9 +786,6 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
+
+ vsock->vdev = vdev;
+
+- vsock->rx_buf_nr = 0;
+- vsock->rx_buf_max_nr = 0;
+- atomic_set(&vsock->queued_replies, 0);
+
+ mutex_init(&vsock->tx_lock);
+ mutex_init(&vsock->rx_lock);
+--
+2.39.5
+